From bf4237a188f872e535de8cbfc7903c1387b83b01 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 29 Jan 2020 17:38:19 +0100 Subject: clk: rockchip: convert rk3399 pll type to use readl_relaxed_poll_timeout Instead of open coding the polling of the lock status, use the handy readl_relaxed_poll_timeout for this. As the pll locking is normally blazingly fast and we don't want to incur additional delays, we're not doing any sleeps similar to for example the imx clk-pllv4 and define a very safe but still short timeout of 1ms. Suggested-by: Stephen Boyd Signed-off-by: Heiko Stuebner Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200129163821.1547295-1-heiko@sntech.de --- drivers/clk/rockchip/clk-pll.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c index 10560d963baf..28b04aad31ad 100644 --- a/drivers/clk/rockchip/clk-pll.c +++ b/drivers/clk/rockchip/clk-pll.c @@ -589,19 +589,20 @@ static const struct clk_ops rockchip_rk3066_pll_clk_ops = { static int rockchip_rk3399_pll_wait_lock(struct rockchip_clk_pll *pll) { u32 pllcon; - int delay = 24000000; + int ret; - /* poll check the lock status in rk3399 xPLLCON2 */ - while (delay > 0) { - pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); - if (pllcon & RK3399_PLLCON2_LOCK_STATUS) - return 0; + /* + * Lock time typical 250, max 500 input clock cycles @24MHz + * So define a very safe maximum of 1000us, meaning 24000 cycles. + */ + ret = readl_relaxed_poll_timeout(pll->reg_base + RK3399_PLLCON(2), + pllcon, + pllcon & RK3399_PLLCON2_LOCK_STATUS, + 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); - delay--; - } - - pr_err("%s: timeout waiting for pll to lock\n", __func__); - return -ETIMEDOUT; + return ret; } static void rockchip_rk3399_pll_get_params(struct rockchip_clk_pll *pll, -- cgit From 3507df1a4615113ae6509e0f14f6546f0d1c84b4 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 29 Jan 2020 17:38:20 +0100 Subject: clk: rockchip: convert basic pll lock_wait to use regmap_read_poll_timeout Instead of open coding the polling of the lock status, use the handy regmap_read_poll_timeout for this. As the pll locking is normally blazingly fast and we don't want to incur additional delays, we're not doing any sleeps similar to for example the imx clk-pllv4 and define a very safe but still short timeout of 1ms. Suggested-by: Stephen Boyd Signed-off-by: Heiko Stuebner Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200129163821.1547295-2-heiko@sntech.de --- drivers/clk/rockchip/clk-pll.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c index 28b04aad31ad..945f8b2cacc1 100644 --- a/drivers/clk/rockchip/clk-pll.c +++ b/drivers/clk/rockchip/clk-pll.c @@ -86,23 +86,14 @@ static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) { struct regmap *grf = pll->ctx->grf; unsigned int val; - int delay = 24000000, ret; - - while (delay > 0) { - ret = regmap_read(grf, pll->lock_offset, &val); - if (ret) { - pr_err("%s: failed to read pll lock status: %d\n", - __func__, ret); - return ret; - } + int ret; - if (val & BIT(pll->lock_shift)) - return 0; - delay--; - } + ret = regmap_read_poll_timeout(grf, pll->lock_offset, val, + val & BIT(pll->lock_shift), 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); - pr_err("%s: timeout waiting for pll to lock\n", __func__); - return -ETIMEDOUT; + return ret; } /** -- cgit From 7f6ffbb885d147557bdca471c37b7b1204005798 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 29 Jan 2020 17:38:21 +0100 Subject: clk: rockchip: convert rk3036 pll type to use internal lock status The rk3036 pll type exposes its lock status in both its pllcon registers as well as the General Register Files. To remove one dependency convert it to the "internal" lock status, similar to how rk3399 handles it. Signed-off-by: Heiko Stuebner Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200129163821.1547295-3-heiko@sntech.de --- drivers/clk/rockchip/clk-pll.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c index 945f8b2cacc1..4c6c9167ef50 100644 --- a/drivers/clk/rockchip/clk-pll.c +++ b/drivers/clk/rockchip/clk-pll.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "clk.h" @@ -109,12 +110,31 @@ static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) #define RK3036_PLLCON1_REFDIV_SHIFT 0 #define RK3036_PLLCON1_POSTDIV2_MASK 0x7 #define RK3036_PLLCON1_POSTDIV2_SHIFT 6 +#define RK3036_PLLCON1_LOCK_STATUS BIT(10) #define RK3036_PLLCON1_DSMPD_MASK 0x1 #define RK3036_PLLCON1_DSMPD_SHIFT 12 +#define RK3036_PLLCON1_PWRDOWN BIT(13) #define RK3036_PLLCON2_FRAC_MASK 0xffffff #define RK3036_PLLCON2_FRAC_SHIFT 0 -#define RK3036_PLLCON1_PWRDOWN (1 << 13) +static int rockchip_rk3036_pll_wait_lock(struct rockchip_clk_pll *pll) +{ + u32 pllcon; + int ret; + + /* + * Lock time typical 250, max 500 input clock cycles @24MHz + * So define a very safe maximum of 1000us, meaning 24000 cycles. + */ + ret = readl_relaxed_poll_timeout(pll->reg_base + RK3036_PLLCON(1), + pllcon, + pllcon & RK3036_PLLCON1_LOCK_STATUS, + 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); + + return ret; +} static void rockchip_rk3036_pll_get_params(struct rockchip_clk_pll *pll, struct rockchip_pll_rate_table *rate) @@ -212,7 +232,7 @@ static int rockchip_rk3036_pll_set_params(struct rockchip_clk_pll *pll, writel_relaxed(pllcon, pll->reg_base + RK3036_PLLCON(2)); /* wait for the pll to lock */ - ret = rockchip_pll_wait_lock(pll); + ret = rockchip_rk3036_pll_wait_lock(pll); if (ret) { pr_warn("%s: pll update unsuccessful, trying to restore old params\n", __func__); @@ -251,7 +271,7 @@ static int rockchip_rk3036_pll_enable(struct clk_hw *hw) writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0), pll->reg_base + RK3036_PLLCON(1)); - rockchip_pll_wait_lock(pll); + rockchip_rk3036_pll_wait_lock(pll); return 0; } -- cgit From 899c537c25f9547a0f591ed2013be0f9f68f2df3 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:05 -0700 Subject: drm/i915: Use 64-bit division macro Since the PWM framework is switching struct pwm_state.duty_cycle's datatype to u64, prepare for this transition by using DIV_ROUND_UP_ULL to handle a 64-bit dividend. Signed-off-by: Guru Das Srinagesh Reviewed-by: Jani Nikula Acked-by: Jani Nikula Signed-off-by: Thierry Reding --- drivers/gpu/drm/i915/display/intel_panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 3c5056dbf607..31cb285e8b38 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1929,7 +1929,7 @@ static int pwm_setup_backlight(struct intel_connector *connector, return retval; } - level = DIV_ROUND_UP(pwm_get_duty_cycle(panel->backlight.pwm) * 100, + level = DIV_ROUND_UP_ULL(pwm_get_duty_cycle(panel->backlight.pwm) * 100, CRC_PMIC_PWM_PERIOD_NS); panel->backlight.level = intel_panel_compute_brightness(connector, level); -- cgit From f3e4b14144a928c097e2d526f6dcd6ba0f5eba8a Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:06 -0700 Subject: hwmon: pwm-fan: Use 64-bit division macro Since the PWM framework is switching struct pwm_args.period's datatype to u64, prepare for this transition by using DIV_ROUND_UP_ULL to handle a 64-bit dividend. Signed-off-by: Guru Das Srinagesh Acked-by: Guenter Roeck Signed-off-by: Thierry Reding --- drivers/hwmon/pwm-fan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 30b7b3ea8836..17bb64299bfd 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -447,7 +447,7 @@ static int pwm_fan_resume(struct device *dev) return 0; pwm_get_args(ctx->pwm, &pargs); - duty = DIV_ROUND_UP(ctx->pwm_value * (pargs.period - 1), MAX_PWM); + duty = DIV_ROUND_UP_ULL(ctx->pwm_value * (pargs.period - 1), MAX_PWM); ret = pwm_config(ctx->pwm, duty, pargs.period); if (ret) return ret; -- cgit From 5bd0b9011da836dad1a00d92ebead7eaa87f5fe3 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:08 -0700 Subject: pwm: clps711x: Use 64-bit division macro Since the PWM framework is switching struct pwm_args.period's datatype to u64, prepare for this transition by using DIV64_U64_ROUND_CLOSEST to handle a 64-bit divisor. Cc: Daniel Thompson Signed-off-by: Guru Das Srinagesh Signed-off-by: Thierry Reding --- drivers/pwm/pwm-clps711x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index 924d39a797cf..ba9500aca078 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -43,7 +43,7 @@ static void clps711x_pwm_update_val(struct clps711x_chip *priv, u32 n, u32 v) static unsigned int clps711x_get_duty(struct pwm_device *pwm, unsigned int v) { /* Duty cycle 0..15 max */ - return DIV_ROUND_CLOSEST(v * 0xf, pwm->args.period); + return DIV64_U64_ROUND_CLOSEST(v * 0xf, pwm->args.period); } static int clps711x_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -- cgit From fcdea6b2a3f6508618e39e061073a18c55993a1b Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:09 -0700 Subject: pwm: imx-tpm: Use 64-bit division macro Since the PWM framework is switching struct pwm_state.period's datatype to u64, prepare for this transition by using DIV64_U64_ROUND_CLOSEST to handle a 64-bit divisor. Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Signed-off-by: Guru Das Srinagesh Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 5f3d7f7e6aef..fcdf6befb838 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -124,7 +124,7 @@ static int pwm_imx_tpm_round_state(struct pwm_chip *chip, real_state->duty_cycle = state->duty_cycle; tmp = (u64)p->mod * real_state->duty_cycle; - p->val = DIV_ROUND_CLOSEST_ULL(tmp, real_state->period); + p->val = DIV64_U64_ROUND_CLOSEST(tmp, real_state->period); real_state->polarity = state->polarity; real_state->enabled = state->enabled; -- cgit From 1689dcd433aaa0d32dec3fdb4bd166a0d38f4bbf Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:11 -0700 Subject: pwm: imx27: Use 64-bit division macro Since the PWM framework is switching struct pwm_state.period's datatype to u64, prepare for this transition by using DIV_ROUND_UP_ULL to handle a 64-bit dividend. Signed-off-by: Guru Das Srinagesh Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 732a6f3701e8..c50d453552bd 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -202,7 +202,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip, sr = readl(imx->mmio_base + MX3_PWMSR); fifoav = FIELD_GET(MX3_PWMSR_FIFOAV, sr); if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) { - period_ms = DIV_ROUND_UP(pwm_get_period(pwm), + period_ms = DIV_ROUND_UP_ULL(pwm_get_period(pwm), NSEC_PER_MSEC); msleep(period_ms); -- cgit From 1627f683636df70fb25358b0a7b39a24e8fce5bf Mon Sep 17 00:00:00 2001 From: Mylène Josserand Date: Tue, 2 Jun 2020 10:06:43 +0200 Subject: clk: rockchip: Handle clock tree for rk3288w variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The revision rk3288w has a different clock tree about "hclk_vio" clock, according to the BSP kernel code. This patch handles this difference by detecting which device-tree we are using. If it is a "rockchip,rk3288-cru", let's register the clock tree as it was before. If the device-tree node is "rockchip,rk3288w-cru", we will apply the difference with this version of this SoC. Noticed that this new device-tree compatible must be handled in bootloader such as u-boot. Signed-off-by: Mylène Josserand Link: https://lore.kernel.org/r/20200602080644.11333-2-mylene.josserand@collabora.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3288.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index cc2a177bbdbf..204976e2d0cb 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -425,8 +425,6 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(31), 6, 2, MFLAGS, 0, 5, DFLAGS, RK3288_CLKGATE_CON(3), 0, GFLAGS), - DIV(0, "hclk_vio", "aclk_vio0", 0, - RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), COMPOSITE(0, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, RK3288_CLKGATE_CON(3), 2, GFLAGS), @@ -819,6 +817,16 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { INVERTER(0, "pclk_isp", "pclk_isp_in", RK3288_CLKSEL_CON(29), 3, IFLAGS), }; +static struct rockchip_clk_branch rk3288w_hclkvio_branch[] __initdata = { + DIV(0, "hclk_vio", "aclk_vio1", 0, + RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), +}; + +static struct rockchip_clk_branch rk3288_hclkvio_branch[] __initdata = { + DIV(0, "hclk_vio", "aclk_vio0", 0, + RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), +}; + static const char *const rk3288_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", @@ -936,6 +944,14 @@ static void __init rk3288_clk_init(struct device_node *np) RK3288_GRF_SOC_STATUS1); rockchip_clk_register_branches(ctx, rk3288_clk_branches, ARRAY_SIZE(rk3288_clk_branches)); + + if (of_device_is_compatible(np, "rockchip,rk3288w-cru")) + rockchip_clk_register_branches(ctx, rk3288w_hclkvio_branch, + ARRAY_SIZE(rk3288w_hclkvio_branch)); + else + rockchip_clk_register_branches(ctx, rk3288_hclkvio_branch, + ARRAY_SIZE(rk3288_hclkvio_branch)); + rockchip_clk_protect_critical(rk3288_critical_clocks, ARRAY_SIZE(rk3288_critical_clocks)); -- cgit From 00bd404144241155653bb0d0c15be51e4e6983aa Mon Sep 17 00:00:00 2001 From: Mylène Josserand Date: Tue, 2 Jun 2020 10:06:44 +0200 Subject: dt-bindings: clocks: add rk3288w variant compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the possible compatible "rockchip,rk3288w-cru" that handles the difference between the rk3288 and the new revision rk3288w. This compatible will be added by bootloaders. Signed-off-by: Mylène Josserand Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200602080644.11333-3-mylene.josserand@collabora.com Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt index 8cb47c39ba53..bf3a9ec19241 100644 --- a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt +++ b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt @@ -4,9 +4,15 @@ The RK3288 clock controller generates and supplies clock to various controllers within the SoC and also implements a reset controller for SoC peripherals. +A revision of this SoC is available: rk3288w. The clock tree is a bit +different so another dt-compatible is available. Noticed that it is only +setting the difference but there is no automatic revision detection. This +should be performed by bootloaders. + Required Properties: -- compatible: should be "rockchip,rk3288-cru" +- compatible: should be "rockchip,rk3288-cru" or "rockchip,rk3288w-cru" in + case of this revision of Rockchip rk3288. - reg: physical base address of the controller and length of memory mapped region. - #clock-cells: should be 1. -- cgit From 5bc5d99f1f836858820ad8e9a412bb103bb0d88b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 15 Jun 2020 16:08:28 +0200 Subject: pwm: iqs620a: Use 64-bit division MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PWM framework is going to change the PWM period and duty cycles to be 64-bit unsigned integers. To avoid build errors on platforms that do not natively support 64-bit division, use explicity 64-bit division. Acked-by: Uwe Kleine-König Acked-by: Lee Jones Signed-off-by: Thierry Reding --- drivers/pwm/pwm-iqs620a.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 674f0e238ba0..b2bb27eff623 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -46,7 +46,8 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, { struct iqs620_pwm_private *iqs620_pwm; struct iqs62x_core *iqs62x; - int duty_scale, ret; + u64 duty_scale; + int ret; if (state->polarity != PWM_POLARITY_NORMAL) return -ENOTSUPP; @@ -69,7 +70,7 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, * For lower duty cycles (e.g. 0), the PWM output is simply disabled to * allow an external pull-down resistor to hold the GPIO3/LTX pin low. */ - duty_scale = state->duty_cycle * 256 / IQS620_PWM_PERIOD_NS; + duty_scale = div_u64(state->duty_cycle * 256, IQS620_PWM_PERIOD_NS); mutex_lock(&iqs620_pwm->lock); @@ -81,7 +82,7 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } if (duty_scale) { - u8 duty_val = min(duty_scale - 1, 0xFF); + u8 duty_val = min_t(u64, duty_scale - 1, 0xff); ret = regmap_write(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE, duty_val); -- cgit From 4cc23430a5361f29999545de34fc05fa7d8e513b Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:12 -0700 Subject: pwm: sifive: Use 64-bit division macro Since the PWM framework is switching struct pwm_args.period's datatype to u64, prepare for this transition by using DIV64_U64_ROUND_CLOSEST to handle a 64-bit divisor. Signed-off-by: Guru Das Srinagesh Acked-by: Palmer Dabbelt Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sifive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index cc63f9baa481..62de0bb85921 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -181,7 +181,7 @@ static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm, * consecutively */ num = (u64)duty_cycle * (1U << PWM_SIFIVE_CMPWIDTH); - frac = DIV_ROUND_CLOSEST_ULL(num, state->period); + frac = DIV64_U64_ROUND_CLOSEST(num, state->period); /* The hardware cannot generate a 100% duty cycle */ frac = min(frac, (1U << PWM_SIFIVE_CMPWIDTH) - 1); -- cgit From c7dcccaec2f7e99f95ad4b7c20f4f9086c6982be Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:13 -0700 Subject: pwm: sun4i: Use nsecs_to_jiffies to avoid a division Since the PWM framework is switching struct pwm_state.period's datatype to u64, prepare for this transition by using nsecs_to_jiffies() which does away with the need for a division operation. Signed-off-by: Guru Das Srinagesh Acked-by: Chen-Yu Tsai Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sun4i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index 18fbbe3277d0..961c59c99bb3 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -285,7 +285,7 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, val = (duty & PWM_DTY_MASK) | PWM_PRD(period); sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm)); sun4i_pwm->next_period[pwm->hwpwm] = jiffies + - usecs_to_jiffies(cstate.period / 1000 + 1); + nsecs_to_jiffies(cstate.period + 1000); if (state->polarity != PWM_POLARITY_NORMAL) ctrl &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm); -- cgit From 134ada17dbad272cdca57f2819a796c87e352274 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:14 -0700 Subject: backlight: pwm_bl: Use 64-bit division function Since the PWM framework is switching struct pwm_state.period's datatype to u64, prepare for this transition by using div_u64 to handle a 64-bit dividend instead of a straight division operation. Signed-off-by: Guru Das Srinagesh Reviewed-by: Daniel Thompson Acked-by: Lee Jones Signed-off-by: Thierry Reding --- drivers/video/backlight/pwm_bl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 82b8d7594701..464baade8f52 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -606,7 +606,8 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->scale = data->max_brightness; } - pb->lth_brightness = data->lth_brightness * (state.period / pb->scale); + pb->lth_brightness = data->lth_brightness * (div_u64(state.period, + pb->scale)); props.type = BACKLIGHT_RAW; props.max_brightness = data->max_brightness; -- cgit From a6733474ba4bf3150120bacc1d2db446d89d3dbe Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:15 -0700 Subject: clk: pwm: Use 64-bit division function Since the PWM framework is switching struct pwm_args.period's datatype to u64, prepare for this transition by using div64_u64() to handle a 64-bit divisor. Also ensure that divide-by-zero (with fixed_rate as denominator) does not happen with an explicit check with probe failure as a consequence. Signed-off-by: Guru Das Srinagesh Acked-by: Stephen Boyd Signed-off-by: Thierry Reding --- drivers/clk/clk-pwm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-pwm.c b/drivers/clk/clk-pwm.c index 87fe0b0e01a3..86f2e2d3fc02 100644 --- a/drivers/clk/clk-pwm.c +++ b/drivers/clk/clk-pwm.c @@ -89,7 +89,12 @@ static int clk_pwm_probe(struct platform_device *pdev) } if (of_property_read_u32(node, "clock-frequency", &clk_pwm->fixed_rate)) - clk_pwm->fixed_rate = NSEC_PER_SEC / pargs.period; + clk_pwm->fixed_rate = div64_u64(NSEC_PER_SEC, pargs.period); + + if (!clk_pwm->fixed_rate) { + dev_err(&pdev->dev, "fixed_rate cannot be zero\n"); + return -EINVAL; + } if (pargs.period != NSEC_PER_SEC / clk_pwm->fixed_rate && pargs.period != DIV_ROUND_UP(NSEC_PER_SEC, clk_pwm->fixed_rate)) { -- cgit From a9d887dc1c60ed67f2271d66560cdcf864c4a578 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:16 -0700 Subject: pwm: Convert period and duty cycle to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because period and duty cycle are defined as ints with units of nanoseconds, the maximum time duration that can be set is limited to ~2.147 seconds. Change their definitions to u64 in the structs of the PWM framework so that higher durations may be set. Also use the right format specifiers in debug prints in both core.c, pwm-stm32-lp.c as well as video/fbdev/ssd1307fb.c. Reported-by: kbuild test robot Signed-off-by: Guru Das Srinagesh Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 14 +++++++------- drivers/pwm/pwm-stm32-lp.c | 2 +- drivers/pwm/sysfs.c | 8 ++++---- drivers/video/fbdev/ssd1307fb.c | 2 +- include/linux/pwm.h | 12 ++++++------ 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 004b2ea9b5fd..276e939a5684 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -510,12 +510,12 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, last->period > s2.period && last->period <= state->period) dev_warn(chip->dev, - ".apply didn't pick the best available period (requested: %u, applied: %u, possible: %u)\n", + ".apply didn't pick the best available period (requested: %llu, applied: %llu, possible: %llu)\n", state->period, s2.period, last->period); if (state->enabled && state->period < s2.period) dev_warn(chip->dev, - ".apply is supposed to round down period (requested: %u, applied: %u)\n", + ".apply is supposed to round down period (requested: %llu, applied: %llu)\n", state->period, s2.period); if (state->enabled && @@ -524,14 +524,14 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, last->duty_cycle > s2.duty_cycle && last->duty_cycle <= state->duty_cycle) dev_warn(chip->dev, - ".apply didn't pick the best available duty cycle (requested: %u/%u, applied: %u/%u, possible: %u/%u)\n", + ".apply didn't pick the best available duty cycle (requested: %llu/%llu, applied: %llu/%llu, possible: %llu/%llu)\n", state->duty_cycle, state->period, s2.duty_cycle, s2.period, last->duty_cycle, last->period); if (state->enabled && state->duty_cycle < s2.duty_cycle) dev_warn(chip->dev, - ".apply is supposed to round down duty_cycle (requested: %u/%u, applied: %u/%u)\n", + ".apply is supposed to round down duty_cycle (requested: %llu/%llu, applied: %llu/%llu)\n", state->duty_cycle, state->period, s2.duty_cycle, s2.period); @@ -558,7 +558,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, (s1.enabled && s1.period != last->period) || (s1.enabled && s1.duty_cycle != last->duty_cycle)) { dev_err(chip->dev, - ".apply is not idempotent (ena=%d pol=%d %u/%u) -> (ena=%d pol=%d %u/%u)\n", + ".apply is not idempotent (ena=%d pol=%d %llu/%llu) -> (ena=%d pol=%d %llu/%llu)\n", s1.enabled, s1.polarity, s1.duty_cycle, s1.period, last->enabled, last->polarity, last->duty_cycle, last->period); @@ -1284,8 +1284,8 @@ static void pwm_dbg_show(struct pwm_chip *chip, struct seq_file *s) if (state.enabled) seq_puts(s, " enabled"); - seq_printf(s, " period: %u ns", state.period); - seq_printf(s, " duty: %u ns", state.duty_cycle); + seq_printf(s, " period: %llu ns", state.period); + seq_printf(s, " duty: %llu ns", state.duty_cycle); seq_printf(s, " polarity: %s", state.polarity ? "inverse" : "normal"); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 67fca62524dc..134c14621ee0 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -61,7 +61,7 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, do_div(div, NSEC_PER_SEC); if (!div) { /* Clock is too slow to achieve requested period. */ - dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period); + dev_dbg(priv->chip.dev, "Can't reach %llu ns\n", state->period); return -EINVAL; } diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 2389b8669846..449dbc0f49ed 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -42,7 +42,7 @@ static ssize_t period_show(struct device *child, pwm_get_state(pwm, &state); - return sprintf(buf, "%u\n", state.period); + return sprintf(buf, "%llu\n", state.period); } static ssize_t period_store(struct device *child, @@ -52,10 +52,10 @@ static ssize_t period_store(struct device *child, struct pwm_export *export = child_to_pwm_export(child); struct pwm_device *pwm = export->pwm; struct pwm_state state; - unsigned int val; + u64 val; int ret; - ret = kstrtouint(buf, 0, &val); + ret = kstrtou64(buf, 0, &val); if (ret) return ret; @@ -77,7 +77,7 @@ static ssize_t duty_cycle_show(struct device *child, pwm_get_state(pwm, &state); - return sprintf(buf, "%u\n", state.duty_cycle); + return sprintf(buf, "%llu\n", state.duty_cycle); } static ssize_t duty_cycle_store(struct device *child, diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index 8e06ba912d60..09425ec317ba 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -312,7 +312,7 @@ static int ssd1307fb_init(struct ssd1307fb_par *par) /* Enable the PWM */ pwm_enable(par->pwm); - dev_dbg(&par->client->dev, "Using PWM%d with a %dns period.\n", + dev_dbg(&par->client->dev, "Using PWM%d with a %lluns period.\n", par->pwm->pwm, pwm_get_period(par->pwm)); } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2635b2a55090..a13ff383fa1d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -39,7 +39,7 @@ enum pwm_polarity { * current PWM hardware state. */ struct pwm_args { - unsigned int period; + u64 period; enum pwm_polarity polarity; }; @@ -56,8 +56,8 @@ enum { * @enabled: PWM enabled status */ struct pwm_state { - unsigned int period; - unsigned int duty_cycle; + u64 period; + u64 duty_cycle; enum pwm_polarity polarity; bool enabled; }; @@ -107,13 +107,13 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm) return state.enabled; } -static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) +static inline void pwm_set_period(struct pwm_device *pwm, u64 period) { if (pwm) pwm->state.period = period; } -static inline unsigned int pwm_get_period(const struct pwm_device *pwm) +static inline u64 pwm_get_period(const struct pwm_device *pwm) { struct pwm_state state; @@ -128,7 +128,7 @@ static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) pwm->state.duty_cycle = duty; } -static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) +static inline u64 pwm_get_duty_cycle(const struct pwm_device *pwm) { struct pwm_state state; -- cgit From b8fb642afa0277a0a9072fe119c1fce18437de0d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 15 Jun 2020 16:10:16 +0200 Subject: pwm: iqs620a: Use lowercase hexadecimal literals for consistency Other drivers use lowercase hexadecimal literals, so convert the IQS620a driver to do the same for consistency. Signed-off-by: Thierry Reding --- drivers/pwm/pwm-iqs620a.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index b2bb27eff623..7d33e3646436 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -25,10 +25,10 @@ #include #include -#define IQS620_PWR_SETTINGS 0xD2 +#define IQS620_PWR_SETTINGS 0xd2 #define IQS620_PWR_SETTINGS_PWM_OUT BIT(7) -#define IQS620_PWM_DUTY_CYCLE 0xD8 +#define IQS620_PWM_DUTY_CYCLE 0xd8 #define IQS620_PWM_PERIOD_NS 1000000 @@ -94,7 +94,7 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->enabled && duty_scale) { ret = regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS, - IQS620_PWR_SETTINGS_PWM_OUT, 0xFF); + IQS620_PWR_SETTINGS_PWM_OUT, 0xff); if (ret) goto err_mutex; } @@ -160,7 +160,7 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier, ret = regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS, IQS620_PWR_SETTINGS_PWM_OUT, - iqs620_pwm->out_en ? 0xFF : 0); + iqs620_pwm->out_en ? 0xff : 0); err_mutex: mutex_unlock(&iqs620_pwm->lock); -- cgit From cb8ae6e188a2230a2515493918385dd054e14fa1 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 3 Jun 2020 14:54:34 +0200 Subject: dt-bindings: mfd: Document STM32 low power timer bindings Add a subnode to STM low power timer bindings to support timer driver Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml index e675611f80d0..8bcea8dd7d90 100644 --- a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml @@ -33,6 +33,9 @@ properties: items: - const: mux + interrupts: + maxItems: 1 + "#address-cells": const: 1 @@ -106,11 +109,13 @@ additionalProperties: false examples: - | #include + #include timer@40002400 { compatible = "st,stm32-lptimer"; reg = <0x40002400 0x400>; clocks = <&timer_clk>; clock-names = "mux"; + interrupts-extended = <&exti 47 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; -- cgit From e0bcc58d876c6ece9720310509a908b7637e37cf Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 3 Jun 2020 14:54:36 +0200 Subject: mfd: stm32: Add defines to be used for clkevent purpose Add defines to be able to enable/clear irq and configure one shot mode. Signed-off-by: Benjamin Gaignard Signed-off-by: Lee Jones --- include/linux/mfd/stm32-lptimer.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 605f62264825..90b20550c1c8 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -27,10 +27,15 @@ #define STM32_LPTIM_CMPOK BIT(3) /* STM32_LPTIM_ICR - bit fields */ +#define STM32_LPTIM_ARRMCF BIT(1) #define STM32_LPTIM_CMPOKCF_ARROKCF GENMASK(4, 3) +/* STM32_LPTIM_IER - bit flieds */ +#define STM32_LPTIM_ARRMIE BIT(1) + /* STM32_LPTIM_CR - bit fields */ #define STM32_LPTIM_CNTSTRT BIT(2) +#define STM32_LPTIM_SNGSTRT BIT(1) #define STM32_LPTIM_ENABLE BIT(0) /* STM32_LPTIM_CFGR - bit fields */ -- cgit From 45d93065c8ec4e671f4b1ff02b5b3a633658a92f Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 3 Jun 2020 14:54:37 +0200 Subject: mfd: stm32: Enable regmap fast_io for stm32-lptimer Because stm32-lptimer need to write in registers in interrupt context enable regmap fast_io to use a spin_lock to protect registers access rather than a mutex. Signed-off-by: Benjamin Gaignard Signed-off-by: Lee Jones --- drivers/mfd/stm32-lptimer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/stm32-lptimer.c b/drivers/mfd/stm32-lptimer.c index a00f99f36559..746e51a17cc8 100644 --- a/drivers/mfd/stm32-lptimer.c +++ b/drivers/mfd/stm32-lptimer.c @@ -17,6 +17,7 @@ static const struct regmap_config stm32_lptimer_regmap_cfg = { .val_bits = 32, .reg_stride = sizeof(u32), .max_register = STM32_LPTIM_MAX_REGISTER, + .fast_io = true, }; static int stm32_lptimer_detect_encoder(struct stm32_lptimer *ddata) -- cgit From 48b41c5e2de6c52c90efa99cfa122a5da7a7f0cd Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 3 Jun 2020 14:54:38 +0200 Subject: clocksource: Add Low Power STM32 timers driver Implement clock event driver using low power STM32 timers. Low power timer counters running even when CPUs are stopped. It could be used as clock event broadcaster to wake up CPUs but not like a clocksource because each it rise an interrupt the counter restart from 0. Low power timers have a 16 bits counter and a prescaler which allow to divide the clock per power of 2 to up 128 to target a 32KHz rate. Signed-off-by: Benjamin Gaignard Signed-off-by: Pascal Paillet Acked-by: Daniel Lezcano Signed-off-by: Lee Jones --- drivers/clocksource/Kconfig | 4 + drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-stm32-lp.c | 221 +++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 drivers/clocksource/timer-stm32-lp.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 91418381fcd4..be49123b088a 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -291,6 +291,10 @@ config CLKSRC_STM32 select CLKSRC_MMIO select TIMER_OF +config CLKSRC_STM32_LP + bool "Low power clocksource for STM32 SoCs" + depends on MFD_STM32_LPTIMER || COMPILE_TEST + config CLKSRC_MPS2 bool "Clocksource for MPS2 SoCs" if COMPILE_TEST depends on GENERIC_SCHED_CLOCK diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index bdda1a2e4097..66501e64d0b6 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += timer-cadence-ttc.o obj-$(CONFIG_CLKSRC_EFM32) += timer-efm32.o obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o +obj-$(CONFIG_CLKSRC_STM32_LP) += timer-stm32-lp.o obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o obj-$(CONFIG_CLKSRC_LPC32XX) += timer-lpc32xx.o obj-$(CONFIG_CLKSRC_MPS2) += mps2-timer.o diff --git a/drivers/clocksource/timer-stm32-lp.c b/drivers/clocksource/timer-stm32-lp.c new file mode 100644 index 000000000000..db2841d0beb8 --- /dev/null +++ b/drivers/clocksource/timer-stm32-lp.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) STMicroelectronics 2019 - All Rights Reserved + * Authors: Benjamin Gaignard for STMicroelectronics. + * Pascal Paillet for STMicroelectronics. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CFGR_PSC_OFFSET 9 +#define STM32_LP_RATING 1000 +#define STM32_TARGET_CLKRATE (32000 * HZ) +#define STM32_LP_MAX_PSC 7 + +struct stm32_lp_private { + struct regmap *reg; + struct clock_event_device clkevt; + unsigned long period; + struct device *dev; +}; + +static struct stm32_lp_private* +to_priv(struct clock_event_device *clkevt) +{ + return container_of(clkevt, struct stm32_lp_private, clkevt); +} + +static int stm32_clkevent_lp_shutdown(struct clock_event_device *clkevt) +{ + struct stm32_lp_private *priv = to_priv(clkevt); + + regmap_write(priv->reg, STM32_LPTIM_CR, 0); + regmap_write(priv->reg, STM32_LPTIM_IER, 0); + /* clear pending flags */ + regmap_write(priv->reg, STM32_LPTIM_ICR, STM32_LPTIM_ARRMCF); + + return 0; +} + +static int stm32_clkevent_lp_set_timer(unsigned long evt, + struct clock_event_device *clkevt, + int is_periodic) +{ + struct stm32_lp_private *priv = to_priv(clkevt); + + /* disable LPTIMER to be able to write into IER register*/ + regmap_write(priv->reg, STM32_LPTIM_CR, 0); + /* enable ARR interrupt */ + regmap_write(priv->reg, STM32_LPTIM_IER, STM32_LPTIM_ARRMIE); + /* enable LPTIMER to be able to write into ARR register */ + regmap_write(priv->reg, STM32_LPTIM_CR, STM32_LPTIM_ENABLE); + /* set next event counter */ + regmap_write(priv->reg, STM32_LPTIM_ARR, evt); + + /* start counter */ + if (is_periodic) + regmap_write(priv->reg, STM32_LPTIM_CR, + STM32_LPTIM_CNTSTRT | STM32_LPTIM_ENABLE); + else + regmap_write(priv->reg, STM32_LPTIM_CR, + STM32_LPTIM_SNGSTRT | STM32_LPTIM_ENABLE); + + return 0; +} + +static int stm32_clkevent_lp_set_next_event(unsigned long evt, + struct clock_event_device *clkevt) +{ + return stm32_clkevent_lp_set_timer(evt, clkevt, + clockevent_state_periodic(clkevt)); +} + +static int stm32_clkevent_lp_set_periodic(struct clock_event_device *clkevt) +{ + struct stm32_lp_private *priv = to_priv(clkevt); + + return stm32_clkevent_lp_set_timer(priv->period, clkevt, true); +} + +static int stm32_clkevent_lp_set_oneshot(struct clock_event_device *clkevt) +{ + struct stm32_lp_private *priv = to_priv(clkevt); + + return stm32_clkevent_lp_set_timer(priv->period, clkevt, false); +} + +static irqreturn_t stm32_clkevent_lp_irq_handler(int irq, void *dev_id) +{ + struct clock_event_device *clkevt = (struct clock_event_device *)dev_id; + struct stm32_lp_private *priv = to_priv(clkevt); + + regmap_write(priv->reg, STM32_LPTIM_ICR, STM32_LPTIM_ARRMCF); + + if (clkevt->event_handler) + clkevt->event_handler(clkevt); + + return IRQ_HANDLED; +} + +static void stm32_clkevent_lp_set_prescaler(struct stm32_lp_private *priv, + unsigned long *rate) +{ + int i; + + for (i = 0; i <= STM32_LP_MAX_PSC; i++) { + if (DIV_ROUND_CLOSEST(*rate, 1 << i) < STM32_TARGET_CLKRATE) + break; + } + + regmap_write(priv->reg, STM32_LPTIM_CFGR, i << CFGR_PSC_OFFSET); + + /* Adjust rate and period given the prescaler value */ + *rate = DIV_ROUND_CLOSEST(*rate, (1 << i)); + priv->period = DIV_ROUND_UP(*rate, HZ); +} + +static void stm32_clkevent_lp_init(struct stm32_lp_private *priv, + struct device_node *np, unsigned long rate) +{ + priv->clkevt.name = np->full_name; + priv->clkevt.cpumask = cpu_possible_mask; + priv->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT; + priv->clkevt.set_state_shutdown = stm32_clkevent_lp_shutdown; + priv->clkevt.set_state_periodic = stm32_clkevent_lp_set_periodic; + priv->clkevt.set_state_oneshot = stm32_clkevent_lp_set_oneshot; + priv->clkevt.set_next_event = stm32_clkevent_lp_set_next_event; + priv->clkevt.rating = STM32_LP_RATING; + + clockevents_config_and_register(&priv->clkevt, rate, 0x1, + STM32_LPTIM_MAX_ARR); +} + +static int stm32_clkevent_lp_probe(struct platform_device *pdev) +{ + struct stm32_lptimer *ddata = dev_get_drvdata(pdev->dev.parent); + struct stm32_lp_private *priv; + unsigned long rate; + int ret, irq; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->reg = ddata->regmap; + ret = clk_prepare_enable(ddata->clk); + if (ret) + return -EINVAL; + + rate = clk_get_rate(ddata->clk); + if (!rate) { + ret = -EINVAL; + goto out_clk_disable; + } + + irq = platform_get_irq(to_platform_device(pdev->dev.parent), 0); + if (irq <= 0) { + ret = irq; + goto out_clk_disable; + } + + if (of_property_read_bool(pdev->dev.parent->of_node, "wakeup-source")) { + ret = device_init_wakeup(&pdev->dev, true); + if (ret) + goto out_clk_disable; + + ret = dev_pm_set_wake_irq(&pdev->dev, irq); + if (ret) + goto out_clk_disable; + } + + ret = devm_request_irq(&pdev->dev, irq, stm32_clkevent_lp_irq_handler, + IRQF_TIMER, pdev->name, &priv->clkevt); + if (ret) + goto out_clk_disable; + + stm32_clkevent_lp_set_prescaler(priv, &rate); + + stm32_clkevent_lp_init(priv, pdev->dev.parent->of_node, rate); + + priv->dev = &pdev->dev; + + return 0; + +out_clk_disable: + clk_disable_unprepare(ddata->clk); + return ret; +} + +static int stm32_clkevent_lp_remove(struct platform_device *pdev) +{ + return -EBUSY; /* cannot unregister clockevent */ +} + +static const struct of_device_id stm32_clkevent_lp_of_match[] = { + { .compatible = "st,stm32-lptimer-timer", }, + {}, +}; +MODULE_DEVICE_TABLE(of, stm32_clkevent_lp_of_match); + +static struct platform_driver stm32_clkevent_lp_driver = { + .probe = stm32_clkevent_lp_probe, + .remove = stm32_clkevent_lp_remove, + .driver = { + .name = "stm32-lptimer-timer", + .of_match_table = of_match_ptr(stm32_clkevent_lp_of_match), + }, +}; +module_platform_driver(stm32_clkevent_lp_driver); + +MODULE_ALIAS("platform:stm32-lptimer-timer"); +MODULE_DESCRIPTION("STMicroelectronics STM32 clockevent low power driver"); +MODULE_LICENSE("GPL v2"); -- cgit From 3ea2e4eab64cefa06055bb0541fcdedad4b48565 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 15 Jun 2020 19:10:32 +0300 Subject: mfd: intel-lpss: Add Intel Emmitsburg PCH PCI IDs Intel Emmitsburg PCH has the same LPSS than Intel Ice Lake. Add the new IDs to the list of supported devices. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 046222684b8b..17bcadc00a11 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -201,6 +201,9 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x1ac4), (kernel_ulong_t)&bxt_info }, { PCI_VDEVICE(INTEL, 0x1ac6), (kernel_ulong_t)&bxt_info }, { PCI_VDEVICE(INTEL, 0x1aee), (kernel_ulong_t)&bxt_uart_info }, + /* EBG */ + { PCI_VDEVICE(INTEL, 0x1bad), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x1bae), (kernel_ulong_t)&bxt_uart_info }, /* GLK */ { PCI_VDEVICE(INTEL, 0x31ac), (kernel_ulong_t)&glk_i2c_info }, { PCI_VDEVICE(INTEL, 0x31ae), (kernel_ulong_t)&glk_i2c_info }, -- cgit From 14024cc9fe9478b9948521c0d38fde03ac00cd3d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:20 +0100 Subject: mfd: arizona: Remove BUG_ON usage BUG_ON macros are generally frowned upon when the issue isn't super critical, the kernel can certainly carry on with the 32k clock on the CODEC in a bad state so change the BUG_ON to a WARN_ON. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index f73cf76d1373..19e0bc3c0683 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -80,7 +80,7 @@ int arizona_clk32k_disable(struct arizona *arizona) { mutex_lock(&arizona->clk_lock); - BUG_ON(arizona->clk32k_ref <= 0); + WARN_ON(arizona->clk32k_ref <= 0); arizona->clk32k_ref--; -- cgit From ddff6c45b21d0437ce0c85f8ac35d7b5480513d7 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:21 +0100 Subject: mfd: arizona: Ensure 32k clock is put on driver unbind and error Whilst it doesn't matter if the internal 32k clock register settings are cleaned up on exit, as the part will be turned off losing any settings, hence the driver hasn't historially bothered. The external clock should however be cleaned up, as it could cause clocks to be left on, and will at best generate a warning on unbind. Add clean up on both the probe error path and unbind for the 32k clock. Fixes: cdd8da8cc66b ("mfd: arizona: Add gating of external MCLKn clocks") Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 19e0bc3c0683..000cb82023e3 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -1426,6 +1426,15 @@ err_irq: arizona_irq_exit(arizona); err_pm: pm_runtime_disable(arizona->dev); + + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + case ARIZONA_32KZ_MCLK2: + arizona_clk32k_disable(arizona); + break; + default: + break; + } err_reset: arizona_enable_reset(arizona); regulator_disable(arizona->dcvdd); @@ -1448,6 +1457,15 @@ int arizona_dev_exit(struct arizona *arizona) regulator_disable(arizona->dcvdd); regulator_put(arizona->dcvdd); + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + case ARIZONA_32KZ_MCLK2: + arizona_clk32k_disable(arizona); + break; + default: + break; + } + mfd_remove_devices(arizona->dev); arizona_free_irq(arizona, ARIZONA_IRQ_UNDERCLOCKED, arizona); arizona_free_irq(arizona, ARIZONA_IRQ_OVERCLOCKED, arizona); -- cgit From 7f8a137f736f7366820c485c5a0d34d65b9d0125 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:22 +0100 Subject: mfd: madera: Remove unused forward declaration of madera_codec_pdata This forward declaration is redundant since the header including the full data structure is included. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/madera/pdata.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h index fa9595dd42ba..601cbbc10370 100644 --- a/include/linux/mfd/madera/pdata.h +++ b/include/linux/mfd/madera/pdata.h @@ -21,7 +21,6 @@ struct gpio_desc; struct pinctrl_map; -struct madera_codec_pdata; /** * struct madera_pdata - Configuration data for Madera devices -- cgit From b92735f45f99c9bdcf93ce822fab56231e64a904 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:23 +0100 Subject: mfd: madera: Fix minor formatting issues The mfd_cell structures inconsistently use commas on single entries in the table, make this consistent by always using a comma. Also remove an extra blank line. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/madera-core.c | 12 ++++++------ drivers/mfd/madera-i2c.c | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/mfd/madera-core.c b/drivers/mfd/madera-core.c index 7e0835cb062b..4724c1a01a39 100644 --- a/drivers/mfd/madera-core.c +++ b/drivers/mfd/madera-core.c @@ -44,7 +44,7 @@ static const char * const madera_core_supplies[] = { }; static const struct mfd_cell madera_ldo1_devs[] = { - { .name = "madera-ldo1" }, + { .name = "madera-ldo1", }, }; static const char * const cs47l15_supplies[] = { @@ -55,8 +55,8 @@ static const char * const cs47l15_supplies[] = { static const struct mfd_cell cs47l15_devs[] = { { .name = "madera-pinctrl", }, - { .name = "madera-irq" }, - { .name = "madera-gpio" }, + { .name = "madera-irq", }, + { .name = "madera-gpio", }, { .name = "madera-extcon", .parent_supplies = cs47l15_supplies, @@ -108,7 +108,7 @@ static const char * const cs47l85_supplies[] = { static const struct mfd_cell cs47l85_devs[] = { { .name = "madera-pinctrl", }, { .name = "madera-irq", }, - { .name = "madera-micsupp" }, + { .name = "madera-micsupp", }, { .name = "madera-gpio", }, { .name = "madera-extcon", @@ -155,10 +155,10 @@ static const char * const cs47l92_supplies[] = { }; static const struct mfd_cell cs47l92_devs[] = { - { .name = "madera-pinctrl" }, + { .name = "madera-pinctrl", }, { .name = "madera-irq", }, { .name = "madera-micsupp", }, - { .name = "madera-gpio" }, + { .name = "madera-gpio", }, { .name = "madera-extcon", .parent_supplies = cs47l92_supplies, diff --git a/drivers/mfd/madera-i2c.c b/drivers/mfd/madera-i2c.c index 6b965eb034b6..7df5b9ba5855 100644 --- a/drivers/mfd/madera-i2c.c +++ b/drivers/mfd/madera-i2c.c @@ -88,7 +88,6 @@ static int madera_i2c_probe(struct i2c_client *i2c, if (!madera) return -ENOMEM; - madera->regmap = devm_regmap_init_i2c(i2c, regmap_16bit_config); if (IS_ERR(madera->regmap)) { ret = PTR_ERR(madera->regmap); -- cgit From ad738ddd506b68679faf79277280b75522dd84e7 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Tue, 9 Jun 2020 07:57:19 -0700 Subject: dt-bindings: mfd: gateworks-gsc: Add 16bit pre-scaled voltage mode Add a 16-bit pre-scaled voltage mode to ADC and clarify that existing pre-scaled mode is 24bit. Signed-off-by: Tim Harvey Acked-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml index 487a8445722e..ceec33f6aba1 100644 --- a/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml +++ b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml @@ -79,11 +79,12 @@ properties: description: | conversion mode: 0 - temperature, in C*10 - 1 - pre-scaled voltage value + 1 - pre-scaled 24-bit voltage value 2 - scaled voltage based on an optional resistor divider and optional offset + 3 - pre-scaled 16-bit voltage value $ref: /schemas/types.yaml#/definitions/uint32 - enum: [0, 1, 2] + enum: [0, 1, 2, 3] gw,voltage-divider-ohms: description: Values of resistors for divider on raw ADC input -- cgit From 6bcb330c1f373f314118f772c6e22f1cc36d7683 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Jun 2020 11:17:35 +0200 Subject: dt-bindings: mfd: Add Khadas Microcontroller bindings This Microcontroller is present on the Khadas VIM1, VIM2, VIM3 and Edge boards. It has multiple boot control features like password check, power-on options, power-off control and system FAN control on recent boards. Signed-off-by: Neil Armstrong Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- .../devicetree/bindings/mfd/khadas,mcu.yaml | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Documentation/devicetree/bindings/mfd/khadas,mcu.yaml diff --git a/Documentation/devicetree/bindings/mfd/khadas,mcu.yaml b/Documentation/devicetree/bindings/mfd/khadas,mcu.yaml new file mode 100644 index 000000000000..a3b976f101e8 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/khadas,mcu.yaml @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/khadas,mcu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Khadas on-board Microcontroller Device Tree Bindings + +maintainers: + - Neil Armstrong + +description: | + Khadas embeds a microcontroller on their VIM and Edge boards adding some + system feature as PWM Fan control (for VIM2 rev14 or VIM3), User memory + storage, IR/Key resume control, system power LED control and more. + +properties: + compatible: + enum: + - khadas,mcu # MCU revision is discoverable + + "#cooling-cells": # Only needed for boards having FAN control feature + const: 2 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + khadas_mcu: system-controller@18 { + compatible = "khadas,mcu"; + reg = <0x18>; + #cooling-cells = <2>; + }; + }; -- cgit From 6c27219e34911601955b72c754adfc11c527ba7b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Jun 2020 11:17:36 +0200 Subject: mfd: Add support for the Khadas System control Microcontroller This Microcontroller is present on the Khadas VIM1, VIM2, VIM3 and Edge boards. It has multiple boot control features like password check, power-on options, power-off control and system FAN control on recent boards. This implements a very basic MFD driver with the fan control and User NVMEM cells. Signed-off-by: Neil Armstrong Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 21 ++++++ drivers/mfd/Makefile | 1 + drivers/mfd/khadas-mcu.c | 142 +++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/khadas-mcu.h | 91 ++++++++++++++++++++++++++ 4 files changed, 255 insertions(+) create mode 100644 drivers/mfd/khadas-mcu.c create mode 100644 include/linux/mfd/khadas-mcu.h diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index a37d7d171382..d13bb0abfd6f 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -2053,6 +2053,27 @@ config MFD_WCD934X This driver provides common support WCD934x audio codec and its associated Pin Controller, Soundwire Controller and Audio codec. +config MFD_KHADAS_MCU + tristate "Support for Khadas System control Microcontroller" + depends on I2C + depends on ARCH_MESON || ARCH_ROCKCHIP || COMPILE_TEST + select MFD_CORE + select REGMAP_I2C + help + Support for the Khadas System control Microcontroller interface + present on their VIM and Edge boards. + + This Microcontroller is present on the Khadas VIM1, VIM2, VIM3 and + Edge boards. + + It provides multiple boot control features like password check, + power-on options, power-off control and system FAN control on recent + boards. + + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device. + menu "Multimedia Capabilities Port drivers" depends on ARCH_SA1100 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 9367a92f795a..1c8d6be3347d 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -262,5 +262,6 @@ obj-$(CONFIG_MFD_ROHM_BD70528) += rohm-bd70528.o obj-$(CONFIG_MFD_ROHM_BD71828) += rohm-bd71828.o obj-$(CONFIG_MFD_ROHM_BD718XX) += rohm-bd718x7.o obj-$(CONFIG_MFD_STMFX) += stmfx.o +obj-$(CONFIG_MFD_KHADAS_MCU) += khadas-mcu.o obj-$(CONFIG_SGI_MFD_IOC3) += ioc3.o diff --git a/drivers/mfd/khadas-mcu.c b/drivers/mfd/khadas-mcu.c new file mode 100644 index 000000000000..44d5bb462dab --- /dev/null +++ b/drivers/mfd/khadas-mcu.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Khadas System control Microcontroller + * + * Copyright (C) 2020 BayLibre SAS + * + * Author(s): Neil Armstrong + */ +#include +#include +#include +#include +#include +#include + +static bool khadas_mcu_reg_volatile(struct device *dev, unsigned int reg) +{ + if (reg >= KHADAS_MCU_USER_DATA_0_REG && + reg < KHADAS_MCU_PWR_OFF_CMD_REG) + return true; + + switch (reg) { + case KHADAS_MCU_PWR_OFF_CMD_REG: + case KHADAS_MCU_PASSWD_START_REG: + case KHADAS_MCU_CHECK_VEN_PASSWD_REG: + case KHADAS_MCU_CHECK_USER_PASSWD_REG: + case KHADAS_MCU_WOL_INIT_START_REG: + case KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG: + return true; + default: + return false; + } +} + +static bool khadas_mcu_reg_writeable(struct device *dev, unsigned int reg) +{ + switch (reg) { + case KHADAS_MCU_PASSWD_VEN_0_REG: + case KHADAS_MCU_PASSWD_VEN_1_REG: + case KHADAS_MCU_PASSWD_VEN_2_REG: + case KHADAS_MCU_PASSWD_VEN_3_REG: + case KHADAS_MCU_PASSWD_VEN_4_REG: + case KHADAS_MCU_PASSWD_VEN_5_REG: + case KHADAS_MCU_MAC_0_REG: + case KHADAS_MCU_MAC_1_REG: + case KHADAS_MCU_MAC_2_REG: + case KHADAS_MCU_MAC_3_REG: + case KHADAS_MCU_MAC_4_REG: + case KHADAS_MCU_MAC_5_REG: + case KHADAS_MCU_USID_0_REG: + case KHADAS_MCU_USID_1_REG: + case KHADAS_MCU_USID_2_REG: + case KHADAS_MCU_USID_3_REG: + case KHADAS_MCU_USID_4_REG: + case KHADAS_MCU_USID_5_REG: + case KHADAS_MCU_VERSION_0_REG: + case KHADAS_MCU_VERSION_1_REG: + case KHADAS_MCU_DEVICE_NO_0_REG: + case KHADAS_MCU_DEVICE_NO_1_REG: + case KHADAS_MCU_FACTORY_TEST_REG: + case KHADAS_MCU_SHUTDOWN_NORMAL_STATUS_REG: + return false; + default: + return true; + } +} + +static const struct regmap_config khadas_mcu_regmap_config = { + .reg_bits = 8, + .reg_stride = 1, + .val_bits = 8, + .max_register = KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG, + .volatile_reg = khadas_mcu_reg_volatile, + .writeable_reg = khadas_mcu_reg_writeable, + .cache_type = REGCACHE_RBTREE, +}; + +static struct mfd_cell khadas_mcu_fan_cells[] = { + /* VIM1/2 Rev13+ and VIM3 only */ + { .name = "khadas-mcu-fan-ctrl", }, +}; + +static struct mfd_cell khadas_mcu_cells[] = { + { .name = "khadas-mcu-user-mem", }, +}; + +static int khadas_mcu_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct khadas_mcu *ddata; + int ret; + + ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + i2c_set_clientdata(client, ddata); + + ddata->dev = dev; + + ddata->regmap = devm_regmap_init_i2c(client, &khadas_mcu_regmap_config); + if (IS_ERR(ddata->regmap)) { + ret = PTR_ERR(ddata->regmap); + dev_err(dev, "Failed to allocate register map: %d\n", ret); + return ret; + } + + ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + khadas_mcu_cells, + ARRAY_SIZE(khadas_mcu_cells), + NULL, 0, NULL); + if (ret) + return ret; + + if (of_find_property(dev->of_node, "#cooling-cells", NULL)) + return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + khadas_mcu_fan_cells, + ARRAY_SIZE(khadas_mcu_fan_cells), + NULL, 0, NULL); + + return 0; +} + +static const struct of_device_id khadas_mcu_of_match[] = { + { .compatible = "khadas,mcu", }, + {}, +}; +MODULE_DEVICE_TABLE(of, khadas_mcu_of_match); + +static struct i2c_driver khadas_mcu_driver = { + .driver = { + .name = "khadas-mcu-core", + .of_match_table = of_match_ptr(khadas_mcu_of_match), + }, + .probe = khadas_mcu_probe, +}; +module_i2c_driver(khadas_mcu_driver); + +MODULE_DESCRIPTION("Khadas MCU core driver"); +MODULE_AUTHOR("Neil Armstrong "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/khadas-mcu.h b/include/linux/mfd/khadas-mcu.h new file mode 100644 index 000000000000..a99ba2ed0e4e --- /dev/null +++ b/include/linux/mfd/khadas-mcu.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Khadas System control Microcontroller Register map + * + * Copyright (C) 2020 BayLibre SAS + * + * Author(s): Neil Armstrong + */ + +#ifndef MFD_KHADAS_MCU_H +#define MFD_KHADAS_MCU_H + +#define KHADAS_MCU_PASSWD_VEN_0_REG 0x00 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_1_REG 0x01 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_2_REG 0x02 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_3_REG 0x03 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_4_REG 0x04 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_5_REG 0x05 /* RO */ +#define KHADAS_MCU_MAC_0_REG 0x06 /* RO */ +#define KHADAS_MCU_MAC_1_REG 0x07 /* RO */ +#define KHADAS_MCU_MAC_2_REG 0x08 /* RO */ +#define KHADAS_MCU_MAC_3_REG 0x09 /* RO */ +#define KHADAS_MCU_MAC_4_REG 0x0a /* RO */ +#define KHADAS_MCU_MAC_5_REG 0x0b /* RO */ +#define KHADAS_MCU_USID_0_REG 0x0c /* RO */ +#define KHADAS_MCU_USID_1_REG 0x0d /* RO */ +#define KHADAS_MCU_USID_2_REG 0x0e /* RO */ +#define KHADAS_MCU_USID_3_REG 0x0f /* RO */ +#define KHADAS_MCU_USID_4_REG 0x10 /* RO */ +#define KHADAS_MCU_USID_5_REG 0x11 /* RO */ +#define KHADAS_MCU_VERSION_0_REG 0x12 /* RO */ +#define KHADAS_MCU_VERSION_1_REG 0x13 /* RO */ +#define KHADAS_MCU_DEVICE_NO_0_REG 0x14 /* RO */ +#define KHADAS_MCU_DEVICE_NO_1_REG 0x15 /* RO */ +#define KHADAS_MCU_FACTORY_TEST_REG 0x16 /* R */ +#define KHADAS_MCU_BOOT_MODE_REG 0x20 /* RW */ +#define KHADAS_MCU_BOOT_EN_WOL_REG 0x21 /* RW */ +#define KHADAS_MCU_BOOT_EN_RTC_REG 0x22 /* RW */ +#define KHADAS_MCU_BOOT_EN_EXP_REG 0x23 /* RW */ +#define KHADAS_MCU_BOOT_EN_IR_REG 0x24 /* RW */ +#define KHADAS_MCU_BOOT_EN_DCIN_REG 0x25 /* RW */ +#define KHADAS_MCU_BOOT_EN_KEY_REG 0x26 /* RW */ +#define KHADAS_MCU_KEY_MODE_REG 0x27 /* RW */ +#define KHADAS_MCU_LED_MODE_ON_REG 0x28 /* RW */ +#define KHADAS_MCU_LED_MODE_OFF_REG 0x29 /* RW */ +#define KHADAS_MCU_SHUTDOWN_NORMAL_REG 0x2c /* RW */ +#define KHADAS_MCU_MAC_SWITCH_REG 0x2d /* RW */ +#define KHADAS_MCU_MCU_SLEEP_MODE_REG 0x2e /* RW */ +#define KHADAS_MCU_IR_CODE1_0_REG 0x2f /* RW */ +#define KHADAS_MCU_IR_CODE1_1_REG 0x30 /* RW */ +#define KHADAS_MCU_IR_CODE1_2_REG 0x31 /* RW */ +#define KHADAS_MCU_IR_CODE1_3_REG 0x32 /* RW */ +#define KHADAS_MCU_USB_PCIE_SWITCH_REG 0x33 /* RW */ +#define KHADAS_MCU_IR_CODE2_0_REG 0x34 /* RW */ +#define KHADAS_MCU_IR_CODE2_1_REG 0x35 /* RW */ +#define KHADAS_MCU_IR_CODE2_2_REG 0x36 /* RW */ +#define KHADAS_MCU_IR_CODE2_3_REG 0x37 /* RW */ +#define KHADAS_MCU_PASSWD_USER_0_REG 0x40 /* RW */ +#define KHADAS_MCU_PASSWD_USER_1_REG 0x41 /* RW */ +#define KHADAS_MCU_PASSWD_USER_2_REG 0x42 /* RW */ +#define KHADAS_MCU_PASSWD_USER_3_REG 0x43 /* RW */ +#define KHADAS_MCU_PASSWD_USER_4_REG 0x44 /* RW */ +#define KHADAS_MCU_PASSWD_USER_5_REG 0x45 /* RW */ +#define KHADAS_MCU_USER_DATA_0_REG 0x46 /* RW 56 bytes */ +#define KHADAS_MCU_PWR_OFF_CMD_REG 0x80 /* WO */ +#define KHADAS_MCU_PASSWD_START_REG 0x81 /* WO */ +#define KHADAS_MCU_CHECK_VEN_PASSWD_REG 0x82 /* WO */ +#define KHADAS_MCU_CHECK_USER_PASSWD_REG 0x83 /* WO */ +#define KHADAS_MCU_SHUTDOWN_NORMAL_STATUS_REG 0x86 /* RO */ +#define KHADAS_MCU_WOL_INIT_START_REG 0x87 /* WO */ +#define KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG 0x88 /* WO */ + +enum { + KHADAS_BOARD_VIM1 = 0x1, + KHADAS_BOARD_VIM2, + KHADAS_BOARD_VIM3, + KHADAS_BOARD_EDGE = 0x11, + KHADAS_BOARD_EDGE_V, +}; + +/** + * struct khadas_mcu - Khadas MCU structure + * @device: device reference used for logs + * @regmap: register map + */ +struct khadas_mcu { + struct device *dev; + struct regmap *regmap; +}; + +#endif /* MFD_KHADAS_MCU_H */ -- cgit From 4a95a1b20da31db0c1ca5289a892fef21f691f27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Jun 2020 11:54:01 +0100 Subject: KVM: arm64: Enable Address Authentication at EL2 if available While initializing EL2, enable Address Authentication if detected from EL1. We still use the EL1-provided keys though. Acked-by: Andrew Scull Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp-init.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6e6ed5581eed..1587d146726a 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -104,6 +104,11 @@ alternative_else_nop_endif */ mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) CPU_BE( orr x4, x4, #SCTLR_ELx_EE) +alternative_if ARM64_HAS_ADDRESS_AUTH + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + orr x4, x4, x5 +alternative_else_nop_endif msr sctlr_el2, x4 isb -- cgit From dfb0589c8fff9dde809b9013335a6b8019b299b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Jun 2020 12:18:58 +0100 Subject: KVM: arm64: Allow ARM64_PTR_AUTH when ARM64_VHE=n We currently prevent PtrAuth from even being built if KVM is selected, but VHE isn't. It is a bit of a pointless restriction, since we also check this at run time (rejecting the enabling of PtrAuth for the vcpu if we're not running with VHE). Just drop this apparently useless restriction. Acked-by: Andrew Scull Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..d719ea9c596d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1516,7 +1516,6 @@ menu "ARMv8.3 architectural features" config ARM64_PTR_AUTH bool "Enable support for pointer authentication" default y - depends on !KVM || ARM64_VHE depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC # GCC 9.1 and later inserts a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. @@ -1543,8 +1542,7 @@ config ARM64_PTR_AUTH The feature is detected at runtime. If the feature is not present in hardware it will not be advertised to userspace/KVM guest nor will it - be enabled. However, KVM guest also require VHE mode and hence - CONFIG_ARM64_VHE=y option to use this feature. + be enabled. If the feature is present on the boot CPU but not on a late CPU, then the late CPU will be parked. Also, if the boot CPU does not have -- cgit From aff7cce0d337f5468470d614ef83a507dcf4c93f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Jun 2020 13:20:28 +0100 Subject: KVM: arm64: Allow PtrAuth to be enabled from userspace on non-VHE systems Now that the scene is set for enabling PtrAuth on non-VHE, drop the restrictions preventing userspace from enabling it. Acked-by: Andrew Scull Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d3b209023727..2a929789fe2e 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,6 +42,11 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) +static bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + /** * kvm_arch_vm_ioctl_check_extension * @@ -80,8 +85,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = has_vhe() && system_supports_address_auth() && - system_supports_generic_auth(); + r = system_has_full_ptr_auth(); break; default: r = 0; @@ -205,19 +209,14 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) { - /* Support ptrauth only if the system supports these capabilities. */ - if (!has_vhe()) - return -EINVAL; - - if (!system_supports_address_auth() || - !system_supports_generic_auth()) - return -EINVAL; /* * For now make sure that both address/generic pointer authentication - * features are requested by the userspace together. + * features are requested by the userspace together and the system + * supports these capabilities. */ if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || + !system_has_full_ptr_auth()) return -EINVAL; vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; -- cgit From 655169cec7bbf84f59faa9f824edb581eaecf78d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Jun 2020 11:26:09 +0100 Subject: KVM: arm64: Check HCR_EL2 instead of shadow copy to swap PtrAuth registers When save/restoring PtrAuth registers between host and guest, it is pretty useless to fetch the in-memory state, while we have the right state in the HCR_EL2 system register. Use that instead. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_ptrauth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6301813dcace..f1830173fa9e 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -74,7 +74,7 @@ alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF b 1001f alternative_else_nop_endif 1000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) cbz \reg1, 1001f add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 @@ -90,7 +90,7 @@ alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF b 2001f alternative_else_nop_endif 2000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) cbz \reg1, 2001f add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 -- cgit From 11ac16a4290b72efa8328cb144b727f3cbbdfa6a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Jun 2020 21:03:30 +0100 Subject: KVM: arm64: Simplify PtrAuth alternative patching We currently decide to execute the PtrAuth save/restore code based on a set of branches that evaluate as (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF). This can be easily replaced by a much simpler test as the ARM64_HAS_ADDRESS_AUTH capability is exactly this expression. Suggested-by: Mark Rutland Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_ptrauth.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index f1830173fa9e..0ddf98c3ba9f 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -61,44 +61,36 @@ /* * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will - * check for the presence of one of the cpufeature flag - * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and + * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as + * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication - * key registers. + * key registers if enabled for the guest. */ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 1000f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 1001f -alternative_else_nop_endif -1000: mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 1001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 -1001: +.L__skip_switch\@: .endm .macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 2000f -alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 2001f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -2000: mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 2001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_save_state \reg1, \reg2, \reg3 add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 isb -2001: +.L__skip_switch\@: .endm #else /* !CONFIG_ARM64_PTR_AUTH */ -- cgit From 5772717e59b9fbdc9e97da606dd69a75174f8101 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 24 Jun 2020 18:15:27 +0200 Subject: thermal: Add support for the MCU controlled FAN on Khadas boards The new Khadas VIM2 and VIM3 boards controls the cooling fan via the on-board microcontroller. This implements the FAN control as thermal devices and as cell of the Khadas MCU MFD driver. Signed-off-by: Neil Armstrong Reviewed-by: Amit Kucheria Acked-by: Daniel Lezcano Signed-off-by: Lee Jones --- drivers/thermal/Kconfig | 11 +++ drivers/thermal/Makefile | 1 + drivers/thermal/khadas_mcu_fan.c | 162 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+) create mode 100644 drivers/thermal/khadas_mcu_fan.c diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 3eb2348e5242..0125561488c9 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -500,4 +500,15 @@ config SPRD_THERMAL help Support for the Spreadtrum thermal sensor driver in the Linux thermal framework. + +config KHADAS_MCU_FAN_THERMAL + tristate "Khadas MCU controller FAN cooling support" + depends on OF || COMPILE_TEST + depends on MFD_KHADAS_MCU + select MFD_CORE + select REGMAP + help + If you say yes here you get support for the FAN controlled + by the Microcontroller found on the Khadas VIM boards. + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 0c8b84a09b9a..4b6aabaa7e31 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -61,3 +61,4 @@ obj-$(CONFIG_ZX2967_THERMAL) += zx2967_thermal.o obj-$(CONFIG_UNIPHIER_THERMAL) += uniphier_thermal.o obj-$(CONFIG_AMLOGIC_THERMAL) += amlogic_thermal.o obj-$(CONFIG_SPRD_THERMAL) += sprd_thermal.o +obj-$(CONFIG_KHADAS_MCU_FAN_THERMAL) += khadas_mcu_fan.o diff --git a/drivers/thermal/khadas_mcu_fan.c b/drivers/thermal/khadas_mcu_fan.c new file mode 100644 index 000000000000..9eadd2d6413e --- /dev/null +++ b/drivers/thermal/khadas_mcu_fan.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Khadas MCU Controlled FAN driver + * + * Copyright (C) 2020 BayLibre SAS + * Author(s): Neil Armstrong + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_LEVEL 3 + +struct khadas_mcu_fan_ctx { + struct khadas_mcu *mcu; + unsigned int level; + struct thermal_cooling_device *cdev; +}; + +static int khadas_mcu_fan_set_level(struct khadas_mcu_fan_ctx *ctx, + unsigned int level) +{ + int ret; + + ret = regmap_write(ctx->mcu->regmap, KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG, + level); + if (ret) + return ret; + + ctx->level = level; + + return 0; +} + +static int khadas_mcu_fan_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = MAX_LEVEL; + + return 0; +} + +static int khadas_mcu_fan_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct khadas_mcu_fan_ctx *ctx = cdev->devdata; + + *state = ctx->level; + + return 0; +} + +static int +khadas_mcu_fan_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) +{ + struct khadas_mcu_fan_ctx *ctx = cdev->devdata; + + if (state > MAX_LEVEL) + return -EINVAL; + + if (state == ctx->level) + return 0; + + return khadas_mcu_fan_set_level(ctx, state); +} + +static const struct thermal_cooling_device_ops khadas_mcu_fan_cooling_ops = { + .get_max_state = khadas_mcu_fan_get_max_state, + .get_cur_state = khadas_mcu_fan_get_cur_state, + .set_cur_state = khadas_mcu_fan_set_cur_state, +}; + +static int khadas_mcu_fan_probe(struct platform_device *pdev) +{ + struct khadas_mcu *mcu = dev_get_drvdata(pdev->dev.parent); + struct thermal_cooling_device *cdev; + struct device *dev = &pdev->dev; + struct khadas_mcu_fan_ctx *ctx; + int ret; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->mcu = mcu; + platform_set_drvdata(pdev, ctx); + + cdev = devm_thermal_of_cooling_device_register(dev->parent, + dev->parent->of_node, "khadas-mcu-fan", ctx, + &khadas_mcu_fan_cooling_ops); + if (IS_ERR(cdev)) { + ret = PTR_ERR(cdev); + dev_err(dev, "Failed to register khadas-mcu-fan as cooling device: %d\n", + ret); + return ret; + } + ctx->cdev = cdev; + thermal_cdev_update(cdev); + + return 0; +} + +static void khadas_mcu_fan_shutdown(struct platform_device *pdev) +{ + struct khadas_mcu_fan_ctx *ctx = platform_get_drvdata(pdev); + + khadas_mcu_fan_set_level(ctx, 0); +} + +#ifdef CONFIG_PM_SLEEP +static int khadas_mcu_fan_suspend(struct device *dev) +{ + struct khadas_mcu_fan_ctx *ctx = dev_get_drvdata(dev); + unsigned int level_save = ctx->level; + int ret; + + ret = khadas_mcu_fan_set_level(ctx, 0); + if (ret) + return ret; + + ctx->level = level_save; + + return 0; +} + +static int khadas_mcu_fan_resume(struct device *dev) +{ + struct khadas_mcu_fan_ctx *ctx = dev_get_drvdata(dev); + + return khadas_mcu_fan_set_level(ctx, ctx->level); +} +#endif + +static SIMPLE_DEV_PM_OPS(khadas_mcu_fan_pm, khadas_mcu_fan_suspend, + khadas_mcu_fan_resume); + +static const struct platform_device_id khadas_mcu_fan_id_table[] = { + { .name = "khadas-mcu-fan-ctrl", }, + {}, +}; +MODULE_DEVICE_TABLE(platform, khadas_mcu_fan_id_table); + +static struct platform_driver khadas_mcu_fan_driver = { + .probe = khadas_mcu_fan_probe, + .shutdown = khadas_mcu_fan_shutdown, + .driver = { + .name = "khadas-mcu-fan-ctrl", + .pm = &khadas_mcu_fan_pm, + }, + .id_table = khadas_mcu_fan_id_table, +}; + +module_platform_driver(khadas_mcu_fan_driver); + +MODULE_AUTHOR("Neil Armstrong "); +MODULE_DESCRIPTION("Khadas MCU FAN driver"); +MODULE_LICENSE("GPL"); -- cgit From bf9367a156fd868b312e8a575c00d57db2bc16bf Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Jun 2020 11:17:38 +0200 Subject: MAINTAINERS: Add myself as maintainer for Khadas MCU drivers Add the Thermal driver along the MFD drivers and header as Maintained by myself. Signed-off-by: Neil Armstrong Signed-off-by: Lee Jones --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68f21d46614c..02d9827a4f81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9574,6 +9574,15 @@ F: include/linux/kdb.h F: include/linux/kgdb.h F: kernel/debug/ +KHADAS MCU MFD DRIVER +M: Neil Armstrong +L: linux-amlogic@lists.infradead.org +S: Maintained +F: Documentation/devicetree/bindings/mfd/khadas,mcu.yaml +F: drivers/mfd/khadas-mcu.c +F: include/linux/mfd/khadas-mcu.h +F: drivers/thermal/khadas_mcu_fan.c + KMEMLEAK M: Catalin Marinas S: Maintained -- cgit From c61e165822d57fd317868d14b46151d9c83662d2 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 19 Jun 2020 17:09:14 +0800 Subject: mfd: sprd: Populate sub-devices defined in DT SC27XX-SPI added subdevices according to a pre-defined mfd_cell array, no matter these devices were really included on board. In this patch, switch to use devm_of_platform_populate() for adding sub-devices which are defined in devicetree. Signed-off-by: Chunyan Zhang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 75 ++----------------------------------------- 1 file changed, 3 insertions(+), 72 deletions(-) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 33336cde4724..d030a5da5544 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -93,73 +94,6 @@ enum usb_charger_type sprd_pmic_detect_charger_type(struct device *dev) } EXPORT_SYMBOL_GPL(sprd_pmic_detect_charger_type); -static const struct mfd_cell sprd_pmic_devs[] = { - { - .name = "sc27xx-wdt", - .of_compatible = "sprd,sc2731-wdt", - }, { - .name = "sc27xx-rtc", - .of_compatible = "sprd,sc2731-rtc", - }, { - .name = "sc27xx-charger", - .of_compatible = "sprd,sc2731-charger", - }, { - .name = "sc27xx-chg-timer", - .of_compatible = "sprd,sc2731-chg-timer", - }, { - .name = "sc27xx-fast-chg", - .of_compatible = "sprd,sc2731-fast-chg", - }, { - .name = "sc27xx-chg-wdt", - .of_compatible = "sprd,sc2731-chg-wdt", - }, { - .name = "sc27xx-typec", - .of_compatible = "sprd,sc2731-typec", - }, { - .name = "sc27xx-flash", - .of_compatible = "sprd,sc2731-flash", - }, { - .name = "sc27xx-eic", - .of_compatible = "sprd,sc2731-eic", - }, { - .name = "sc27xx-efuse", - .of_compatible = "sprd,sc2731-efuse", - }, { - .name = "sc27xx-thermal", - .of_compatible = "sprd,sc2731-thermal", - }, { - .name = "sc27xx-adc", - .of_compatible = "sprd,sc2731-adc", - }, { - .name = "sc27xx-audio-codec", - .of_compatible = "sprd,sc2731-audio-codec", - }, { - .name = "sc27xx-regulator", - .of_compatible = "sprd,sc2731-regulator", - }, { - .name = "sc27xx-vibrator", - .of_compatible = "sprd,sc2731-vibrator", - }, { - .name = "sc27xx-keypad-led", - .of_compatible = "sprd,sc2731-keypad-led", - }, { - .name = "sc27xx-bltc", - .of_compatible = "sprd,sc2731-bltc", - }, { - .name = "sc27xx-fgu", - .of_compatible = "sprd,sc2731-fgu", - }, { - .name = "sc27xx-7sreset", - .of_compatible = "sprd,sc2731-7sreset", - }, { - .name = "sc27xx-poweroff", - .of_compatible = "sprd,sc2731-poweroff", - }, { - .name = "sc27xx-syscon", - .of_compatible = "sprd,sc2731-syscon", - }, -}; - static int sprd_pmic_spi_write(void *context, const void *data, size_t count) { struct device *dev = context; @@ -263,12 +197,9 @@ static int sprd_pmic_probe(struct spi_device *spi) return ret; } - ret = devm_mfd_add_devices(&spi->dev, PLATFORM_DEVID_AUTO, - sprd_pmic_devs, ARRAY_SIZE(sprd_pmic_devs), - NULL, 0, - regmap_irq_get_domain(ddata->irq_data)); + ret = devm_of_platform_populate(&spi->dev); if (ret) { - dev_err(&spi->dev, "Failed to register device %d\n", ret); + dev_err(&spi->dev, "Failed to populate sub-devices %d\n", ret); return ret; } -- cgit From e8da9ed039805a52c32c4ee2f462080aa2315929 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 20 Feb 2020 17:22:46 +0100 Subject: dt-bindings: mfd: Convert stmfx bindings to json-schema Convert stmfx bindings to json-schema. Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Linus Walleij Signed-off-by: Lee Jones --- .../devicetree/bindings/mfd/st,stmfx.yaml | 124 +++++++++++++++++++++ Documentation/devicetree/bindings/mfd/stmfx.txt | 28 ----- .../devicetree/bindings/pinctrl/pinctrl-stmfx.txt | 116 ------------------- 3 files changed, 124 insertions(+), 144 deletions(-) create mode 100644 Documentation/devicetree/bindings/mfd/st,stmfx.yaml delete mode 100644 Documentation/devicetree/bindings/mfd/stmfx.txt delete mode 100644 Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml new file mode 100644 index 000000000000..0ce56a0da553 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/st,stmfx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: STMicroelectonics Multi-Function eXpander (STMFX) bindings + +description: ST Multi-Function eXpander (STMFX) is a slave controller using I2C for + communication with the main MCU. Its main features are GPIO expansion, + main MCU IDD measurement (IDD is the amount of current that flows + through VDD) and resistive touchscreen controller. + +maintainers: + - Amelie Delaunay + +properties: + compatible: + const: st,stmfx-0300 + + reg: + enum: [ 0x42, 0x43 ] + + interrupts: + maxItems: 1 + + drive-open-drain: true + + vdd-supply: + maxItems: 1 + + pinctrl: + type: object + + properties: + compatible: + const: st,stmfx-0300-pinctrl + + "#gpio-cells": + const: 2 + + "#interrupt-cells": + const: 2 + + gpio-controller: true + + interrupt-controller: true + + gpio-ranges: + description: if all STMFX pins[24:0] are available (no other STMFX function in use), + you should use gpio-ranges = <&stmfx_pinctrl 0 0 24>; + if agpio[3:0] are not available (STMFX Touchscreen function in use), + you should use gpio-ranges = <&stmfx_pinctrl 0 0 16>, <&stmfx_pinctrl 20 20 4>; + if agpio[7:4] are not available (STMFX IDD function in use), + you should use gpio-ranges = <&stmfx_pinctrl 0 0 20>; + maxItems: 1 + + patternProperties: + "^[a-zA-Z]*-pins$": + type: object + + allOf: + - $ref: ../pinctrl/pinmux-node.yaml + + properties: + pins: true + bias-disable: true + bias-pull-up: true + bias-pull-pin-default: true + bias-pull-down: true + drive-open-drain: true + drive-push-pull: true + output-high: true + output-low: true + + additionalProperties: false + + additionalProperties: false + + required: + - compatible + - "#gpio-cells" + - "#interrupt-cells" + - gpio-controller + - interrupt-controller + - gpio-ranges + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + +examples: + - | + #include + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + stmfx@42 { + compatible = "st,stmfx-0300"; + reg = <0x42>; + interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupt-parent = <&gpioi>; + vdd-supply = <&v3v3>; + + stmfx_pinctrl: pinctrl { + compatible = "st,stmfx-0300-pinctrl"; + #gpio-cells = <2>; + #interrupt-cells = <2>; + gpio-controller; + interrupt-controller; + gpio-ranges = <&stmfx_pinctrl 0 0 24>; + + joystick_pins: joystick-pins { + pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4"; + drive-push-pull; + bias-pull-up; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/mfd/stmfx.txt b/Documentation/devicetree/bindings/mfd/stmfx.txt deleted file mode 100644 index f0c2f7fcf5c7..000000000000 --- a/Documentation/devicetree/bindings/mfd/stmfx.txt +++ /dev/null @@ -1,28 +0,0 @@ -STMicroelectonics Multi-Function eXpander (STMFX) Core bindings - -ST Multi-Function eXpander (STMFX) is a slave controller using I2C for -communication with the main MCU. Its main features are GPIO expansion, main -MCU IDD measurement (IDD is the amount of current that flows through VDD) and -resistive touchscreen controller. - -Required properties: -- compatible: should be "st,stmfx-0300". -- reg: I2C slave address of the device. -- interrupts: interrupt specifier triggered by MFX_IRQ_OUT signal. - Please refer to ../interrupt-controller/interrupt.txt - -Optional properties: -- drive-open-drain: configure MFX_IRQ_OUT as open drain. -- vdd-supply: phandle of the regulator supplying STMFX. - -Example: - - stmfx: stmfx@42 { - compatible = "st,stmfx-0300"; - reg = <0x42>; - interrupts = <8 IRQ_TYPE_EDGE_RISING>; - interrupt-parent = <&gpioi>; - vdd-supply = <&v3v3>; - }; - -Please refer to ../pinctrl/pinctrl-stmfx.txt for STMFX GPIO expander function bindings. diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt deleted file mode 100644 index c1b4c1819b84..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt +++ /dev/null @@ -1,116 +0,0 @@ -STMicroelectronics Multi-Function eXpander (STMFX) GPIO expander bindings - -ST Multi-Function eXpander (STMFX) offers up to 24 GPIOs expansion. -Please refer to ../mfd/stmfx.txt for STMFX Core bindings. - -Required properties: -- compatible: should be "st,stmfx-0300-pinctrl". -- #gpio-cells: should be <2>, the first cell is the GPIO number and the second - cell is the gpio flags in accordance with . -- gpio-controller: marks the device as a GPIO controller. -- #interrupt-cells: should be <2>, the first cell is the GPIO number and the - second cell is the interrupt flags in accordance with - . -- interrupt-controller: marks the device as an interrupt controller. -- gpio-ranges: specifies the mapping between gpio controller and pin - controller pins. Check "Concerning gpio-ranges property" below. -Please refer to ../gpio/gpio.txt. - -Please refer to pinctrl-bindings.txt for pin configuration. - -Required properties for pin configuration sub-nodes: -- pins: list of pins to which the configuration applies. - -Optional properties for pin configuration sub-nodes (pinconf-generic ones): -- bias-disable: disable any bias on the pin. -- bias-pull-up: the pin will be pulled up. -- bias-pull-pin-default: use the pin-default pull state. -- bias-pull-down: the pin will be pulled down. -- drive-open-drain: the pin will be driven with open drain. -- drive-push-pull: the pin will be driven actively high and low. -- output-high: the pin will be configured as an output driving high level. -- output-low: the pin will be configured as an output driving low level. - -Note that STMFX pins[15:0] are called "gpio[15:0]", and STMFX pins[23:16] are -called "agpio[7:0]". Example, to refer to pin 18 of STMFX, use "agpio2". - -Concerning gpio-ranges property: -- if all STMFX pins[24:0] are available (no other STMFX function in use), you - should use gpio-ranges = <&stmfx_pinctrl 0 0 24>; -- if agpio[3:0] are not available (STMFX Touchscreen function in use), you - should use gpio-ranges = <&stmfx_pinctrl 0 0 16>, <&stmfx_pinctrl 20 20 4>; -- if agpio[7:4] are not available (STMFX IDD function in use), you - should use gpio-ranges = <&stmfx_pinctrl 0 0 20>; - - -Example: - - stmfx: stmfx@42 { - ... - - stmfx_pinctrl: stmfx-pin-controller { - compatible = "st,stmfx-0300-pinctrl"; - #gpio-cells = <2>; - #interrupt-cells = <2>; - gpio-controller; - interrupt-controller; - gpio-ranges = <&stmfx_pinctrl 0 0 24>; - - joystick_pins: joystick { - pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4"; - drive-push-pull; - bias-pull-up; - }; - }; - }; - -Example of STMFX GPIO consumers: - - joystick { - compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; - pinctrl-0 = <&joystick_pins>; - pinctrl-names = "default"; - button-0 { - label = "JoySel"; - linux,code = ; - interrupt-parent = <&stmfx_pinctrl>; - interrupts = <0 IRQ_TYPE_EDGE_RISING>; - }; - button-1 { - label = "JoyDown"; - linux,code = ; - interrupt-parent = <&stmfx_pinctrl>; - interrupts = <1 IRQ_TYPE_EDGE_RISING>; - }; - button-2 { - label = "JoyLeft"; - linux,code = ; - interrupt-parent = <&stmfx_pinctrl>; - interrupts = <2 IRQ_TYPE_EDGE_RISING>; - }; - button-3 { - label = "JoyRight"; - linux,code = ; - interrupt-parent = <&stmfx_pinctrl>; - interrupts = <3 IRQ_TYPE_EDGE_RISING>; - }; - button-4 { - label = "JoyUp"; - linux,code = ; - interrupt-parent = <&stmfx_pinctrl>; - interrupts = <4 IRQ_TYPE_EDGE_RISING>; - }; - }; - - leds { - compatible = "gpio-leds"; - orange { - gpios = <&stmfx_pinctrl 17 1>; - }; - - blue { - gpios = <&stmfx_pinctrl 19 1>; - }; - } -- cgit From bb7fcad48d3804d814b97c785514e2d1657e157f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Jun 2020 16:10:36 +0300 Subject: mfd: intel-lpss: Add Intel Tiger Lake PCH-H PCI IDs Intel Tiger Lake PCH-H has the same LPSS than Intel Broxton. Add the new IDs to the list of supported devices. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss-pci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 17bcadc00a11..9a58032f818a 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -233,6 +233,22 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x34ea), (kernel_ulong_t)&bxt_i2c_info }, { PCI_VDEVICE(INTEL, 0x34eb), (kernel_ulong_t)&bxt_i2c_info }, { PCI_VDEVICE(INTEL, 0x34fb), (kernel_ulong_t)&spt_info }, + /* TGL-H */ + { PCI_VDEVICE(INTEL, 0x43a7), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x43a8), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x43a9), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x43aa), (kernel_ulong_t)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x43ab), (kernel_ulong_t)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x43ad), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43ae), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43d8), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43da), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0x43e8), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43e9), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43ea), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43eb), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x43fb), (kernel_ulong_t)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x43fd), (kernel_ulong_t)&bxt_info }, /* EHL */ { PCI_VDEVICE(INTEL, 0x4b28), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x4b29), (kernel_ulong_t)&bxt_uart_info }, -- cgit From f375a038daf655933dae0c54542a071388c02880 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Jun 2020 10:33:31 +0300 Subject: MAINTAINERS: Add entry for ROHM Power Management ICs Add entry for maintaining power management IC drivers for ROHM BD71837, BD71847, BD71850, BD71828, BD71878, BD70528 and BD99954. Signed-off-by: Matti Vaittinen Acked-by: Sebastian Reichel Acked-by: Guenter Roeck Acked-by: Stephen Boyd Reviewed-by: Linus Walleij Signed-off-by: Lee Jones --- MAINTAINERS | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 02d9827a4f81..423f7107ace4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14713,6 +14713,13 @@ L: linux-serial@vger.kernel.org S: Odd Fixes F: drivers/tty/serial/rp2.* +ROHM BD99954 CHARGER IC +R: Matti Vaittinen +L: linux-power@fi.rohmeurope.com +S: Supported +F: drivers/power/supply/bd99954-charger.c +F: drivers/power/supply/bd99954-charger.h + ROHM BH1750 AMBIENT LIGHT SENSOR DRIVER M: Tomasz Duszynski S: Maintained @@ -14730,6 +14737,31 @@ F: drivers/mfd/bd9571mwv.c F: drivers/regulator/bd9571mwv-regulator.c F: include/linux/mfd/bd9571mwv.h +ROHM POWER MANAGEMENT IC DEVICE DRIVERS +R: Matti Vaittinen +L: linux-power@fi.rohmeurope.com +S: Supported +F: Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt +F: Documentation/devicetree/bindings/regulator/rohm,bd70528-regulator.txt +F: drivers/clk/clk-bd718x7.c +F: drivers/gpio/gpio-bd70528.c +F: drivers/gpio/gpio-bd71828.c +F: drivers/mfd/rohm-bd70528.c +F: drivers/mfd/rohm-bd71828.c +F: drivers/mfd/rohm-bd718x7.c +F: drivers/power/supply/bd70528-charger.c +F: drivers/regulator/bd70528-regulator.c +F: drivers/regulator/bd71828-regulator.c +F: drivers/regulator/bd718x7-regulator.c +F: drivers/regulator/rohm-regulator.c +F: drivers/rtc/rtc-bd70528.c +F: drivers/watchdog/bd70528_wdt.c +F: include/linux/mfd/rohm-bd70528.h +F: include/linux/mfd/rohm-bd71828.h +F: include/linux/mfd/rohm-bd718x7.h +F: include/linux/mfd/rohm-generic.h +F: include/linux/mfd/rohm-shared.h + ROSE NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org -- cgit From f1d8fe2e98d11788cabb2ad5c5becfcc95dd81a9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 30 Jun 2020 08:44:43 -0300 Subject: dt-bindings: mfd: st,stmfx: Remove extra additionalProperties The following build error is seen with 'make dt_binding_check': CHKDT Documentation/devicetree/bindings/mfd/st,stmfx.yaml /home/fabio/linux-next/Documentation/devicetree/bindings/mfd/st,stmfx.yaml: properties:pinctrl:patternProperties: {'enum': ['$ref', 'additionalItems', 'additionalProperties', 'allOf', 'anyOf', 'const', 'contains', 'default', 'dependencies', 'deprecated', 'description', 'else', 'enum', 'if', 'items', 'maxItems', 'maximum', 'minItems', 'minimum', 'multipleOf', 'not', 'oneOf', 'pattern', 'patternProperties', 'properties', 'propertyNames', 'required', 'then', 'unevaluatedProperties']} is not allowed for 'additionalProperties' Remove the extra 'additionalProperties' to pass the build. Signed-off-by: Fabio Estevam Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/st,stmfx.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml index 0ce56a0da553..bed22d4abffb 100644 --- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -73,8 +73,6 @@ properties: output-high: true output-low: true - additionalProperties: false - additionalProperties: false required: -- cgit From 4d3ec936f80dbb2174c8c1f426eb86c032e9f14e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Jun 2020 10:39:48 +0200 Subject: mfd: lm3533: Expand control-bank accessors Expand the control-bank accessors that were implemented using macros. This allows the definitions of these exported functions to be found more easily and specifically avoids a W=1 compiler warning due to the redundant brightness sanity check: drivers/mfd/lm3533-ctrlbank.c: In function 'lm3533_ctrlbank_set_brightness': drivers/mfd/lm3533-ctrlbank.c:98:10: warning: comparison is always false due to limited range of data type [-Wtype-limits] 98 | if (val > LM3533_##_NAME##_MAX) \ | ^ drivers/mfd/lm3533-ctrlbank.c:125:1: note: in expansion of macro 'lm3533_ctrlbank_set' 125 | lm3533_ctrlbank_set(brightness, BRIGHTNESS); | ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Johan Hovold Signed-off-by: Lee Jones --- drivers/mfd/lm3533-ctrlbank.c | 94 ++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/drivers/mfd/lm3533-ctrlbank.c b/drivers/mfd/lm3533-ctrlbank.c index 34fba06ec705..2537dfade51c 100644 --- a/drivers/mfd/lm3533-ctrlbank.c +++ b/drivers/mfd/lm3533-ctrlbank.c @@ -17,7 +17,6 @@ #define LM3533_MAX_CURRENT_MAX 29800 #define LM3533_MAX_CURRENT_STEP 800 -#define LM3533_BRIGHTNESS_MAX 255 #define LM3533_PWM_MAX 0x3f #define LM3533_REG_PWM_BASE 0x14 @@ -89,41 +88,33 @@ int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u16 imax) } EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_max_current); -#define lm3533_ctrlbank_set(_name, _NAME) \ -int lm3533_ctrlbank_set_##_name(struct lm3533_ctrlbank *cb, u8 val) \ -{ \ - u8 reg; \ - int ret; \ - \ - if (val > LM3533_##_NAME##_MAX) \ - return -EINVAL; \ - \ - reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE); \ - ret = lm3533_write(cb->lm3533, reg, val); \ - if (ret) \ - dev_err(cb->dev, "failed to set " #_name "\n"); \ - \ - return ret; \ -} \ -EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_##_name); - -#define lm3533_ctrlbank_get(_name, _NAME) \ -int lm3533_ctrlbank_get_##_name(struct lm3533_ctrlbank *cb, u8 *val) \ -{ \ - u8 reg; \ - int ret; \ - \ - reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE); \ - ret = lm3533_read(cb->lm3533, reg, val); \ - if (ret) \ - dev_err(cb->dev, "failed to get " #_name "\n"); \ - \ - return ret; \ -} \ -EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_##_name); - -lm3533_ctrlbank_set(brightness, BRIGHTNESS); -lm3533_ctrlbank_get(brightness, BRIGHTNESS); +int lm3533_ctrlbank_set_brightness(struct lm3533_ctrlbank *cb, u8 val) +{ + u8 reg; + int ret; + + reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_BRIGHTNESS_BASE); + ret = lm3533_write(cb->lm3533, reg, val); + if (ret) + dev_err(cb->dev, "failed to set brightness\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_brightness); + +int lm3533_ctrlbank_get_brightness(struct lm3533_ctrlbank *cb, u8 *val) +{ + u8 reg; + int ret; + + reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_BRIGHTNESS_BASE); + ret = lm3533_read(cb->lm3533, reg, val); + if (ret) + dev_err(cb->dev, "failed to get brightness\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_brightness); /* * PWM-input control mask: @@ -135,9 +126,36 @@ lm3533_ctrlbank_get(brightness, BRIGHTNESS); * bit 1 - PWM-input enabled in Zone 0 * bit 0 - PWM-input enabled */ -lm3533_ctrlbank_set(pwm, PWM); -lm3533_ctrlbank_get(pwm, PWM); +int lm3533_ctrlbank_set_pwm(struct lm3533_ctrlbank *cb, u8 val) +{ + u8 reg; + int ret; + + if (val > LM3533_PWM_MAX) + return -EINVAL; + + reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_PWM_BASE); + ret = lm3533_write(cb->lm3533, reg, val); + if (ret) + dev_err(cb->dev, "failed to set PWM mask\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_pwm); + +int lm3533_ctrlbank_get_pwm(struct lm3533_ctrlbank *cb, u8 *val) +{ + u8 reg; + int ret; + reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_PWM_BASE); + ret = lm3533_read(cb->lm3533, reg, val); + if (ret) + dev_err(cb->dev, "failed to get PWM mask\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_pwm); MODULE_AUTHOR("Johan Hovold "); MODULE_DESCRIPTION("LM3533 Control Bank interface"); -- cgit From 3180cfabf6fbf982ca6d1a6eb56334647cc1416b Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 29 Jun 2020 13:41:23 +0200 Subject: rtc: cpcap: fix range Unbreak CPCAP driver, which has one more bit in the day counter increasing the max. range from 2014 to 2058. The original commit introducing the range limit was obviously wrong, since the driver has only been written in 2017 (3 years after 14 bits would have run out). Fixes: d2377f8cc5a7 ("rtc: cpcap: set range") Reported-by: Sicelo A. Mhlongo Reported-by: Dev Null Signed-off-by: Sebastian Reichel Signed-off-by: Alexandre Belloni Tested-by: Merlijn Wajer Acked-by: Tony Lindgren Acked-by: Merlijn Wajer Link: https://lore.kernel.org/r/20200629114123.27956-1-sebastian.reichel@collabora.com --- drivers/rtc/rtc-cpcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index a603f1f21125..800667d73a6f 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -261,7 +261,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) return PTR_ERR(rtc->rtc_dev); rtc->rtc_dev->ops = &cpcap_rtc_ops; - rtc->rtc_dev->range_max = (1 << 14) * SECS_PER_DAY - 1; + rtc->rtc_dev->range_max = (timeu64_t) (DAY_MASK + 1) * SECS_PER_DAY - 1; err = cpcap_get_vendor(dev, rtc->regmap, &rtc->vendor); if (err) -- cgit From 05513a706b4f8f978c0ecc97b6d99793fbaaaf17 Mon Sep 17 00:00:00 2001 From: "Tales L. da Aparecida" Date: Tue, 23 Jun 2020 22:21:19 -0300 Subject: rtc: imxdi: fix trivial typos Fix typos 'pionter' -> 'pointer' and 'softwere' -> 'software' Signed-off-by: Tales L. da Aparecida Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200624012119.54768-1-tales.aparecida@gmail.com --- drivers/rtc/rtc-imxdi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index f21dc6b16d88..8d141d8a5490 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -95,7 +95,7 @@ /** * struct imxdi_dev - private imxdi rtc data - * @pdev: pionter to platform dev + * @pdev: pointer to platform dev * @rtc: pointer to rtc struct * @ioaddr: IO registers pointer * @clk: input reference clock @@ -350,7 +350,7 @@ static int di_handle_invalid_and_failure_state(struct imxdi_dev *imxdi, u32 dsr) * the tamper register is locked. We cannot disable the * tamper detection. The TDCHL can only be reset by a * DRYICE POR, but we cannot force a DRYICE POR in - * softwere because we are still in "FAILURE STATE". + * software because we are still in "FAILURE STATE". * We need a DRYICE POR via battery power cycling.... */ /* -- cgit From f80531c8217c5b348058a0280f3be05835abd737 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 11 Jun 2020 14:05:34 +0300 Subject: i2c: Use separate MODULE_AUTHOR() statements for multiple authors Modules with multiple authors should use multiple MODULE_AUTHOR() statements according to comment in include/linux/module.h. Split the i2c modules with multiple authors to use multiple MODULE_AUTHOR() statements. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-pca.c | 4 ++-- drivers/i2c/busses/i2c-ali1535.c | 8 ++++---- drivers/i2c/busses/i2c-ali15x3.c | 6 +++--- drivers/i2c/busses/i2c-emev2.c | 3 ++- drivers/i2c/busses/i2c-i801.c | 3 ++- drivers/i2c/busses/i2c-nomadik.c | 3 ++- drivers/i2c/busses/i2c-piix4.c | 4 ++-- drivers/i2c/busses/i2c-pnx.c | 3 ++- drivers/i2c/busses/i2c-sh_mobile.c | 3 ++- drivers/i2c/busses/i2c-sibyte.c | 3 ++- drivers/i2c/busses/i2c-sirf.c | 4 ++-- drivers/i2c/busses/i2c-viapro.c | 6 +++--- drivers/i2c/i2c-dev.c | 4 ++-- 13 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 7f10312d1b88..8620963e442c 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -541,8 +541,8 @@ int i2c_pca_add_numbered_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL(i2c_pca_add_numbered_bus); -MODULE_AUTHOR("Ian Campbell , " - "Wolfram Sang "); +MODULE_AUTHOR("Ian Campbell "); +MODULE_AUTHOR("Wolfram Sang "); MODULE_DESCRIPTION("I2C-Bus PCA9564/PCA9665 algorithm"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index a43deea390f5..fb93152845f4 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -519,9 +519,9 @@ static struct pci_driver ali1535_driver = { module_pci_driver(ali1535_driver); -MODULE_AUTHOR("Frodo Looijaard , " - "Philip Edelbrock , " - "Mark D. Studebaker " - "and Dan Eaton "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Dan Eaton "); MODULE_DESCRIPTION("ALI1535 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 02185a1cfa77..cc58feacd082 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -502,8 +502,8 @@ static struct pci_driver ali15x3_driver = { module_pci_driver(ali15x3_driver); -MODULE_AUTHOR ("Frodo Looijaard , " - "Philip Edelbrock , " - "and Mark D. Studebaker "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); +MODULE_AUTHOR("Mark D. Studebaker "); MODULE_DESCRIPTION("ALI15X3 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 1a319352e51b..a08554c1a570 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -442,6 +442,7 @@ static struct platform_driver em_i2c_driver = { module_platform_driver(em_i2c_driver); MODULE_DESCRIPTION("EMEV2 I2C bus driver"); -MODULE_AUTHOR("Ian Molton and Wolfram Sang "); +MODULE_AUTHOR("Ian Molton"); +MODULE_AUTHOR("Wolfram Sang "); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, em_i2c_ids); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index fea644921a76..1fc7ae77753d 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1986,7 +1986,8 @@ static void __exit i2c_i801_exit(void) pci_unregister_driver(&i801_driver); } -MODULE_AUTHOR("Mark D. Studebaker , Jean Delvare "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("I801 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index e1e8d4ef9aa7..d4b1b0865f67 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1122,6 +1122,7 @@ static void __exit nmk_i2c_exit(void) subsys_initcall(nmk_i2c_init); module_exit(nmk_i2c_exit); -MODULE_AUTHOR("Sachin Verma, Srinidhi KASAGAR"); +MODULE_AUTHOR("Sachin Verma"); +MODULE_AUTHOR("Srinidhi KASAGAR"); MODULE_DESCRIPTION("Nomadik/Ux500 I2C driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 69740a4ff1db..8c1b31ed0c42 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -1032,7 +1032,7 @@ static struct pci_driver piix4_driver = { module_pci_driver(piix4_driver); -MODULE_AUTHOR("Frodo Looijaard and " - "Philip Edelbrock "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Philip Edelbrock "); MODULE_DESCRIPTION("PIIX4 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 5d7207c10f1d..8c4ec7f13f5a 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -781,7 +781,8 @@ static void __exit i2c_adap_pnx_exit(void) platform_driver_unregister(&i2c_pnx_driver); } -MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev "); +MODULE_AUTHOR("Vitaly Wool"); +MODULE_AUTHOR("Dennis Kovalev "); MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:pnx-i2c"); diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 2cca1b21e26e..cab725559999 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -932,6 +932,7 @@ static void __exit sh_mobile_i2c_adap_exit(void) module_exit(sh_mobile_i2c_adap_exit); MODULE_DESCRIPTION("SuperH Mobile I2C Bus Controller driver"); -MODULE_AUTHOR("Magnus Damm and Wolfram Sang"); +MODULE_AUTHOR("Magnus Damm"); +MODULE_AUTHOR("Wolfram Sang"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:i2c-sh_mobile"); diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index 9dcea2ba7168..8f71f01cb169 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ b/drivers/i2c/busses/i2c-sibyte.c @@ -180,6 +180,7 @@ static void __exit i2c_sibyte_exit(void) module_init(i2c_sibyte_init); module_exit(i2c_sibyte_exit); -MODULE_AUTHOR("Kip Walker (Broadcom Corp.), Steven J. Hill "); +MODULE_AUTHOR("Kip Walker (Broadcom Corp.)"); +MODULE_AUTHOR("Steven J. Hill "); MODULE_DESCRIPTION("SMBus adapter routines for SiByte boards"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c index d7f72ec331e8..30db8fafe078 100644 --- a/drivers/i2c/busses/i2c-sirf.c +++ b/drivers/i2c/busses/i2c-sirf.c @@ -470,6 +470,6 @@ static struct platform_driver i2c_sirfsoc_driver = { module_platform_driver(i2c_sirfsoc_driver); MODULE_DESCRIPTION("SiRF SoC I2C master controller driver"); -MODULE_AUTHOR("Zhiwu Song , " - "Xiangzhen Ye "); +MODULE_AUTHOR("Zhiwu Song "); +MODULE_AUTHOR("Xiangzhen Ye "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 4abc7771af06..05aa92a3fbe0 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -489,9 +489,9 @@ static void __exit i2c_vt596_exit(void) } } -MODULE_AUTHOR("Kyosti Malkki , " - "Mark D. Studebaker and " - "Jean Delvare "); +MODULE_AUTHOR("Kyosti Malkki "); +MODULE_AUTHOR("Mark D. Studebaker "); +MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("vt82c596 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index da020acc9bbd..6ceb11cc4be1 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -761,8 +761,8 @@ static void __exit i2c_dev_exit(void) unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS); } -MODULE_AUTHOR("Frodo Looijaard and " - "Simon G. Vogl "); +MODULE_AUTHOR("Frodo Looijaard "); +MODULE_AUTHOR("Simon G. Vogl "); MODULE_DESCRIPTION("I2C /dev entries driver"); MODULE_LICENSE("GPL"); -- cgit From f3e2bd713730e6d69e8e021d3ff574f01c39e7e3 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 23 Jun 2020 13:06:46 +0100 Subject: i2c: rk3x: support master_xfer_atomic Enable i2c transactions in irq disabled contexts like poweroff where the PMIC is connected via i2c. Signed-off-by: John Keeping Tested-by: Heiko Stuebner Reviewed-by: Heiko Stuebner Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rk3x.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index bc698240c4aa..b67bb54caf27 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1040,8 +1041,21 @@ static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num) return ret; } -static int rk3x_i2c_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) +static int rk3x_i2c_wait_xfer_poll(struct rk3x_i2c *i2c) +{ + ktime_t timeout = ktime_add_ms(ktime_get(), WAIT_TIMEOUT); + + while (READ_ONCE(i2c->busy) && + ktime_compare(ktime_get(), timeout) < 0) { + udelay(5); + rk3x_i2c_irq(0, i2c); + } + + return !i2c->busy; +} + +static int rk3x_i2c_xfer_common(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num, bool polling) { struct rk3x_i2c *i2c = (struct rk3x_i2c *)adap->algo_data; unsigned long timeout, flags; @@ -1075,8 +1089,12 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, rk3x_i2c_start(i2c); - timeout = wait_event_timeout(i2c->wait, !i2c->busy, - msecs_to_jiffies(WAIT_TIMEOUT)); + if (!polling) { + timeout = wait_event_timeout(i2c->wait, !i2c->busy, + msecs_to_jiffies(WAIT_TIMEOUT)); + } else { + timeout = rk3x_i2c_wait_xfer_poll(i2c); + } spin_lock_irqsave(&i2c->lock, flags); @@ -1110,6 +1128,18 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, return ret < 0 ? ret : num; } +static int rk3x_i2c_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, false); +} + +static int rk3x_i2c_xfer_polling(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, true); +} + static __maybe_unused int rk3x_i2c_resume(struct device *dev) { struct rk3x_i2c *i2c = dev_get_drvdata(dev); @@ -1126,6 +1156,7 @@ static u32 rk3x_i2c_func(struct i2c_adapter *adap) static const struct i2c_algorithm rk3x_i2c_algorithm = { .master_xfer = rk3x_i2c_xfer, + .master_xfer_atomic = rk3x_i2c_xfer_polling, .functionality = rk3x_i2c_func, }; -- cgit From 0a7f99aad259d223ce69c03e792c7e2bfcf8c2c6 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 3 Jul 2020 17:49:48 +0200 Subject: clk: rockchip: use separate compatibles for rk3288w-cru Commit 1627f683636d ("clk: rockchip: Handle clock tree for rk3288w variant") added the check for rk3288w-specific clock-tree changes but in turn would require a double-compatible due to re-using the main rockchip,rk3288-cru compatible as entry point. The binding change actually describes the compatibles as one or the other so adapt the code accordingly and add a real second entry-point for the clock controller. Signed-off-by: Heiko Stuebner Reviewed-by: Ezequiel Garcia Reviewed-by: Jagan Teki Tested-by: Jagan Teki # rock-pi-n8 Link: https://lore.kernel.org/r/20200703154948.260369-1-heiko@sntech.de --- drivers/clk/rockchip/clk-rk3288.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index 204976e2d0cb..93c794695c46 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -15,6 +15,11 @@ #define RK3288_GRF_SOC_CON(x) (0x244 + x * 4) #define RK3288_GRF_SOC_STATUS1 0x284 +enum rk3288_variant { + RK3288_CRU, + RK3288W_CRU, +}; + enum rk3288_plls { apll, dpll, cpll, gpll, npll, }; @@ -922,7 +927,8 @@ static struct syscore_ops rk3288_clk_syscore_ops = { .resume = rk3288_clk_resume, }; -static void __init rk3288_clk_init(struct device_node *np) +static void __init rk3288_common_init(struct device_node *np, + enum rk3288_variant soc) { struct rockchip_clk_provider *ctx; @@ -945,7 +951,7 @@ static void __init rk3288_clk_init(struct device_node *np) rockchip_clk_register_branches(ctx, rk3288_clk_branches, ARRAY_SIZE(rk3288_clk_branches)); - if (of_device_is_compatible(np, "rockchip,rk3288w-cru")) + if (soc == RK3288W_CRU) rockchip_clk_register_branches(ctx, rk3288w_hclkvio_branch, ARRAY_SIZE(rk3288w_hclkvio_branch)); else @@ -970,4 +976,15 @@ static void __init rk3288_clk_init(struct device_node *np) rockchip_clk_of_add_provider(np, ctx); } + +static void __init rk3288_clk_init(struct device_node *np) +{ + rk3288_common_init(np, RK3288_CRU); +} CLK_OF_DECLARE(rk3288_cru, "rockchip,rk3288-cru", rk3288_clk_init); + +static void __init rk3288w_clk_init(struct device_node *np) +{ + rk3288_common_init(np, RK3288W_CRU); +} +CLK_OF_DECLARE(rk3288w_cru, "rockchip,rk3288w-cru", rk3288w_clk_init); -- cgit From a47dee5513cd7b6d1e20dfecd458363f24a19cdc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 May 2020 09:45:21 +0100 Subject: KVM: arm64: Allow in-atomic injection of SPIs On a system that uses SPIs to implement MSIs (as it would be the case on a GICv2 system exposing a GICv2m to its guests), we deny the possibility of injecting SPIs on the in-atomic fast-path. This results in a very large amount of context-switches (roughly equivalent to twice the interrupt rate) on the host, and suboptimal performance for the guest (as measured with a test workload involving a virtio interface backed by vhost-net). Given that GICv2 systems are usually on the low-end of the spectrum performance wise, they could do without the aggravation. We solved this for GICv3+ITS by having a translation cache. But SPIs do not need any extra infrastructure, and can be immediately injected in the virtual distributor as the locking is already heavy enough that we don't need to worry about anything. This halves the number of context switches for the same workload. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-irqfd.c | 24 +++++++++++++++++++----- arch/arm64/kvm/vgic/vgic-its.c | 3 +-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index d8cdfea5cc96..79f8899b234c 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -100,19 +100,33 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, /** * kvm_arch_set_irq_inatomic: fast-path for irqfd injection - * - * Currently only direct MSI injection is supported. */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { - if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + if (!level) + return -EWOULDBLOCK; + + switch (e->type) { + case KVM_IRQ_ROUTING_MSI: { struct kvm_msi msi; + if (!vgic_has_its(kvm)) + break; + kvm_populate_msi(e, &msi); - if (!vgic_its_inject_cached_translation(kvm, &msi)) - return 0; + return vgic_its_inject_cached_translation(kvm, &msi); + } + + case KVM_IRQ_ROUTING_IRQCHIP: + /* + * Injecting SPIs is always possible in atomic context + * as long as the damn vgic is initialized. + */ + if (unlikely(!vgic_initialized(kvm))) + break; + return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status); } return -EWOULDBLOCK; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index c012a52b19f5..40cbaca81333 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -757,9 +757,8 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) db = (u64)msi->address_hi << 32 | msi->address_lo; irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); - if (!irq) - return -1; + return -EWOULDBLOCK; raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; -- cgit From 2da3ffa6e840b9f8fc65a71326c43b716992861d Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Thu, 18 Jun 2020 10:36:16 +0100 Subject: arm64: kvm: Remove kern_hyp_va from get_vcpu_ptr get_vcpu_ptr is an assembly accessor for the percpu value kvm_host_data->host_ctxt.__hyp_running_vcpu. kern_hyp_va only applies to nVHE however __hyp_running_vcpu is always assigned a pointer that has already had kern_hyp_va applied in __kvm_vcpu_run_nvhe. kern_hyp_va is currently idempotent as it just masks and inserts the tag, but this could change in future and the second application is unnecessary. Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200618093616.164413-1-ascull@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 352aaebf4198..ac71d0939f2e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -143,7 +143,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] - kern_hyp_va \vcpu .endm #endif -- cgit From 6b33e0d64f8501b51d32069e08d3ed68c58c25b4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jun 2020 11:33:13 +0000 Subject: KVM: arm64: Drop the target_table[] indirection KVM for 32bit arm had a get/set target mechanism to allow for micro-architecture differences that are visible in system registers to be described. KVM's user-space can query the supported targets for a CPU, and create vCPUs for that target. The target can override the handling of system registers to provide different reset or RES0 behaviour. On 32bit arm this was used to provide different ACTLR reset values for A7 and A15. On 64bit arm, the first few CPUs out of the gate used this mechanism, before it was deemed redundant in commit bca556ac468a ("arm64/kvm: Add generic v8 KVM target"). All future CPUs use the KVM_ARM_TARGET_GENERIC_V8 target. The 64bit target_table[] stuff exists to preserve the ABI to user-space. As all targets registers genericv8_target_table, there is no reason to look the target up. Until we can merge genericv8_target_table with the main sys_regs array, kvm_register_target_sys_reg_table() becomes kvm_check_target_sys_reg_table(), which uses BUG_ON() in keeping with the other callers in this file. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622113317.20477-2-james.morse@arm.com --- arch/arm64/include/asm/kvm_coproc.h | 3 +-- arch/arm64/kvm/sys_regs.c | 16 ++++------------ arch/arm64/kvm/sys_regs.h | 2 ++ arch/arm64/kvm/sys_regs_generic_v8.c | 15 ++------------- 4 files changed, 9 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 0185ee8b8b5e..4bf0d6d05e0f 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -24,8 +24,7 @@ struct kvm_sys_reg_target_table { struct kvm_sys_reg_table table32; }; -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table); +void kvm_check_target_sys_reg_table(struct kvm_sys_reg_target_table *table); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index baf5ce9225ce..6333a7cd92d3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2109,17 +2109,10 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -/* Target specific emulation tables */ -static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table) +void kvm_check_target_sys_reg_table(struct kvm_sys_reg_target_table *table) { - if (check_sysreg_table(table->table64.table, table->table64.num, false) || - check_sysreg_table(table->table32.table, table->table32.num, true)) - return; - - target_tables[target] = table; + BUG_ON(check_sysreg_table(table->table64.table, table->table64.num, false)); + BUG_ON(check_sysreg_table(table->table32.table, table->table32.num, true)); } /* Get specific register table for this target. */ @@ -2127,9 +2120,8 @@ static const struct sys_reg_desc *get_target_table(unsigned target, bool mode_is_64, size_t *num) { - struct kvm_sys_reg_target_table *table; + struct kvm_sys_reg_target_table *table = &genericv8_target_table; - table = target_tables[target]; if (mode_is_64) { *num = table->table64.num; return table->table64.table; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..933609e883bf 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -165,4 +165,6 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \ Op2(sys_reg_Op2(reg)) +extern struct kvm_sys_reg_target_table genericv8_target_table; + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index aa9d356451eb..a82cc2ccfd44 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -59,7 +59,7 @@ static const struct sys_reg_desc genericv8_cp15_regs[] = { access_actlr }, }; -static struct kvm_sys_reg_target_table genericv8_target_table = { +struct kvm_sys_reg_target_table genericv8_target_table = { .table64 = { .table = genericv8_sys_regs, .num = ARRAY_SIZE(genericv8_sys_regs), @@ -78,18 +78,7 @@ static int __init sys_reg_genericv8_init(void) BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], &genericv8_sys_regs[i]) >= 0); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, - &genericv8_target_table); + kvm_check_target_sys_reg_table(&genericv8_target_table); return 0; } -- cgit From 04343ae312ef0d80d601ea1b784b6b039ae9c82a Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jun 2020 11:33:14 +0000 Subject: KVM: arm64: Tolerate an empty target_table list Before emptying the target_table lists, and then removing their infrastructure, add some tolerance to an empty list. Instead of bugging-out on an empty list, pretend we already reached the end in the two-list-walk. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622113317.20477-3-james.morse@arm.com --- arch/arm64/kvm/sys_regs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6333a7cd92d3..fb448bfc83ec 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2809,7 +2809,10 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); - BUG_ON(i1 == end1 || i2 == end2); + if (i1 == end1) + i1 = NULL; + + BUG_ON(i2 == end2); /* Walk carefully, as both tables may refer to the same register. */ while (i1 || i2) { -- cgit From af4738290d9dfe3787f60e40f709a4f78a115943 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jun 2020 11:33:15 +0000 Subject: KVM: arm64: Move ACTLR_EL1 emulation to the sys_reg_descs array The only entry in the genericv8_sys_regs arrays is for emulation of ACTLR_EL1. As all targets emulate this in the same way, move it to sys_reg_descs[]. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622113317.20477-4-james.morse@arm.com --- arch/arm64/kvm/sys_regs.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs_generic_v8.c | 30 ------------------------------ 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fb448bfc83ec..f8407cfa9032 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -242,6 +242,25 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool access_actlr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); + + if (p->is_aarch32) { + if (r->Op2 & 2) + p->regval = upper_32_bits(p->regval); + else + p->regval = lower_32_bits(p->regval); + } + + return true; +} + /* * Trap handler for the GICv3 SGI generation system register. * Forward the request to the VGIC emulation. @@ -615,6 +634,12 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr = read_sysreg(actlr_el1); + vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1); +} + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mpidr; @@ -1518,6 +1543,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(7,7), { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1957,6 +1983,8 @@ static const struct sys_reg_desc cp14_64_regs[] = { static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index a82cc2ccfd44..a7e21e61beea 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -19,44 +19,14 @@ #include "sys_regs.h" -static bool access_actlr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } - - return true; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) -{ - __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); -} - /* * Implementation specific sys-reg registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 */ static const struct sys_reg_desc genericv8_sys_regs[] = { - { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, }; static const struct sys_reg_desc genericv8_cp15_regs[] = { - /* ACTLR */ - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), - access_actlr }, - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011), - access_actlr }, }; struct kvm_sys_reg_target_table genericv8_target_table = { -- cgit From dcaffa7bf911578a6d69165d712501996c587fbe Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jun 2020 11:33:16 +0000 Subject: KVM: arm64: Remove target_table from exit handlers Whenever KVM searches for a register (e.g. due to a guest exit), it works with two tables, as the target table overrides the sys_regs array. Now that everything is in the sys_regs array, and the target table is empty, stop doing that. Remove the second table and its size from all the functions that take it. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622113317.20477-5-james.morse@arm.com --- arch/arm64/kvm/sys_regs.c | 87 +++++++++-------------------------------------- 1 file changed, 16 insertions(+), 71 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f8407cfa9032..14333005b476 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2269,9 +2269,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, */ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -2298,14 +2296,11 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, } /* - * Try to emulate the coprocessor access using the target - * specific table first, and using the global table afterwards. - * If either of the tables contains a handler, handle the + * If the table contains a handler, handle the * potential register operation in the case of a read and return * with success. */ - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || - !emulate_cp(vcpu, ¶ms, global, nr_global)) { + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) { /* Split up the value between registers for the read side */ if (!params.is_write) { vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); @@ -2326,9 +2321,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, */ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -2344,8 +2337,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || - !emulate_cp(vcpu, ¶ms, global, nr_global)) { + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) { if (!params.is_write) vcpu_set_reg(vcpu, Rt, params.regval); return 1; @@ -2357,38 +2349,22 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_64(vcpu, - cp15_64_regs, ARRAY_SIZE(cp15_64_regs), - target_specific, num); + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); } int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_32(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), - target_specific, num); + return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); } int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { - return kvm_handle_cp_64(vcpu, - cp14_64_regs, ARRAY_SIZE(cp14_64_regs), - NULL, 0); + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); } int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) { - return kvm_handle_cp_32(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), - NULL, 0); + return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); } static bool is_imp_def_sys_reg(struct sys_reg_params *params) @@ -2400,15 +2376,9 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) static int emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - size_t num; - const struct sys_reg_desc *table, *r; - - table = get_target_table(vcpu->arch.target, true, &num); + const struct sys_reg_desc *r; - /* Search target-specific then generic table. */ - r = find_reg(params, table, num); - if (!r) - r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); if (likely(r)) { perform_access(vcpu, params, r); @@ -2512,8 +2482,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; struct sys_reg_params params; /* We only do sys_reg for now. */ @@ -2523,10 +2492,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if (!index_to_params(id, ¶ms)) return NULL; - table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg(¶ms, table, num); - if (!r) - r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); /* Not saved in the sys_reg array and not otherwise accessible? */ if (r && !(r->reg || r->get_user)) @@ -2826,38 +2792,17 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, /* Assumed ordered tables, see kvm_sys_reg_table_init. */ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) { - const struct sys_reg_desc *i1, *i2, *end1, *end2; + const struct sys_reg_desc *i2, *end2; unsigned int total = 0; - size_t num; int err; - /* We check for duplicates here, to allow arch-specific overrides. */ - i1 = get_target_table(vcpu->arch.target, true, &num); - end1 = i1 + num; i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); - if (i1 == end1) - i1 = NULL; - - BUG_ON(i2 == end2); - - /* Walk carefully, as both tables may refer to the same register. */ - while (i1 || i2) { - int cmp = cmp_sys_reg(i1, i2); - /* target-specific overrides generic entry. */ - if (cmp <= 0) - err = walk_one_sys_reg(vcpu, i1, &uind, &total); - else - err = walk_one_sys_reg(vcpu, i2, &uind, &total); - + while (i2 != end2) { + err = walk_one_sys_reg(vcpu, i2++, &uind, &total); if (err) return err; - - if (cmp <= 0 && ++i1 == end1) - i1 = NULL; - if (cmp >= 0 && ++i2 == end2) - i2 = NULL; } return total; } -- cgit From 750ed56693803e992ed09ac9c46e07226dd4d350 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jun 2020 11:33:17 +0000 Subject: KVM: arm64: Remove the target table Finally, remove the target table. Merge the code that checks the tables into kvm_reset_sys_regs() as there is now only one table. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622113317.20477-6-james.morse@arm.com --- arch/arm64/include/asm/kvm_coproc.h | 7 ----- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/sys_regs.c | 57 +++++++----------------------------- arch/arm64/kvm/sys_regs.h | 2 -- arch/arm64/kvm/sys_regs_generic_v8.c | 55 ---------------------------------- 5 files changed, 12 insertions(+), 111 deletions(-) delete mode 100644 arch/arm64/kvm/sys_regs_generic_v8.c diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 4bf0d6d05e0f..147f3a77e6a5 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -19,13 +19,6 @@ struct kvm_sys_reg_table { size_t num; }; -struct kvm_sys_reg_target_table { - struct kvm_sys_reg_table table64; - struct kvm_sys_reg_table table32; -}; - -void kvm_check_target_sys_reg_table(struct kvm_sys_reg_target_table *table); - int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 8d3d9513cbfe..48cda50aa225 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ - guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ aarch32.o arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 14333005b476..41a4d3d2a312 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2137,28 +2137,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -void kvm_check_target_sys_reg_table(struct kvm_sys_reg_target_table *table) -{ - BUG_ON(check_sysreg_table(table->table64.table, table->table64.num, false)); - BUG_ON(check_sysreg_table(table->table32.table, table->table32.num, true)); -} - -/* Get specific register table for this target. */ -static const struct sys_reg_desc *get_target_table(unsigned target, - bool mode_is_64, - size_t *num) -{ - struct kvm_sys_reg_target_table *table = &genericv8_target_table; - - if (mode_is_64) { - *num = table->table64.num; - return table->table64.table; - } else { - *num = table->table32.num; - return table->table32.table; - } -} - static int match_sys_reg(const void *key, const void *elt) { const unsigned long pval = (unsigned long)key; @@ -2393,14 +2371,20 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, return 1; } -static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num) +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { unsigned long i; - for (i = 0; i < num; i++) - if (table[i].reset) - table[i].reset(vcpu, &table[i]); + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) + if (sys_reg_descs[i].reset) + sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]); } /** @@ -2869,22 +2853,3 @@ void kvm_sys_reg_table_init(void) /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; } - -/** - * kvm_reset_sys_regs - sets system registers to reset value - * @vcpu: The VCPU pointer - * - * This function finds the right table above and sets the registers on the - * virtual CPU struct to their architecturally defined reset values. - */ -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) -{ - size_t num; - const struct sys_reg_desc *table; - - /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - - table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num); -} diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 933609e883bf..5a6fc30f5989 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -165,6 +165,4 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \ Op2(sys_reg_Op2(reg)) -extern struct kvm_sys_reg_target_table genericv8_target_table; - #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c deleted file mode 100644 index a7e21e61beea..000000000000 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * Based on arch/arm/kvm/coproc_a15.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Authors: Rusty Russell - * Christoffer Dall - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sys_regs.h" - -/* - * Implementation specific sys-reg registers. - * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 - */ -static const struct sys_reg_desc genericv8_sys_regs[] = { -}; - -static const struct sys_reg_desc genericv8_cp15_regs[] = { -}; - -struct kvm_sys_reg_target_table genericv8_target_table = { - .table64 = { - .table = genericv8_sys_regs, - .num = ARRAY_SIZE(genericv8_sys_regs), - }, - .table32 = { - .table = genericv8_cp15_regs, - .num = ARRAY_SIZE(genericv8_cp15_regs), - }, -}; - -static int __init sys_reg_genericv8_init(void) -{ - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) - BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], - &genericv8_sys_regs[i]) >= 0); - - kvm_check_target_sys_reg_table(&genericv8_target_table); - - return 0; -} -late_initcall(sys_reg_genericv8_init); -- cgit From b38b298aa4397e2dc74a89b4dd3eac9e59b64c96 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:06 +0100 Subject: KVM: arm64: Fix symbol dependency in __hyp_call_panic_nvhe __hyp_call_panic_nvhe contains inline assembly which did not declare its dependency on the __hyp_panic_string symbol. The static-declared string has previously been kept alive because of a use in __hyp_call_panic_vhe. Fix this in preparation for separating the source files between VHE and nVHE when the two users land in two different compilation units. The static variable otherwise gets dropped when compiling the nVHE source file, causing an undefined symbol linker error later. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-2-dbrazdil@google.com --- arch/arm64/kvm/hyp/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index db1c4487d95d..9270b14157b5 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -897,7 +897,7 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, * making sure it is a kernel address and not a PC-relative * reference. */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); __hyp_do_panic(str_va, spsr, elr, -- cgit From 7b2399ea5640b2e5f576af08b91091a26f240ea4 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:07 +0100 Subject: KVM: arm64: Move __smccc_workaround_1_smc to .rodata This snippet of assembly is used by cpu_errata.c to overwrite parts of KVM hyp vector. Move it to its own source file and change its ELF section to .rodata. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-3-dbrazdil@google.com --- arch/arm64/kvm/hyp/Makefile | 1 + arch/arm64/kvm/hyp/hyp-entry.S | 16 ---------------- arch/arm64/kvm/hyp/smccc_wa.S | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 16 deletions(-) create mode 100644 arch/arm64/kvm/hyp/smccc_wa.S diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8c9880783839..5d8357ddc234 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -7,6 +7,7 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) obj-$(CONFIG_KVM) += hyp.o +obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9c5cfb04170e..d362fad97cc8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -318,20 +318,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) - - .popsection - -SYM_CODE_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_CODE_END(__smccc_workaround_1_smc) #endif diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S new file mode 100644 index 000000000000..b0441dbdf68b --- /dev/null +++ b/arch/arm64/kvm/hyp/smccc_wa.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2018 - ARM Ltd + * Author: Marc Zyngier + */ + +#include +#include + +#include +#include + + /* + * This is not executed directly and is instead copied into the vectors + * by install_bp_hardening_cb(). + */ + .data + .pushsection .rodata + .global __smccc_workaround_1_smc +SYM_DATA_START(__smccc_workaround_1_smc) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ + .org 1b +SYM_DATA_END(__smccc_workaround_1_smc) -- cgit From 7621712918ad4f5e6356193d9058debf657a6254 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:08 +0100 Subject: KVM: arm64: Add build rules for separate VHE/nVHE object files Add new folders arch/arm64/kvm/hyp/{vhe,nvhe} and Makefiles for building code that runs in EL2 under VHE/nVHE KVM, repsectivelly. Add an include folder for hyp-specific header files which will include code common to VHE/nVHE. Build nVHE code with -D__KVM_NVHE_HYPERVISOR__, VHE code with -D__KVM_VHE_HYPERVISOR__. Under nVHE compile each source file into a `.hyp.tmp.o` object first, then prefix all its symbols with "__kvm_nvhe_" using `objcopy` and produce a `.hyp.o`. Suffixes were chosen so that it would be possible for VHE and nVHE to share some source files, but compiled with different CFLAGS. The nVHE ELF symbol prefix is added to kallsyms.c as ignored. EL2-only symbols will never appear in EL1 stack traces. Due to symbol prefixing, add a section in image-vars.h for aliases of symbols that are defined in nVHE EL2 and accessed by kernel in EL1 or vice versa. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-4-dbrazdil@google.com --- arch/arm64/kernel/image-vars.h | 14 ++++++++++++++ arch/arm64/kvm/hyp/Makefile | 10 +++++++--- arch/arm64/kvm/hyp/nvhe/Makefile | 34 ++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/vhe/Makefile | 17 +++++++++++++++++ scripts/kallsyms.c | 1 + 5 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/Makefile create mode 100644 arch/arm64/kvm/hyp/vhe/Makefile diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index be0a63ffed23..3dc27da47712 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -51,4 +51,18 @@ __efistub__ctype = _ctype; #endif +#ifdef CONFIG_KVM + +/* + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to + * separate it from the kernel proper. The following symbols are legally + * accessed by it, therefore provide aliases to make them linkable. + * Do not include symbols which may not be safely accessed under hypervisor + * memory mappings. + */ + +#define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; + +#endif /* CONFIG_KVM */ + #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 5d8357ddc234..9c5dfe6ff80b 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,10 +3,14 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) +incdir := $(srctree)/$(src)/include +subdir-asflags-y := -I$(incdir) +subdir-ccflags-y := -I$(incdir) \ + -fno-stack-protector \ + -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += hyp.o +obj-$(CONFIG_KVM) += hyp.o nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile new file mode 100644 index 000000000000..955f4188e00f --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_NVHE_HYPERVISOR__ +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ + +obj-y := + +obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) +extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) + +$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) +$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE + $(call if_changed_rule,as_o_S) +$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE + $(call if_changed,hypcopy) + +quiet_cmd_hypcopy = HYPCOPY $@ + cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@ + +# KVM nVHE code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# Skip objtool checking for this directory because nVHE code is compiled with +# non-standard build rules. +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile new file mode 100644 index 000000000000..e04375546081 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_VHE_HYPERVISOR__ +ccflags-y := -D__KVM_VHE_HYPERVISOR__ + +obj-y := + +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 6dc3078649fa..0096cd965332 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -109,6 +109,7 @@ static bool is_ignored_symbol(const char *name, char type) ".LASANPC", /* s390 kasan local symbols */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ + "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ NULL }; -- cgit From 53b671128bd7f9ea41ae1a06106d88eb4cf66623 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:09 +0100 Subject: KVM: arm64: Use build-time defines in has_vhe() Build system compiles hyp code with macros specifying if the code belongs to VHE or nVHE. Use these macros to evaluate has_vhe() at compile time. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-5-dbrazdil@google.com --- arch/arm64/include/asm/virt.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 5051b388c654..09977acc007d 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,10 +85,17 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) + /* + * The following macros are defined for code specic to VHE/nVHE. + * If has_vhe() is inlined into those compilation units, it can + * be determined statically. Otherwise fall back to caps. + */ + if (__is_defined(__KVM_VHE_HYPERVISOR__)) return true; - - return false; + else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + return false; + else + return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } #endif /* __ASSEMBLY__ */ -- cgit From f50b6f6ae131b6ee7d5dd738961eda0c00b7f027 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Thu, 25 Jun 2020 14:14:10 +0100 Subject: KVM: arm64: Handle calls to prefixed hyp functions Once hyp functions are moved to a hyp object, they will have prefixed symbols. This change declares and gets the address of the prefixed version for calls to the hyp functions. To aid migration, the hyp functions that have not yet moved have their prefixed versions aliased to their non-prefixed version. This begins with all the hyp functions being listed and will reduce to none of them once the migration is complete. Signed-off-by: Andrew Scull [David: Extracted kvm_call_hyp nVHE branches into own helper macros, added comments around symbol aliases.] Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-6-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 19 +++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 17 ++++++++++++++--- arch/arm64/kernel/image-vars.h | 26 ++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 352aaebf4198..6a682d66a640 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,6 +42,24 @@ #include +/* + * Translate name of a symbol defined in nVHE hyp to the name seen + * by kernel proper. All nVHE symbols are prefixed by the build system + * to avoid clashes with the VHE variants. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#define DECLARE_KVM_VHE_SYM(sym) extern char sym[] +#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] + +/* + * Define a pair of symbols sharing the same name but one defined in + * VHE and the other in nVHE hyp implementations. + */ +#define DECLARE_KVM_HYP_SYM(sym) \ + DECLARE_KVM_VHE_SYM(sym); \ + DECLARE_KVM_NVHE_SYM(sym) + /* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ @@ -50,6 +68,7 @@ val = lm_alias(&sym); \ val; \ }) +#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c3e6fcc664b1..49d1a5cd8f8f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -448,6 +448,18 @@ void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); +#define kvm_call_hyp_nvhe(f, ...) \ + do { \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + } while(0) + +#define kvm_call_hyp_nvhe_ret(f, ...) \ + ({ \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + }) + /* * The couple of isb() below are there to guarantee the same behaviour * on VHE as on !VHE, where the eret to EL1 acts as a context @@ -459,7 +471,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); f(__VA_ARGS__); \ isb(); \ } else { \ - __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ } while(0) @@ -471,8 +483,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = __kvm_call_hyp(kvm_ksym_ref(f), \ - ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ } \ \ ret; \ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 3dc27da47712..36444bac6a05 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -63,6 +63,32 @@ __efistub__ctype = _ctype; #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; +/* Symbols defined in debug-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_get_mdcr_el2); + +/* Symbols defined in switch.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_vcpu_run_nvhe); + +/* Symbols defined in sysreg-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_enable_ssbs); + +/* Symbols defined in timer-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_timer_set_cntvoff); + +/* Symbols defined in tlb.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_flush_vm_context); +KVM_NVHE_ALIAS(__kvm_tlb_flush_local_vmid); +KVM_NVHE_ALIAS(__kvm_tlb_flush_vmid); +KVM_NVHE_ALIAS(__kvm_tlb_flush_vmid_ipa); + +/* Symbols defined in vgic-v3-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__vgic_v3_get_ich_vtr_el2); +KVM_NVHE_ALIAS(__vgic_v3_init_lrs); +KVM_NVHE_ALIAS(__vgic_v3_read_vmcr); +KVM_NVHE_ALIAS(__vgic_v3_restore_aprs); +KVM_NVHE_ALIAS(__vgic_v3_save_aprs); +KVM_NVHE_ALIAS(__vgic_v3_write_vmcr); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ -- cgit From b877e9849d41e7d2100d2933e0a3971d0ddec011 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:11 +0100 Subject: KVM: arm64: Build hyp-entry.S separately for VHE/nVHE hyp-entry.S contains implementation of KVM hyp vectors. This code is mostly shared between VHE/nVHE, therefore compile it under both VHE and nVHE build rules. nVHE-specific host HVC handler is hidden behind __KVM_NVHE_HYPERVISOR__. Adjust code which selects which KVM hyp vecs to install to choose the correct VHE/nVHE symbol. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-7-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 22 +++++++++++++++++----- arch/arm64/include/asm/mmu.h | 7 ------- arch/arm64/kernel/image-vars.h | 24 ++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/Makefile | 4 ++-- arch/arm64/kvm/hyp/hyp-entry.S | 2 ++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- 8 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6a682d66a640..6026cbd204ae 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -60,12 +60,17 @@ DECLARE_KVM_VHE_SYM(sym); \ DECLARE_KVM_NVHE_SYM(sym) -/* Translate a kernel address of @sym into its equivalent linear mapping */ -#define kvm_ksym_ref(sym) \ +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) +#define CHOOSE_HYP_SYM(sym) (has_vhe() ? CHOOSE_VHE_SYM(sym) \ + : CHOOSE_NVHE_SYM(sym)) + +/* Translate a kernel address @ptr into its equivalent linear mapping */ +#define kvm_ksym_ref(ptr) \ ({ \ - void *val = &sym; \ + void *val = (ptr); \ if (!is_kernel_in_hyp_mode()) \ - val = lm_alias(&sym); \ + val = lm_alias((ptr)); \ val; \ }) #define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) @@ -76,7 +81,14 @@ struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; -extern char __kvm_hyp_vector[]; +DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); +#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern atomic_t arm64_el2_vector_last_slot; +DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); +#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) +#endif extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8444df000181..a7a5ecaa2e83 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -45,13 +45,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ - defined(CONFIG_HARDEN_EL2_VECTORS)) - -extern char __bp_harden_hyp_vecs[]; -extern atomic_t arm64_el2_vector_last_slot; -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ - #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 36444bac6a05..f28da486b75a 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -66,8 +66,17 @@ __efistub__ctype = _ctype; /* Symbols defined in debug-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_get_mdcr_el2); +/* Symbols defined in entry.S (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__guest_exit); +KVM_NVHE_ALIAS(abort_guest_exit_end); +KVM_NVHE_ALIAS(abort_guest_exit_start); + +/* Symbols defined in hyp-init.S (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__kvm_handle_stub_hvc); + /* Symbols defined in switch.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_vcpu_run_nvhe); +KVM_NVHE_ALIAS(hyp_panic); /* Symbols defined in sysreg-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_enable_ssbs); @@ -89,6 +98,21 @@ KVM_NVHE_ALIAS(__vgic_v3_restore_aprs); KVM_NVHE_ALIAS(__vgic_v3_save_aprs); KVM_NVHE_ALIAS(__vgic_v3_write_vmcr); +/* Alternative callbacks for init-time patching of nVHE hyp code. */ +KVM_NVHE_ALIAS(arm64_enable_wa2_handling); +KVM_NVHE_ALIAS(kvm_patch_vector_branch); +KVM_NVHE_ALIAS(kvm_update_va_mask); + +/* Global kernel state accessed by nVHE hyp code. */ +KVM_NVHE_ALIAS(arm64_ssbd_callback_required); +KVM_NVHE_ALIAS(kvm_host_data); + +/* Kernel constant needed to compute idmap addresses. */ +KVM_NVHE_ALIAS(kimage_voffset); + +/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ +KVM_NVHE_ALIAS(panic); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 90cb90561446..34b551385153 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1285,7 +1285,7 @@ static void cpu_init_hyp_mode(void) * so that we can use adr_l to access per-cpu variables in EL2. */ tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - - (unsigned long)kvm_ksym_ref(kvm_host_data)); + (unsigned long)kvm_ksym_ref(&kvm_host_data)); pgd_ptr = kvm_mmu_get_httbr(); hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 9c5dfe6ff80b..8b0cf85080b5 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,11 +10,11 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += hyp.o nvhe/ +obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o + debug-sr.o entry.o switch.o fpsimd.o tlb.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index d362fad97cc8..7e3c72fa634f 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -40,6 +40,7 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap +#ifdef __KVM_NVHE_HYPERVISOR__ mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC @@ -74,6 +75,7 @@ el1_sync: // Guest trapped into EL2 eret sb +#endif /* __KVM_NVHE_HYPERVISOR__ */ el1_hvc_guest: /* diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 955f4188e00f..79eb8eed96a1 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := +obj-y := ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index e04375546081..323029e02b4e 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := +obj-y := ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may -- cgit From 208243c752a7eeef4236f7b7d67e806ee356e3f8 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Thu, 25 Jun 2020 14:14:12 +0100 Subject: KVM: arm64: Move hyp-init.S to nVHE hyp-init.S contains the identity mapped initialisation code for the non-VHE code that runs at EL2. It is only used for non-VHE. Adjust code that calls into this to use the prefixed symbol name. Signed-off-by: Andrew Scull Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-8-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 5 +- arch/arm64/kernel/image-vars.h | 9 +- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/hyp-init.S | 163 ------------------------------------- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 163 +++++++++++++++++++++++++++++++++++++ 6 files changed, 173 insertions(+), 171 deletions(-) delete mode 100644 arch/arm64/kvm/hyp-init.S create mode 100644 arch/arm64/kvm/hyp/nvhe/hyp-init.S diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6026cbd204ae..3476abb046e3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -78,10 +78,9 @@ struct kvm; struct kvm_vcpu; -extern char __kvm_hyp_init[]; -extern char __kvm_hyp_init_end[]; - +DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); +#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) #ifdef CONFIG_KVM_INDIRECT_VECTORS diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index f28da486b75a..63186c91b614 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(__guest_exit); KVM_NVHE_ALIAS(abort_guest_exit_end); KVM_NVHE_ALIAS(abort_guest_exit_start); -/* Symbols defined in hyp-init.S (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__kvm_handle_stub_hvc); - /* Symbols defined in switch.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_vcpu_run_nvhe); KVM_NVHE_ALIAS(hyp_panic); @@ -113,6 +110,12 @@ KVM_NVHE_ALIAS(kimage_voffset); /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ KVM_NVHE_ALIAS(panic); +/* Vectors installed by hyp-init on reset HVC. */ +KVM_NVHE_ALIAS(__hyp_stub_vectors); + +/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ +KVM_NVHE_ALIAS(idmap_t0sz); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 8d3d9513cbfe..152d8845a1a2 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ + inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ aarch32.o arch_timer.o \ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S deleted file mode 100644 index 6e6ed5581eed..000000000000 --- a/arch/arm64/kvm/hyp-init.S +++ /dev/null @@ -1,163 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.idmap.text, "ax" - - .align 11 - -SYM_CODE_START(__kvm_hyp_init) - ventry __invalid // Synchronous EL2t - ventry __invalid // IRQ EL2t - ventry __invalid // FIQ EL2t - ventry __invalid // Error EL2t - - ventry __invalid // Synchronous EL2h - ventry __invalid // IRQ EL2h - ventry __invalid // FIQ EL2h - ventry __invalid // Error EL2h - - ventry __do_hyp_init // Synchronous 64-bit EL1 - ventry __invalid // IRQ 64-bit EL1 - ventry __invalid // FIQ 64-bit EL1 - ventry __invalid // Error 64-bit EL1 - - ventry __invalid // Synchronous 32-bit EL1 - ventry __invalid // IRQ 32-bit EL1 - ventry __invalid // FIQ 32-bit EL1 - ventry __invalid // Error 32-bit EL1 - -__invalid: - b . - - /* - * x0: HYP pgd - * x1: HYP stack - * x2: HYP vectors - * x3: per-CPU offset - */ -__do_hyp_init: - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.lo __kvm_handle_stub_hvc - - phys_to_ttbr x4, x0 -alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT -alternative_else_nop_endif - msr ttbr0_el2, x4 - - mrs x4, tcr_el1 - mov_q x5, TCR_EL2_MASK - and x4, x4, x5 - mov x5, #TCR_EL2_RES1 - orr x4, x4, x5 - - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x5, idmap_t0sz - bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH - - /* - * Set the PS bits in TCR_EL2. - */ - tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 - - msr tcr_el2, x4 - - mrs x4, mair_el1 - msr mair_el2, x4 - isb - - /* Invalidate the stale TLBs from Bootloader */ - tlbi alle2 - dsb sy - - /* - * Preserve all the RES1 bits while setting the default flags, - * as well as the EE bit on BE. Drop the A flag since the compiler - * is allowed to generate unaligned accesses. - */ - mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) -CPU_BE( orr x4, x4, #SCTLR_ELx_EE) - msr sctlr_el2, x4 - isb - - /* Set the stack and new vectors */ - kern_hyp_va x1 - mov sp, x1 - msr vbar_el2, x2 - - /* Set tpidr_el2 for use by HYP */ - msr tpidr_el2, x3 - - /* Hello, World! */ - eret -SYM_CODE_END(__kvm_hyp_init) - -SYM_CODE_START(__kvm_handle_stub_hvc) - cmp x0, #HVC_SOFT_RESTART - b.ne 1f - - /* This is where we're about to jump, staying at EL2 */ - msr elr_el2, x1 - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) - msr spsr_el2, x0 - - /* Shuffle the arguments, and don't come back */ - mov x0, x2 - mov x1, x3 - mov x2, x4 - b reset - -1: cmp x0, #HVC_RESET_VECTORS - b.ne 1f -reset: - /* - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in - * case we coming via HVC_SOFT_RESTART. - */ - mrs x5, sctlr_el2 - mov_q x6, SCTLR_ELx_FLAGS - bic x5, x5, x6 // Clear SCTL_M and etc - pre_disable_mmu_workaround - msr sctlr_el2, x5 - isb - - /* Install stub vectors */ - adr_l x5, __hyp_stub_vectors - msr vbar_el2, x5 - mov x0, xzr - eret - -1: /* Bad stub call */ - mov_q x0, HVC_STUB_ERR - eret - -SYM_CODE_END(__kvm_handle_stub_hvc) - - .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 79eb8eed96a1..bf2d8dea5400 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := ../hyp-entry.o +obj-y := hyp-init.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S new file mode 100644 index 000000000000..6e6ed5581eed --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +SYM_CODE_START(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP pgd + * x1: HYP stack + * x2: HYP vectors + * x3: per-CPU offset + */ +__do_hyp_init: + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.lo __kvm_handle_stub_hvc + + phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif + msr ttbr0_el2, x4 + + mrs x4, tcr_el1 + mov_q x5, TCR_EL2_MASK + and x4, x4, x5 + mov x5, #TCR_EL2_RES1 + orr x4, x4, x5 + + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + ldr_l x5, idmap_t0sz + bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + + /* + * Set the PS bits in TCR_EL2. + */ + tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 + + msr tcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + /* Invalidate the stale TLBs from Bootloader */ + tlbi alle2 + dsb sy + + /* + * Preserve all the RES1 bits while setting the default flags, + * as well as the EE bit on BE. Drop the A flag since the compiler + * is allowed to generate unaligned accesses. + */ + mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) +CPU_BE( orr x4, x4, #SCTLR_ELx_EE) + msr sctlr_el2, x4 + isb + + /* Set the stack and new vectors */ + kern_hyp_va x1 + mov sp, x1 + msr vbar_el2, x2 + + /* Set tpidr_el2 for use by HYP */ + msr tpidr_el2, x3 + + /* Hello, World! */ + eret +SYM_CODE_END(__kvm_hyp_init) + +SYM_CODE_START(__kvm_handle_stub_hvc) + cmp x0, #HVC_SOFT_RESTART + b.ne 1f + + /* This is where we're about to jump, staying at EL2 */ + msr elr_el2, x1 + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) + msr spsr_el2, x0 + + /* Shuffle the arguments, and don't come back */ + mov x0, x2 + mov x1, x3 + mov x2, x4 + b reset + +1: cmp x0, #HVC_RESET_VECTORS + b.ne 1f +reset: + /* + * Reset kvm back to the hyp stub. Do not clobber x0-x4 in + * case we coming via HVC_SOFT_RESTART. + */ + mrs x5, sctlr_el2 + mov_q x6, SCTLR_ELx_FLAGS + bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround + msr sctlr_el2, x5 + isb + + /* Install stub vectors */ + adr_l x5, __hyp_stub_vectors + msr vbar_el2, x5 + mov x0, xzr + eret + +1: /* Bad stub call */ + mov_q x0, HVC_STUB_ERR + eret + +SYM_CODE_END(__kvm_handle_stub_hvc) + + .popsection -- cgit From e03fa29164ec1db1a81dc0168d0017a9e0366c7c Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:13 +0100 Subject: KVM: arm64: Duplicate hyp/tlb.c for VHE/nVHE tlb.c contains code for flushing the TLB, with code shared between VHE/nVHE. Because common code is small, duplicate tlb.c and specialize each copy for VHE/nVHE. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-9-dbrazdil@google.com --- arch/arm64/kernel/image-vars.h | 14 ++- arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 156 +++++++++++++++++++++++++ arch/arm64/kvm/hyp/tlb.c | 242 --------------------------------------- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 161 ++++++++++++++++++++++++++ 7 files changed, 328 insertions(+), 251 deletions(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/tlb.c delete mode 100644 arch/arm64/kvm/hyp/tlb.c create mode 100644 arch/arm64/kvm/hyp/vhe/tlb.c diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 63186c91b614..f029f3ea7ffe 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -81,12 +81,6 @@ KVM_NVHE_ALIAS(__kvm_enable_ssbs); /* Symbols defined in timer-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_timer_set_cntvoff); -/* Symbols defined in tlb.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__kvm_flush_vm_context); -KVM_NVHE_ALIAS(__kvm_tlb_flush_local_vmid); -KVM_NVHE_ALIAS(__kvm_tlb_flush_vmid); -KVM_NVHE_ALIAS(__kvm_tlb_flush_vmid_ipa); - /* Symbols defined in vgic-v3-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__vgic_v3_get_ich_vtr_el2); KVM_NVHE_ALIAS(__vgic_v3_init_lrs); @@ -116,6 +110,14 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); /* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ KVM_NVHE_ALIAS(idmap_t0sz); +/* Kernel symbol used by icache_is_vpipt(). */ +KVM_NVHE_ALIAS(__icache_flags); + +/* Kernel symbols needed for cpus_have_final/const_caps checks. */ +KVM_NVHE_ALIAS(arm64_const_caps_ready); +KVM_NVHE_ALIAS(cpu_hwcap_keys); +KVM_NVHE_ALIAS(cpu_hwcaps); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8b0cf85080b5..87d3cce2b26e 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o tlb.o + debug-sr.o entry.o switch.o fpsimd.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index bf2d8dea5400..a5316e97d373 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := hyp-init.o ../hyp-entry.o +obj-y := tlb.o hyp-init.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c new file mode 100644 index 000000000000..deb48c8c00ee --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include +#include +#include + +struct tlb_inv_context { + u64 tcr; +}; + +static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + + /* __load_guest_stage2() includes an ISB for the workaround. */ + __load_guest_stage2(kvm); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); +} + +static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + write_sysreg(0, vttbr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } +} + +void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + __tlb_switch_to_guest(kvm, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi(ipas2e1is, ipa); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + __tlb_switch_to_guest(kvm, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + __tlb_switch_to_guest(kvm, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __hyp_text __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c deleted file mode 100644 index d063a576d511..000000000000 --- a/arch/arm64/kvm/hyp/tlb.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include - -struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; -}; - -static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - u64 val; - - local_irq_save(cxt->flags); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* - * For CPUs that are affected by ARM errata 1165522 or 1530923, - * we cannot trust stage-1 to be in a correct state at that - * point. Since we do not want to force a full load of the - * vcpu state, we prevent the EL1 page-table walker to - * allocate new TLBs. This is done by setting the EPD bits - * in the TCR_EL1 register. We also need to prevent it to - * allocate IPA->PA walks, so we enable the S1 MMU... - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); - val |= SCTLR_ELx_M; - write_sysreg_el1(val, SYS_SCTLR); - } - - /* - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and - * most TLB operations target EL2/EL0. In order to affect the - * guest TLBs (EL1/EL0), we need to change one of these two - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so - * let's flip TGE before executing the TLB operation. - * - * ARM erratum 1165522 requires some special handling (again), - * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already - * has an ISB in order to deal with this. - */ - __load_guest_stage2(kvm); - val = read_sysreg(hcr_el2); - val &= ~HCR_TGE; - write_sysreg(val, hcr_el2); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - isb(); - } - - /* __load_guest_stage2() includes an ISB for the workaround. */ - __load_guest_stage2(kvm); - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); -} - -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_guest_vhe(kvm, cxt); - else - __tlb_switch_to_guest_nvhe(kvm, cxt); -} - -static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - /* - * We're done with the TLB operation, let's restore the host's - * view of HCR_EL2. - */ - write_sysreg(0, vttbr_el2); - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - isb(); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); - } - - local_irq_restore(cxt->flags); -} - -static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - write_sysreg(0, vttbr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - } -} - -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_host_vhe(kvm, cxt); - else - __tlb_switch_to_host_nvhe(kvm, cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - ipa >>= 12; - __tlbi(ipas2e1is, ipa); - - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb(ish); - __tlbi(vmalle1is); - dsb(ish); - isb(); - - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. - */ - if (!has_vhe() && icache_is_vpipt()) - __flush_icache_all(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalls12e1is); - dsb(ish); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); - struct tlb_inv_context cxt; - - /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalle1); - dsb(nsh); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_flush_vm_context(void) -{ - dsb(ishst); - __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - - dsb(ish); -} diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 323029e02b4e..704140fc5d66 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := ../hyp-entry.o +obj-y := tlb.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c new file mode 100644 index 000000000000..b275101e9c9c --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include +#include + +struct tlb_inv_context { + unsigned long flags; + u64 tcr; + u64 sctlr; +}; + +static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) +{ + u64 val; + + local_irq_save(cxt->flags); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that + * point. Since we do not want to force a full load of the + * vcpu state, we prevent the EL1 page-table walker to + * allocate new TLBs. This is done by setting the EPD bits + * in the TCR_EL1 register. We also need to prevent it to + * allocate IPA->PA walks, so we enable the S1 MMU... + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + } + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + * + * ARM erratum 1165522 requires some special handling (again), + * as we need to make sure both stages of translation are in + * place before clearing TGE. __load_guest_stage2() already + * has an ISB in order to deal with this. + */ + __load_guest_stage2(kvm); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + isb(); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Restore the registers to what they were */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + } + + local_irq_restore(cxt->flags); +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(kvm, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi(ipas2e1is, ipa); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm *kvm) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(kvm, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + __tlb_switch_to_guest(kvm, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(kvm, &cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} -- cgit From 09cf57eba304246141367b95c89801fd2047ac96 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:14 +0100 Subject: KVM: arm64: Split hyp/switch.c to VHE/nVHE switch.c implements context-switching for KVM, with large parts shared between VHE/nVHE. These common routines are moved to a header file, VHE-specific code is moved to vhe/switch.c and nVHE-specific code is moved to nvhe/switch.c. Previously __kvm_vcpu_run needed a different symbol name for VHE/nVHE. This is cleaned up and the caller in arm.c simplified. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-10-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 4 +- arch/arm64/include/asm/kvm_hyp.h | 6 + arch/arm64/kernel/image-vars.h | 37 +- arch/arm64/kvm/arm.c | 6 +- arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/hyp-entry.S | 2 + arch/arm64/kvm/hyp/include/hyp/switch.h | 509 +++++++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 271 +++++++++ arch/arm64/kvm/hyp/switch.c | 936 -------------------------------- arch/arm64/kvm/hyp/sysreg-sr.c | 4 +- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 219 ++++++++ 13 files changed, 1048 insertions(+), 952 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/switch.h create mode 100644 arch/arm64/kvm/hyp/nvhe/switch.c delete mode 100644 arch/arm64/kvm/hyp/switch.c create mode 100644 arch/arm64/kvm/hyp/vhe/switch.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 3476abb046e3..5a91aaae78d2 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -96,9 +96,7 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); -extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); - -extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_enable_ssbs(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index ce3080834bfa..82fa05d15b8b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -81,11 +81,17 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +#ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); +#endif u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +#ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(unsigned long, ...); +#endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index f029f3ea7ffe..06fe9833104a 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -63,30 +63,50 @@ __efistub__ctype = _ctype; #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; +/* Symbols defined in aarch32.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(kvm_skip_instr32); + /* Symbols defined in debug-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__debug_switch_to_guest); +KVM_NVHE_ALIAS(__debug_switch_to_host); KVM_NVHE_ALIAS(__kvm_get_mdcr_el2); /* Symbols defined in entry.S (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__guest_enter); KVM_NVHE_ALIAS(__guest_exit); KVM_NVHE_ALIAS(abort_guest_exit_end); KVM_NVHE_ALIAS(abort_guest_exit_start); -/* Symbols defined in switch.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__kvm_vcpu_run_nvhe); -KVM_NVHE_ALIAS(hyp_panic); +/* Symbols defined in fpsimd.S (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__fpsimd_restore_state); +KVM_NVHE_ALIAS(__fpsimd_save_state); /* Symbols defined in sysreg-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_enable_ssbs); +KVM_NVHE_ALIAS(__sysreg32_restore_state); +KVM_NVHE_ALIAS(__sysreg32_save_state); +KVM_NVHE_ALIAS(__sysreg_restore_state_nvhe); +KVM_NVHE_ALIAS(__sysreg_save_state_nvhe); /* Symbols defined in timer-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_timer_set_cntvoff); +KVM_NVHE_ALIAS(__timer_disable_traps); +KVM_NVHE_ALIAS(__timer_enable_traps); + +/* Symbols defined in vgic-v2-cpuif-proxy.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__vgic_v2_perform_cpuif_access); /* Symbols defined in vgic-v3-sr.c (not yet compiled with nVHE build rules). */ +KVM_NVHE_ALIAS(__vgic_v3_activate_traps); +KVM_NVHE_ALIAS(__vgic_v3_deactivate_traps); KVM_NVHE_ALIAS(__vgic_v3_get_ich_vtr_el2); KVM_NVHE_ALIAS(__vgic_v3_init_lrs); +KVM_NVHE_ALIAS(__vgic_v3_perform_cpuif_access); KVM_NVHE_ALIAS(__vgic_v3_read_vmcr); KVM_NVHE_ALIAS(__vgic_v3_restore_aprs); +KVM_NVHE_ALIAS(__vgic_v3_restore_state); KVM_NVHE_ALIAS(__vgic_v3_save_aprs); +KVM_NVHE_ALIAS(__vgic_v3_save_state); KVM_NVHE_ALIAS(__vgic_v3_write_vmcr); /* Alternative callbacks for init-time patching of nVHE hyp code. */ @@ -97,11 +117,13 @@ KVM_NVHE_ALIAS(kvm_update_va_mask); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(arm64_ssbd_callback_required); KVM_NVHE_ALIAS(kvm_host_data); +KVM_NVHE_ALIAS(kvm_vgic_global_state); /* Kernel constant needed to compute idmap addresses. */ KVM_NVHE_ALIAS(kimage_voffset); /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ +KVM_NVHE_ALIAS(__hyp_panic_string); KVM_NVHE_ALIAS(panic); /* Vectors installed by hyp-init on reset HVC. */ @@ -118,6 +140,15 @@ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); KVM_NVHE_ALIAS(cpu_hwcaps); +/* Static keys which are set if a vGIC trap should be handled in hyp. */ +KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); +KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); + +/* Static key checked in pmr_sync(). */ +#ifdef CONFIG_ARM64_PSEUDO_NMI +KVM_NVHE_ALIAS(gic_pmr_sync); +#endif + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 34b551385153..26780b78a523 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -748,11 +748,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if (has_vhe()) { - ret = kvm_vcpu_run_vhe(vcpu); - } else { - ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); - } + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 87d3cce2b26e..7462d3a8a6f2 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o + debug-sr.o entry.o fpsimd.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 7e3c72fa634f..8316ee67d6a0 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -182,6 +182,7 @@ el2_error: eret sb +#ifdef __KVM_NVHE_HYPERVISOR__ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -191,6 +192,7 @@ SYM_FUNC_START(__hyp_do_panic) eret sb SYM_FUNC_END(__hyp_do_panic) +#endif SYM_CODE_START(__hyp_panic) get_host_ctxt x0, x1 diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000000..65d2a879b93b --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char __hyp_panic_string[]; + +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static inline bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) +{ + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); + + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); +} + +static inline void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static inline void __hyp_text __deactivate_traps_common(void) +{ + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} + +static inline void __hyp_text ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void __hyp_text ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline void __hyp_text __activate_vm(struct kvm *kvm) +{ + __load_guest_stage2(kvm); +} + +static inline bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg(par_el1); + asm volatile("at s1e1r, %0" : : "r" (far)); + isb(); + + tmp = read_sysreg(par_el1); + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + u64 hpfar, far; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + +/* Check for an FPSIMD/SVE trap and handle as appropriate */ +static inline bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +{ + bool vhe, sve_guest, sve_host; + u8 hsr_ec; + + if (!system_supports_fpsimd()) + return false; + + /* + * Currently system_supports_sve() currently implies has_vhe(), + * so the check is redundant. However, has_vhe() can be determined + * statically and helps the compiler remove dead code. + */ + if (has_vhe() && system_supports_sve()) { + sve_guest = vcpu_has_sve(vcpu); + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; + vhe = true; + } else { + sve_guest = false; + sve_host = false; + vhe = has_vhe(); + } + + hsr_ec = kvm_vcpu_trap_get_class(vcpu); + if (hsr_ec != ESR_ELx_EC_FP_ASIMD && + hsr_ec != ESR_ELx_EC_SVE) + return false; + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest) + if (hsr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + if (vhe) { + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; + + if (sve_guest) + reg |= CPACR_EL1_ZEN; + + write_sysreg(reg, cpacr_el1); + } else { + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + } + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init(). + */ + if (sve_host) { + struct thread_struct *thread = container_of( + vcpu->arch.host_fpsimd_state, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); + } else { + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + if (sve_guest) { + sve_load_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); + } else { + __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); + } + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], + fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + +static inline bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool __hyp_text esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(regs, key) \ +({ \ + regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ +}) + +static inline bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt->sys_regs, APIA); + __ptrauth_save_key(ctxt->sys_regs, APIB); + __ptrauth_save_key(ctxt->sys_regs, APDA); + __ptrauth_save_key(ctxt->sys_regs, APDB); + __ptrauth_save_key(ctxt->sys_regs, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the + * main run loop. + */ +static inline bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + * Similarly for trapped SVE accesses. + */ + if (__hyp_handle_fpsimd(vcpu)) + return true; + + if (__hyp_handle_ptrauth(vcpu)) + return true; + + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_dabt_isextabt(vcpu) && + !kvm_vcpu_dabt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + + goto exit; + } + } + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + } + +exit: + /* Return to the host kernel and handle the exit */ + return false; +} + +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static inline void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static inline void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index a5316e97d373..8b3ac38eaa44 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := tlb.o hyp-init.o ../hyp-entry.o +obj-y := switch.o tlb.o hyp-init.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c new file mode 100644 index 000000000000..7f6b8d3dc637 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + if (!update_fp_enabled(vcpu)) { + val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cptr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u64 mdcr_el2; + + ___deactivate_traps(vcpu); + + mdcr_el2 = read_sysreg(mdcr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. + */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + + __deactivate_traps_common(); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/* Restore VGICv3 state on non_VEH systems */ +static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/** + * Disable host events, enable guest events + */ +static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + +/* Switch to the guest for legacy non-VHE systems */ +int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool pmu_switch_needed; + u64 exit_code; + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + pmr_sync(); + } + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + + __sysreg_save_state_nvhe(host_ctxt); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->kvm)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + __sysreg_save_state_nvhe(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __hyp_vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state_nvhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); + + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + + return exit_code; +} + +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; + unsigned long str_va; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(host_ctxt); + } + + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); + + __hyp_do_panic(str_va, + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c deleted file mode 100644 index 9270b14157b5..000000000000 --- a/arch/arm64/kvm/hyp/switch.c +++ /dev/null @@ -1,936 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Check whether the FP regs were dirtied while in the host-side run loop: */ -static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) -{ - /* - * When the system doesn't support FP/SIMD, we cannot rely on - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an - * abort on the very first access to FP and thus we should never - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always - * trap the accesses. - */ - if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_FP_HOST); - - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); -} - -/* Save the 32-bit only FPSIMD system register state */ -static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) -{ - if (!vcpu_el1_is_32bit(vcpu)) - return; - - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); -} - -static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) -{ - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to - * it will cause an exception. - */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { - write_sysreg(1 << 30, fpexc32_el2); - isb(); - } -} - -static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) -{ - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ - write_sysreg(1 << 15, hstr_el2); - - /* - * Make sure we trap PMU access from EL0 to EL2. Also sanitize - * PMSELR_EL0 to make sure it never contains the cycle - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at - * EL1 instead of being trapped to EL2. - */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); -} - -static void __hyp_text __deactivate_traps_common(void) -{ - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); -} - -static void activate_traps_vhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (update_fp_enabled(vcpu)) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - val &= ~CPACR_EL1_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} -NOKPROBE_SYMBOL(activate_traps_vhe); - -static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - __activate_traps_common(vcpu); - - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cptr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; - - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * configured and enabled. We can now restore the guest's S1 - * configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } -} - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) -{ - u64 hcr = vcpu->arch.hcr_el2; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) - hcr |= HCR_TVM; - - write_sysreg(hcr, hcr_el2); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (has_vhe()) - activate_traps_vhe(vcpu); - else - __activate_traps_nvhe(vcpu); -} - -static void deactivate_traps_vhe(void) -{ - extern char vectors[]; /* kernel exception vectors */ - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL2/EL0 translation regime used by - * the host. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); -} -NOKPROBE_SYMBOL(deactivate_traps_vhe); - -static void __hyp_text __deactivate_traps_nvhe(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * Set the TCR and SCTLR registers in the exact opposite - * sequence as __activate_traps_nvhe (first prevent walks, - * then force the MMU on). A generous sprinkling of isb() - * ensure that things happen in this exact order. - */ - val = read_sysreg_el1(SYS_TCR); - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); - isb(); - val = read_sysreg_el1(SYS_SCTLR); - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); - isb(); - } - - __deactivate_traps_common(); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - - write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); -} - -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -{ - /* - * If we pended a virtual abort, preserve it until it gets - * cleared. See D1.14.3 (Virtual Interrupts) for details, but - * the crucial bit is "On taking a vSError interrupt, - * HCR_EL2.VSE is cleared to 0." - */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; - } - - if (has_vhe()) - deactivate_traps_vhe(); - else - __deactivate_traps_nvhe(); -} - -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) -{ - __activate_traps_common(vcpu); -} - -void deactivate_traps_vhe_put(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); -} - -static void __hyp_text __activate_vm(struct kvm *kvm) -{ - __load_guest_stage2(kvm); -} - -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) -{ - write_sysreg(0, vttbr_el2); -} - -/* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -/* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) -{ - u8 ec; - u64 esr; - u64 hpfar, far; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; - return true; -} - -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) -{ - bool vhe, sve_guest, sve_host; - u8 hsr_ec; - - if (!system_supports_fpsimd()) - return false; - - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; - } else { - sve_guest = false; - sve_host = false; - vhe = has_vhe(); - } - - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) - return false; - - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) - return false; - - /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - - if (sve_guest) - reg |= CPACR_EL1_ZEN; - - write_sysreg(reg, cpacr_el1); - } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } - - isb(); - - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). - */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - - if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); - } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); - } - - /* Skip restoring fpexc32 for AArch64 guests */ - if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); - - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; - - return true; -} - -static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) -{ - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); - int rt = kvm_vcpu_sys_get_rt(vcpu); - u64 val = vcpu_get_reg(vcpu, rt); - - /* - * The normal sysreg handling code expects to see the traps, - * let's not do anything here. - */ - if (vcpu->arch.hcr_el2 & HCR_TVM) - return false; - - switch (sysreg) { - case SYS_SCTLR_EL1: - write_sysreg_el1(val, SYS_SCTLR); - break; - case SYS_TTBR0_EL1: - write_sysreg_el1(val, SYS_TTBR0); - break; - case SYS_TTBR1_EL1: - write_sysreg_el1(val, SYS_TTBR1); - break; - case SYS_TCR_EL1: - write_sysreg_el1(val, SYS_TCR); - break; - case SYS_ESR_EL1: - write_sysreg_el1(val, SYS_ESR); - break; - case SYS_FAR_EL1: - write_sysreg_el1(val, SYS_FAR); - break; - case SYS_AFSR0_EL1: - write_sysreg_el1(val, SYS_AFSR0); - break; - case SYS_AFSR1_EL1: - write_sysreg_el1(val, SYS_AFSR1); - break; - case SYS_MAIR_EL1: - write_sysreg_el1(val, SYS_MAIR); - break; - case SYS_AMAIR_EL1: - write_sysreg_el1(val, SYS_AMAIR); - break; - case SYS_CONTEXTIDR_EL1: - write_sysreg_el1(val, SYS_CONTEXTIDR); - break; - default: - return false; - } - - __kvm_skip_instr(vcpu); - return true; -} - -static bool __hyp_text esr_is_ptrauth_trap(u32 esr) -{ - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - -static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *ctxt; - u64 val; - - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) - return false; - - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - - vcpu_ptrauth_enable(vcpu); - - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); - - return true; -} - -/* - * Return true when we were able to fixup the guest exit and should return to - * the guest, false when we should restore the host state and return to the - * main run loop. - */ -static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - - /* - * We're using the raw exception code in order to only process - * the trap if no SError is pending. We will come back to the - * same PC once the SError has been injected, and replay the - * trapping instruction. - */ - if (*exit_code != ARM_EXCEPTION_TRAP) - goto exit; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - return true; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. - */ - if (__hyp_handle_fpsimd(vcpu)) - return true; - - if (__hyp_handle_ptrauth(vcpu)) - return true; - - if (!__populate_fault_info(vcpu)) - return true; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - } - -exit: - /* Return to the host kernel and handle the exit */ - return false; -} - -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - -/** - * Disable host events, enable guest events - */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - -/* Switch to the guest for VHE systems running in EL2 */ -static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - u64 exit_code; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - sysreg_save_host_state_vhe(host_ctxt); - - /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). - */ - __activate_vm(vcpu->kvm); - __activate_traps(vcpu); - - sysreg_restore_guest_state_vhe(guest_ctxt); - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - sysreg_save_guest_state_vhe(guest_ctxt); - - __deactivate_traps(vcpu); - - sysreg_restore_host_state_vhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - __debug_switch_to_host(vcpu); - - return exit_code; -} -NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); - -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - int ret; - - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); - - ret = __kvm_vcpu_run_vhe(vcpu); - - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); - - return ret; -} - -/* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool pmu_switch_needed; - u64 exit_code; - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - pmr_sync(); - } - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); - - __sysreg_save_state_nvhe(host_ctxt); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - * - * Also, and in order to be able to deal with erratum #1319537 (A57) - * and #1319367 (A72), we must ensure that all VM-related sysreg are - * restored before we enable S2 translation. - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); - - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - __sysreg_save_state_nvhe(guest_ctxt); - __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); - __hyp_vgic_save_state(vcpu); - - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - - __sysreg_restore_state_nvhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - /* - * This must come after restoring the host sysregs, since a non-VHE - * system may enable SPE here and make use of the TTBRs. - */ - __debug_switch_to_host(vcpu); - - if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); - - /* Returning to host will clear PSR.I, remask PMR if needed */ - if (system_uses_irq_prio_masking()) - gic_write_pmr(GIC_PRIO_IRQOFF); - - return exit_code; -} - -static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; - -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *__host_ctxt) -{ - struct kvm_vcpu *vcpu; - unsigned long str_va; - - vcpu = __host_ctxt->__hyp_running_vcpu; - - if (read_sysreg(vttbr_el2)) { - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_state_nvhe(__host_ctxt); - } - - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. - */ - asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} - -static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) -{ - struct kvm_vcpu *vcpu; - vcpu = host_ctxt->__hyp_running_vcpu; - - __deactivate_traps(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - - panic(__hyp_panic_string, - spsr, elr, - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} -NOKPROBE_SYMBOL(__hyp_call_panic_vhe); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) -{ - u64 spsr = read_sysreg_el2(SYS_SPSR); - u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); - - if (!has_vhe()) - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); - else - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); - - unreachable(); -} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index cc7e957f5b2c..2493439a5c54 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -114,7 +114,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) /* * Must only be done for guest registers, hence the context * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with __activate_traps_nvhe(). + * set. Pairs with nVHE's __activate_traps(). */ write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | TCR_EPD1_MASK | TCR_EPD0_MASK), @@ -142,7 +142,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context - * test. Pairs with __deactivate_traps_nvhe(). + * test. Pairs with nVHE's __deactivate_traps(). */ isb(); /* diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 704140fc5d66..9f71cd3ba50d 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := tlb.o ../hyp-entry.o +obj-y := switch.o tlb.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c new file mode 100644 index 000000000000..e8d76cab44e4 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~CPACR_EL1_ZEN; + + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + + val |= CPTR_EL2_TAM; + + if (update_fp_enabled(vcpu)) { + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } else { + val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cpacr_el1); + + write_sysreg(kvm_get_hyp_vector(), vbar_el1); +} +NOKPROBE_SYMBOL(__activate_traps); + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + extern char vectors[]; /* kernel exception vectors */ + + ___deactivate_traps(vcpu); + + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by + * the host. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); + write_sysreg(vectors, vbar_el1); +} +NOKPROBE_SYMBOL(__deactivate_traps); + +void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +{ + __activate_traps_common(vcpu); +} + +void deactivate_traps_vhe_put(void) +{ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); + + __deactivate_traps_common(); +} + +/* Switch to the guest for VHE systems running in EL2 */ +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + sysreg_save_host_state_vhe(host_ctxt); + + /* + * ARM erratum 1165522 requires us to configure both stage 1 and + * stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. + * + * We have already configured the guest's stage 1 translation in + * kvm_vcpu_load_sysregs above. We must now call __activate_vm + * before __activate_traps, because __activate_vm configures + * stage 2 translation, and __activate_traps clear HCR_EL2.TGE + * (among other things). + */ + __activate_vm(vcpu->kvm); + __activate_traps(vcpu); + + sysreg_restore_guest_state_vhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + sysreg_save_guest_state_vhe(guest_ctxt); + + __deactivate_traps(vcpu); + + sysreg_restore_host_state_vhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + __debug_switch_to_host(vcpu); + + return exit_code; +} +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); + + return ret; +} + +static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, + struct kvm_cpu_context *host_ctxt) +{ + struct kvm_vcpu *vcpu; + vcpu = host_ctxt->__hyp_running_vcpu; + + __deactivate_traps(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); + + panic(__hyp_panic_string, + spsr, elr, + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); +} +NOKPROBE_SYMBOL(__hyp_call_panic); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + + __hyp_call_panic(spsr, elr, par, host_ctxt); + unreachable(); +} -- cgit From d400c5b2025c9aeca76213d6bd4138ec39da5cef Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:15 +0100 Subject: KVM: arm64: Split hyp/debug-sr.c to VHE/nVHE debug-sr.c contains KVM's code for context-switching debug registers, with some code shared between VHE/nVHE. These common routines are moved to a header file, VHE-specific code is moved to vhe/debug-sr.c and nVHE-specific code to nvhe/debug-sr.c. Functions are slightly refactored to move code hidden behind `has_vhe()` checks to the corresponding .c files. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-11-dbrazdil@google.com --- arch/arm64/kernel/image-vars.h | 5 - arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/debug-sr.c | 224 ------------------------------ arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 170 +++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 77 ++++++++++ arch/arm64/kvm/hyp/vhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/debug-sr.c | 26 ++++ 8 files changed, 276 insertions(+), 232 deletions(-) delete mode 100644 arch/arm64/kvm/hyp/debug-sr.c create mode 100644 arch/arm64/kvm/hyp/include/hyp/debug-sr.h create mode 100644 arch/arm64/kvm/hyp/nvhe/debug-sr.c create mode 100644 arch/arm64/kvm/hyp/vhe/debug-sr.c diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 06fe9833104a..c44ca4e7dd66 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -66,11 +66,6 @@ __efistub__ctype = _ctype; /* Symbols defined in aarch32.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(kvm_skip_instr32); -/* Symbols defined in debug-sr.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__debug_switch_to_guest); -KVM_NVHE_ALIAS(__debug_switch_to_host); -KVM_NVHE_ALIAS(__kvm_get_mdcr_el2); - /* Symbols defined in entry.S (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__guest_enter); KVM_NVHE_ALIAS(__guest_exit); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 7462d3a8a6f2..fc09025d2e97 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o fpsimd.o + entry.o fpsimd.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c deleted file mode 100644 index e95af204fec7..000000000000 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include -#include -#include - -#define read_debug(r,n) read_sysreg(r##n##_el1) -#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) - -#define save_debug(ptr,reg,nr) \ - switch (nr) { \ - case 15: ptr[15] = read_debug(reg, 15); \ - /* Fall through */ \ - case 14: ptr[14] = read_debug(reg, 14); \ - /* Fall through */ \ - case 13: ptr[13] = read_debug(reg, 13); \ - /* Fall through */ \ - case 12: ptr[12] = read_debug(reg, 12); \ - /* Fall through */ \ - case 11: ptr[11] = read_debug(reg, 11); \ - /* Fall through */ \ - case 10: ptr[10] = read_debug(reg, 10); \ - /* Fall through */ \ - case 9: ptr[9] = read_debug(reg, 9); \ - /* Fall through */ \ - case 8: ptr[8] = read_debug(reg, 8); \ - /* Fall through */ \ - case 7: ptr[7] = read_debug(reg, 7); \ - /* Fall through */ \ - case 6: ptr[6] = read_debug(reg, 6); \ - /* Fall through */ \ - case 5: ptr[5] = read_debug(reg, 5); \ - /* Fall through */ \ - case 4: ptr[4] = read_debug(reg, 4); \ - /* Fall through */ \ - case 3: ptr[3] = read_debug(reg, 3); \ - /* Fall through */ \ - case 2: ptr[2] = read_debug(reg, 2); \ - /* Fall through */ \ - case 1: ptr[1] = read_debug(reg, 1); \ - /* Fall through */ \ - default: ptr[0] = read_debug(reg, 0); \ - } - -#define restore_debug(ptr,reg,nr) \ - switch (nr) { \ - case 15: write_debug(ptr[15], reg, 15); \ - /* Fall through */ \ - case 14: write_debug(ptr[14], reg, 14); \ - /* Fall through */ \ - case 13: write_debug(ptr[13], reg, 13); \ - /* Fall through */ \ - case 12: write_debug(ptr[12], reg, 12); \ - /* Fall through */ \ - case 11: write_debug(ptr[11], reg, 11); \ - /* Fall through */ \ - case 10: write_debug(ptr[10], reg, 10); \ - /* Fall through */ \ - case 9: write_debug(ptr[9], reg, 9); \ - /* Fall through */ \ - case 8: write_debug(ptr[8], reg, 8); \ - /* Fall through */ \ - case 7: write_debug(ptr[7], reg, 7); \ - /* Fall through */ \ - case 6: write_debug(ptr[6], reg, 6); \ - /* Fall through */ \ - case 5: write_debug(ptr[5], reg, 5); \ - /* Fall through */ \ - case 4: write_debug(ptr[4], reg, 4); \ - /* Fall through */ \ - case 3: write_debug(ptr[3], reg, 3); \ - /* Fall through */ \ - case 2: write_debug(ptr[2], reg, 2); \ - /* Fall through */ \ - case 1: write_debug(ptr[1], reg, 1); \ - /* Fall through */ \ - default: write_debug(ptr[0], reg, 0); \ - } - -static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) -{ - u64 reg; - - /* Clear pmscr in case of early return */ - *pmscr_el1 = 0; - - /* SPE present on this CPU? */ - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), - ID_AA64DFR0_PMSVER_SHIFT)) - return; - - /* Yes; is it owned by EL3? */ - reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) - return; - - /* No; is the host actually using the thing? */ - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) - return; - - /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); - isb(); - - /* Now drain all buffered data to memory */ - psb_csync(); - dsb(nsh); -} - -static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) -{ - if (!pmscr_el1) - return; - - /* The host page table is installed, but not yet synchronised */ - isb(); - - /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); -} - -static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) -{ - u64 aa64dfr0; - int brps, wrps; - - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = (aa64dfr0 >> 12) & 0xf; - wrps = (aa64dfr0 >> 20) & 0xf; - - save_debug(dbg->dbg_bcr, dbgbcr, brps); - save_debug(dbg->dbg_bvr, dbgbvr, brps); - save_debug(dbg->dbg_wcr, dbgwcr, wrps); - save_debug(dbg->dbg_wvr, dbgwvr, wrps); - - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); -} - -static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) -{ - u64 aa64dfr0; - int brps, wrps; - - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); - - brps = (aa64dfr0 >> 12) & 0xf; - wrps = (aa64dfr0 >> 20) & 0xf; - - restore_debug(dbg->dbg_bcr, dbgbcr, brps); - restore_debug(dbg->dbg_bvr, dbgbvr, brps); - restore_debug(dbg->dbg_wcr, dbgwcr, wrps); - restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); -} - -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - struct kvm_guest_debug_arch *host_dbg; - struct kvm_guest_debug_arch *guest_dbg; - - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. - */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); -} - -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - struct kvm_guest_debug_arch *host_dbg; - struct kvm_guest_debug_arch *guest_dbg; - - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); - - vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; -} - -u32 __hyp_text __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h new file mode 100644 index 000000000000..e041dbd23243 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define __ARM64_KVM_HYP_DEBUG_SR_H__ + +#include +#include + +#include +#include +#include +#include + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + /* Fall through */ \ + case 14: ptr[14] = read_debug(reg, 14); \ + /* Fall through */ \ + case 13: ptr[13] = read_debug(reg, 13); \ + /* Fall through */ \ + case 12: ptr[12] = read_debug(reg, 12); \ + /* Fall through */ \ + case 11: ptr[11] = read_debug(reg, 11); \ + /* Fall through */ \ + case 10: ptr[10] = read_debug(reg, 10); \ + /* Fall through */ \ + case 9: ptr[9] = read_debug(reg, 9); \ + /* Fall through */ \ + case 8: ptr[8] = read_debug(reg, 8); \ + /* Fall through */ \ + case 7: ptr[7] = read_debug(reg, 7); \ + /* Fall through */ \ + case 6: ptr[6] = read_debug(reg, 6); \ + /* Fall through */ \ + case 5: ptr[5] = read_debug(reg, 5); \ + /* Fall through */ \ + case 4: ptr[4] = read_debug(reg, 4); \ + /* Fall through */ \ + case 3: ptr[3] = read_debug(reg, 3); \ + /* Fall through */ \ + case 2: ptr[2] = read_debug(reg, 2); \ + /* Fall through */ \ + case 1: ptr[1] = read_debug(reg, 1); \ + /* Fall through */ \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + /* Fall through */ \ + case 14: write_debug(ptr[14], reg, 14); \ + /* Fall through */ \ + case 13: write_debug(ptr[13], reg, 13); \ + /* Fall through */ \ + case 12: write_debug(ptr[12], reg, 12); \ + /* Fall through */ \ + case 11: write_debug(ptr[11], reg, 11); \ + /* Fall through */ \ + case 10: write_debug(ptr[10], reg, 10); \ + /* Fall through */ \ + case 9: write_debug(ptr[9], reg, 9); \ + /* Fall through */ \ + case 8: write_debug(ptr[8], reg, 8); \ + /* Fall through */ \ + case 7: write_debug(ptr[7], reg, 7); \ + /* Fall through */ \ + case 6: write_debug(ptr[6], reg, 6); \ + /* Fall through */ \ + case 5: write_debug(ptr[5], reg, 5); \ + /* Fall through */ \ + case 4: write_debug(ptr[4], reg, 4); \ + /* Fall through */ \ + case 3: write_debug(ptr[3], reg, 3); \ + /* Fall through */ \ + case 2: write_debug(ptr[2], reg, 2); \ + /* Fall through */ \ + case 1: write_debug(ptr[1], reg, 1); \ + /* Fall through */ \ + default: write_debug(ptr[0], reg, 0); \ + } + +static inline void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +static inline void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +static inline void __hyp_text __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(vcpu, host_dbg, host_ctxt); + __debug_restore_state(vcpu, guest_dbg, guest_ctxt); +} + +static inline void __hyp_text __debug_switch_to_host_common(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(vcpu, guest_dbg, guest_ctxt); + __debug_restore_state(vcpu, host_dbg, host_ctxt); + + vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 8b3ac38eaa44..b3cb67b63d67 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := switch.o tlb.o hyp-init.o ../hyp-entry.o +obj-y := debug-sr.o switch.o tlb.o hyp-init.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c new file mode 100644 index 000000000000..828c0d48e790 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include + +#include +#include +#include +#include + +static void __hyp_text __debug_save_spe(u64 *pmscr_el1) +{ + u64 reg; + + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) + return; + + /* No; is the host actually using the thing? */ + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static void __hyp_text __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); +} + +void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + /* Disable and flush SPE data generation */ + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_guest_common(vcpu); +} + +void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_host_common(vcpu); +} + +u32 __hyp_text __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 9f71cd3ba50d..62bdaf272b03 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := switch.o tlb.o ../hyp-entry.o +obj-y := debug-sr.o switch.o tlb.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c new file mode 100644 index 000000000000..f1e2e5a00933 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include + +#include + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} -- cgit From 13aeb9b400c5d7c5e979fdbbf994c787487f7889 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:16 +0100 Subject: KVM: arm64: Split hyp/sysreg-sr.c to VHE/nVHE sysreg-sr.c contains KVM's code for saving/restoring system registers, with some code shared between VHE/nVHE. These common routines are moved to a header file, VHE-specific code is moved to vhe/sysreg-sr.c and nVHE-specific code to nvhe/sysreg-sr.c. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-12-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 6 +- arch/arm64/include/asm/kvm_hyp.h | 5 +- arch/arm64/kernel/image-vars.h | 7 - arch/arm64/kvm/arm.c | 8 +- arch/arm64/kvm/hyp/Makefile | 4 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 203 ++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 1 + arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 46 ++++ arch/arm64/kvm/hyp/sysreg-sr.c | 333 ----------------------------- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 114 ++++++++++ 13 files changed, 380 insertions(+), 353 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h create mode 100644 arch/arm64/kvm/hyp/nvhe/sysreg-sr.c delete mode 100644 arch/arm64/kvm/hyp/sysreg-sr.c create mode 100644 arch/arm64/kvm/hyp/vhe/sysreg-sr.c diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 49d1a5cd8f8f..e0920df1d0c1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -338,7 +338,7 @@ struct kvm_vcpu_arch { struct vcpu_reset_state reset_state; /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ bool sysregs_loaded_on_cpu; /* Guest PV state */ @@ -639,8 +639,8 @@ static inline int kvm_arm_have_ssbd(void) } } -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); int kvm_set_ipa_limit(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 82fa05d15b8b..997c5bda1ac7 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -66,14 +66,15 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +#else void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); -void __sysreg32_save_state(struct kvm_vcpu *vcpu); -void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +#endif void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c44ca4e7dd66..59eb55893eaf 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -76,13 +76,6 @@ KVM_NVHE_ALIAS(abort_guest_exit_start); KVM_NVHE_ALIAS(__fpsimd_restore_state); KVM_NVHE_ALIAS(__fpsimd_save_state); -/* Symbols defined in sysreg-sr.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__kvm_enable_ssbs); -KVM_NVHE_ALIAS(__sysreg32_restore_state); -KVM_NVHE_ALIAS(__sysreg32_save_state); -KVM_NVHE_ALIAS(__sysreg_restore_state_nvhe); -KVM_NVHE_ALIAS(__sysreg_save_state_nvhe); - /* Symbols defined in timer-sr.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__kvm_timer_set_cntvoff); KVM_NVHE_ALIAS(__timer_disable_traps); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 26780b78a523..0bf2cf5614c6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -351,7 +351,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); - kvm_vcpu_load_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) @@ -369,7 +370,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_arch_vcpu_put_fp(vcpu); - kvm_vcpu_put_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_put_sysregs_vhe(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); @@ -1302,7 +1304,7 @@ static void cpu_init_hyp_mode(void) */ if (this_cpu_has_cap(ARM64_SSBS) && arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); + kvm_call_hyp_nvhe(__kvm_enable_ssbs); } } diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index fc09025d2e97..f49797237818 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -13,8 +13,8 @@ subdir-ccflags-y := -I$(incdir) \ obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o -hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - entry.o fpsimd.o +hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o entry.o \ + fpsimd.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000000..3e0585fbd403 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include +#include + +#include +#include +#include +#include + +static inline void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); +} + +static inline void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); +} + +static inline void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); + ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); + ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); + ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); + ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); + ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); + ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); +} + +static inline void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); +} + +static inline void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); +} + +static inline void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). + */ + write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); + write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); + write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); + write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); + write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); + write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); + write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); + write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); + write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); + write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); + write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); + write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } + + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); + write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); +} + +static inline void __hyp_text __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->gp_regs.regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. + */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); +} + +static inline void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (!vcpu_el1_is_32bit(vcpu)) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +static inline void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (!vcpu_el1_is_32bit(vcpu)) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index b3cb67b63d67..61a8160f0dd9 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := debug-sr.o switch.o tlb.o hyp-init.o ../hyp-entry.o +obj-y := sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 7f6b8d3dc637..f08bfb951df6 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -5,6 +5,7 @@ */ #include +#include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c new file mode 100644 index 000000000000..710cf28ab1ec --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include + +#include +#include +#include +#include + +/* + * Non-VHE: Both host and guest must save everything. + */ + +void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} + +void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c deleted file mode 100644 index 2493439a5c54..000000000000 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include -#include -#include - -/* - * Non-VHE: Both host and guest must save everything. - * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and - * pstate, which are handled as part of the el2 return state) on every - * switch (sp_el0 is being dealt with in the assembly code). - * tpidr_el0 and tpidrro_el0 only need to be switched when going - * to host userspace or a different VCPU. EL1 registers only need to be - * switched when potentially going to run a different VCPU. The latter two - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. - */ - -static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); -} - -static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); -} - -static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); -} - -static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) -{ - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_el1_state(ctxt); - __sysreg_save_common_state(ctxt); - __sysreg_save_user_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} - -void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); - -void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); - -static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); -} - -static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -} - -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - - if (has_vhe() || - !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } else if (!ctxt->__hyp_running_vcpu) { - /* - * Must only be done for guest registers, hence the context - * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with nVHE's __activate_traps(). - */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | - TCR_EPD1_MASK | TCR_EPD0_MASK), - SYS_TCR); - isb(); - } - - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - - if (!has_vhe() && - cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && - ctxt->__hyp_running_vcpu) { - /* - * Must only be done for host registers, hence the context - * test. Pairs with nVHE's __deactivate_traps(). - */ - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * deconfigured and disabled. We can now restore the host's - * S1 configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } - - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -} - -static void __hyp_text -__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) -{ - u64 pstate = ctxt->gp_regs.regs.pstate; - u64 mode = pstate & PSR_AA32_MODE_MASK; - - /* - * Safety check to ensure we're setting the CPU up to enter the guest - * in a less privileged mode. - * - * If we are attempting a return to EL2 or higher in AArch64 state, - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that - * we'll take an illegal exception state exception immediately after - * the ERET to the guest. Attempts to return to AArch32 Hyp will - * result in an illegal exception return because EL2's execution state - * is determined by SCR_EL3.RW. - */ - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) - pstate = PSR_MODE_EL2h | PSR_IL_BIT; - - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_el1_state(ctxt); - __sysreg_restore_common_state(ctxt); - __sysreg_restore_user_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} - -void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); - -void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); - -void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); -} - -void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); -} - -/** - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Load system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_load() function - * and loading system register state early avoids having to load them on - * every entry to the VM. - */ -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __sysreg_save_user_state(host_ctxt); - - /* - * Load guest EL1 and user state - * - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = true; - - activate_traps_vhe_load(vcpu); -} - -/** - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Save guest system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_put() function - * and deferring saving system register state until we're no longer running the - * VCPU avoids having to save them on every exit from the VM. - */ -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); - - __sysreg_save_el1_state(guest_ctxt); - __sysreg_save_user_state(guest_ctxt); - __sysreg32_save_state(vcpu); - - /* Restore host user state */ - __sysreg_restore_user_state(host_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = false; -} - -void __hyp_text __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 62bdaf272b03..2801582a739a 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := debug-sr.o switch.o tlb.o ../hyp-entry.o +obj-y := sysreg-sr.o debug-sr.o switch.o tlb.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e8d76cab44e4..c0d33deba77e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -120,7 +120,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * HCR_EL2.TGE. * * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. We must now call __activate_vm + * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm * before __activate_traps, because __activate_vm configures * stage 2 translation, and __activate_traps clear HCR_EL2.TGE * (among other things). diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c new file mode 100644 index 000000000000..996471e4c138 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +#include +#include + +#include +#include +#include +#include + +/* + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and + * pstate, which are handled as part of the el2 return state) on every + * switch (sp_el0 is being dealt with in the assembly code). + * tpidr_el0 and tpidrro_el0 only need to be switched when going + * to host userspace or a different VCPU. EL1 registers only need to be + * switched when potentially going to run a different VCPU. The latter two + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. + */ + +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); + +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); + +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); + +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); + +/** + * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __sysreg_save_user_state(host_ctxt); + + /* + * Load guest EL1 and user state + * + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_user_state(guest_ctxt); + __sysreg_restore_el1_state(guest_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = true; + + activate_traps_vhe_load(vcpu); +} + +/** + * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. + */ +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + deactivate_traps_vhe_put(); + + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); + __sysreg32_save_state(vcpu); + + /* Restore host user state */ + __sysreg_restore_user_state(host_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = false; +} -- cgit From 9aebdea494b5d2d5fe0ba54d71e9d6c5acfe76b4 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:17 +0100 Subject: KVM: arm64: Duplicate hyp/timer-sr.c for VHE/nVHE timer-sr.c contains a HVC handler for setting CNTVOFF_EL2 and two helper functions for controlling access to physical counter. The former is used by both VHE/nVHE and is duplicated, the latter are used only by nVHE and moved to nvhe/timer-sr.c. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-13-dbrazdil@google.com --- arch/arm64/include/asm/kvm_hyp.h | 2 ++ arch/arm64/kernel/image-vars.h | 5 ---- arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/Makefile | 3 ++- arch/arm64/kvm/hyp/nvhe/timer-sr.c | 48 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/timer-sr.c | 48 -------------------------------------- arch/arm64/kvm/hyp/vhe/Makefile | 2 +- arch/arm64/kvm/hyp/vhe/timer-sr.c | 12 ++++++++++ 8 files changed, 66 insertions(+), 56 deletions(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/timer-sr.c delete mode 100644 arch/arm64/kvm/hyp/timer-sr.c create mode 100644 arch/arm64/kvm/hyp/vhe/timer-sr.c diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 997c5bda1ac7..19f8b40fe6cf 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -63,8 +63,10 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); +#endif #ifdef __KVM_NVHE_HYPERVISOR__ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 59eb55893eaf..2757d3512704 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -76,11 +76,6 @@ KVM_NVHE_ALIAS(abort_guest_exit_start); KVM_NVHE_ALIAS(__fpsimd_restore_state); KVM_NVHE_ALIAS(__fpsimd_save_state); -/* Symbols defined in timer-sr.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__kvm_timer_set_cntvoff); -KVM_NVHE_ALIAS(__timer_disable_traps); -KVM_NVHE_ALIAS(__timer_enable_traps); - /* Symbols defined in vgic-v2-cpuif-proxy.c (not yet compiled with nVHE build rules). */ KVM_NVHE_ALIAS(__vgic_v2_perform_cpuif_access); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f49797237818..ef1aa7fe8f5a 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -13,7 +13,7 @@ subdir-ccflags-y := -I$(incdir) \ obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o -hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o entry.o \ +hyp-y := vgic-v3-sr.o aarch32.o vgic-v2-cpuif-proxy.o entry.o \ fpsimd.o # KVM code is run at a different exception code with a different map, so diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 61a8160f0dd9..0f4c544f07db 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,7 +6,8 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o ../hyp-entry.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o \ + ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c new file mode 100644 index 000000000000..42c8ed71d06e --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include +#include +#include + +#include + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c deleted file mode 100644 index fb5c0be33223..000000000000 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include -#include - -#include - -void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) -{ - write_sysreg(cntvoff, cntvoff_el2); -} - -/* - * Should only be called on non-VHE systems. - * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). - */ -void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) -{ - u64 val; - - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); -} - -/* - * Should only be called on non-VHE systems. - * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). - */ -void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) -{ - u64 val; - - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); -} diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 2801582a739a..a1dbbc5409bd 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,7 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := sysreg-sr.o debug-sr.o switch.o tlb.o ../hyp-entry.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c new file mode 100644 index 000000000000..4cda674a8be6 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#include + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} -- cgit From c04dd455eb311d2d289c9d81d080eaf11a06cebf Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:18 +0100 Subject: KVM: arm64: Compile remaining hyp/ files for both VHE/nVHE The following files in hyp/ contain only code shared by VHE/nVHE: vgic-v3-sr.c, aarch32.c, vgic-v2-cpuif-proxy.c, entry.S, fpsimd.S Compile them under both configurations. Deletions in image-vars.h reflect eliminated dependencies of nVHE code on the rest of the kernel. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-14-dbrazdil@google.com --- arch/arm64/kernel/image-vars.h | 29 ----------------------------- arch/arm64/kvm/hyp/Makefile | 13 +------------ arch/arm64/kvm/hyp/nvhe/Makefile | 5 +++-- arch/arm64/kvm/hyp/vhe/Makefile | 4 +++- 4 files changed, 7 insertions(+), 44 deletions(-) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 2757d3512704..9e897c500237 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -63,35 +63,6 @@ __efistub__ctype = _ctype; #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; -/* Symbols defined in aarch32.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(kvm_skip_instr32); - -/* Symbols defined in entry.S (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__guest_enter); -KVM_NVHE_ALIAS(__guest_exit); -KVM_NVHE_ALIAS(abort_guest_exit_end); -KVM_NVHE_ALIAS(abort_guest_exit_start); - -/* Symbols defined in fpsimd.S (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__fpsimd_restore_state); -KVM_NVHE_ALIAS(__fpsimd_save_state); - -/* Symbols defined in vgic-v2-cpuif-proxy.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__vgic_v2_perform_cpuif_access); - -/* Symbols defined in vgic-v3-sr.c (not yet compiled with nVHE build rules). */ -KVM_NVHE_ALIAS(__vgic_v3_activate_traps); -KVM_NVHE_ALIAS(__vgic_v3_deactivate_traps); -KVM_NVHE_ALIAS(__vgic_v3_get_ich_vtr_el2); -KVM_NVHE_ALIAS(__vgic_v3_init_lrs); -KVM_NVHE_ALIAS(__vgic_v3_perform_cpuif_access); -KVM_NVHE_ALIAS(__vgic_v3_read_vmcr); -KVM_NVHE_ALIAS(__vgic_v3_restore_aprs); -KVM_NVHE_ALIAS(__vgic_v3_restore_state); -KVM_NVHE_ALIAS(__vgic_v3_save_aprs); -KVM_NVHE_ALIAS(__vgic_v3_save_state); -KVM_NVHE_ALIAS(__vgic_v3_write_vmcr); - /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(arm64_enable_wa2_handling); KVM_NVHE_ALIAS(kvm_patch_vector_branch); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ef1aa7fe8f5a..f54f0e89a71c 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,16 +10,5 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += hyp.o vhe/ nvhe/ +obj-$(CONFIG_KVM) += vhe/ nvhe/ obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o - -hyp-y := vgic-v3-sr.o aarch32.o vgic-v2-cpuif-proxy.o entry.o \ - fpsimd.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0f4c544f07db..ad8729f5e814 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,8 +6,9 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o \ - ../hyp-entry.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index a1dbbc5409bd..090fd1e14be2 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,7 +6,9 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o ../hyp-entry.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may -- cgit From c50cb04303cb88c517715b078e3e010c024af1a5 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:19 +0100 Subject: KVM: arm64: Remove __hyp_text macro, use build rules instead With nVHE code now fully separated from the rest of the kernel, the effects of the __hyp_text macro (which had to be applied on all nVHE code) can be achieved with build rules instead. The macro used to: (a) move code to .hyp.text ELF section, now done by renaming .text using `objcopy`, and (b) `notrace` and `__noscs` would negate effects of CC_FLAGS_FTRACE and CC_FLAGS_SCS, respectivelly, now those flags are erased from KBUILD_CFLAGS (same way as in EFI stub). Note that by removing __hyp_text from code shared with VHE, all VHE code is now compiled into .text and without `notrace` and `__noscs`. Use of '.pushsection .hyp.text' removed from assembly files as this is now also covered by the build rules. For MAINTAINERS: if needed to re-run, uses of macro were removed with the following command. Formatting was fixed up manually. find arch/arm64/kvm/hyp -type f -name '*.c' -o -name '*.h' \ -exec sed -i 's/ __hyp_text//g' {} + Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-15-dbrazdil@google.com --- arch/arm64/include/asm/kvm_emulate.h | 2 +- arch/arm64/include/asm/kvm_hyp.h | 2 - arch/arm64/kvm/hyp/aarch32.c | 6 +- arch/arm64/kvm/hyp/entry.S | 1 - arch/arm64/kvm/hyp/fpsimd.S | 1 - arch/arm64/kvm/hyp/hyp-entry.S | 1 - arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 16 ++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 36 ++++---- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 20 ++--- arch/arm64/kvm/hyp/nvhe/Makefile | 8 +- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 10 +-- arch/arm64/kvm/hyp/nvhe/switch.c | 18 ++-- arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 6 +- arch/arm64/kvm/hyp/nvhe/timer-sr.c | 4 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 14 ++-- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 4 +- arch/arm64/kvm/hyp/vgic-v3-sr.c | 130 +++++++++++++---------------- 17 files changed, 132 insertions(+), 147 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4d0f8ea600ba..269a76cd51ff 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -516,7 +516,7 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. */ -static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 19f8b40fe6cf..46689e7db46c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -12,8 +12,6 @@ #include #include -#define __hyp_text __section(.hyp.text) notrace __noscs - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index 25c0e47d57cb..f9ff67dfbf0b 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -44,7 +44,7 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; @@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) * * IT[7:0] -> CPSR[26:25],CPSR[15:10] */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90186cf6473e..dfb4e6d359ab 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -21,7 +21,6 @@ #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text - .pushsection .hyp.text, "ax" /* * We treat x18 as callee-saved as the host may use it as a platform diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 5b8ff517ff10..01f114aa47b0 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -9,7 +9,6 @@ #include .text - .pushsection .hyp.text, "ax" SYM_FUNC_START(__fpsimd_save_state) fpsimd_save x0, 1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 8316ee67d6a0..689fccbc9de7 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -16,7 +16,6 @@ #include .text - .pushsection .hyp.text, "ax" .macro do_el2_call /* diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index e041dbd23243..24e8acf9ec10 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -88,9 +88,9 @@ default: write_debug(ptr[0], reg, 0); \ } -static inline void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static inline void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -107,9 +107,9 @@ static inline void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); } -static inline void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static inline void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -127,7 +127,7 @@ static inline void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); } -static inline void __hyp_text __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -146,7 +146,7 @@ static inline void __hyp_text __debug_switch_to_guest_common(struct kvm_vcpu *vc __debug_restore_state(vcpu, guest_dbg, guest_ctxt); } -static inline void __hyp_text __debug_switch_to_host_common(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 65d2a879b93b..8f622688fa64 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -30,7 +30,7 @@ extern const char __hyp_panic_string[]; /* Check whether the FP regs were dirtied while in the host-side run loop: */ -static inline bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) +static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) { /* * When the system doesn't support FP/SIMD, we cannot rely on @@ -48,7 +48,7 @@ static inline bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) } /* Save the 32-bit only FPSIMD system register state */ -static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { if (!vcpu_el1_is_32bit(vcpu)) return; @@ -56,7 +56,7 @@ static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); } -static inline void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) { /* * We are about to set CPTR_EL2.TFP to trap all floating point @@ -73,7 +73,7 @@ static inline void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } -static inline void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -89,13 +89,13 @@ static inline void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } -static inline void __hyp_text __deactivate_traps_common(void) +static inline void __deactivate_traps_common(void) { write_sysreg(0, hstr_el2); write_sysreg(0, pmuserenr_el0); } -static inline void __hyp_text ___activate_traps(struct kvm_vcpu *vcpu) +static inline void ___activate_traps(struct kvm_vcpu *vcpu) { u64 hcr = vcpu->arch.hcr_el2; @@ -108,7 +108,7 @@ static inline void __hyp_text ___activate_traps(struct kvm_vcpu *vcpu) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); } -static inline void __hyp_text ___deactivate_traps(struct kvm_vcpu *vcpu) +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) { /* * If we pended a virtual abort, preserve it until it gets @@ -122,12 +122,12 @@ static inline void __hyp_text ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline void __hyp_text __activate_vm(struct kvm *kvm) +static inline void __activate_vm(struct kvm *kvm) { __load_guest_stage2(kvm); } -static inline bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -156,7 +156,7 @@ static inline bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } -static inline bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { u8 ec; u64 esr; @@ -196,7 +196,7 @@ static inline bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) } /* Check for an FPSIMD/SVE trap and handle as appropriate */ -static inline bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) { bool vhe, sve_guest, sve_host; u8 hsr_ec; @@ -283,7 +283,7 @@ static inline bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) return true; } -static inline bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) { u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); int rt = kvm_vcpu_sys_get_rt(vcpu); @@ -338,7 +338,7 @@ static inline bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } -static inline bool __hyp_text esr_is_ptrauth_trap(u32 esr) +static inline bool esr_is_ptrauth_trap(u32 esr) { u32 ec = ESR_ELx_EC(esr); @@ -371,7 +371,7 @@ static inline bool __hyp_text esr_is_ptrauth_trap(u32 esr) regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ }) -static inline bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *ctxt; u64 val; @@ -401,7 +401,7 @@ static inline bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) * the guest, false when we should restore the host state and return to the * main run loop. */ -static inline bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -473,7 +473,7 @@ exit: return false; } -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) +static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) { if (!cpus_have_final_cap(ARM64_SSBD)) return false; @@ -481,7 +481,7 @@ static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); } -static inline void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) { #ifdef CONFIG_ARM64_SSBD /* @@ -494,7 +494,7 @@ static inline void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu #endif } -static inline void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) { #ifdef CONFIG_ARM64_SSBD /* diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 3e0585fbd403..6e04e061f762 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -15,18 +15,18 @@ #include #include -static inline void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); } -static inline void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); } -static inline void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); @@ -51,7 +51,7 @@ static inline void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ct ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); } -static inline void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); @@ -60,18 +60,18 @@ static inline void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_cont ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); } -static inline void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); } -static inline void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); } -static inline void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); @@ -130,7 +130,7 @@ static inline void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); } -static inline void __hyp_text __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { u64 pstate = ctxt->gp_regs.regs.pstate; u64 mode = pstate & PSR_AA32_MODE_MASK; @@ -156,7 +156,7 @@ static inline void __hyp_text __sysreg_restore_el2_return_state(struct kvm_cpu_c write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); } -static inline void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) { u64 *spsr, *sysreg; @@ -178,7 +178,7 @@ static inline void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); } -static inline void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) { u64 *spsr, *sysreg; diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ad8729f5e814..0b34414557d6 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -21,7 +21,13 @@ $(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE $(call if_changed,hypcopy) quiet_cmd_hypcopy = HYPCOPY $@ - cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@ + cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ + +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) # KVM nVHE code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 828c0d48e790..91a711aa8382 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -14,7 +14,7 @@ #include #include -static void __hyp_text __debug_save_spe(u64 *pmscr_el1) +static void __debug_save_spe(u64 *pmscr_el1) { u64 reg; @@ -46,7 +46,7 @@ static void __hyp_text __debug_save_spe(u64 *pmscr_el1) dsb(nsh); } -static void __hyp_text __debug_restore_spe(u64 pmscr_el1) +static void __debug_restore_spe(u64 pmscr_el1) { if (!pmscr_el1) return; @@ -58,20 +58,20 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1) write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); } -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); __debug_switch_to_guest_common(vcpu); } -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +void __debug_switch_to_host(struct kvm_vcpu *vcpu) { __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); __debug_switch_to_host_common(vcpu); } -u32 __hyp_text __kvm_get_mdcr_el2(void) +u32 __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f08bfb951df6..a1dcf59bd45e 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -27,7 +27,7 @@ #include #include -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -58,7 +58,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) } } -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +static void __deactivate_traps(struct kvm_vcpu *vcpu) { u64 mdcr_el2; @@ -93,13 +93,13 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +static void __deactivate_vm(struct kvm_vcpu *vcpu) { write_sysreg(0, vttbr_el2); } /* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); @@ -108,7 +108,7 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) } /* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); @@ -119,7 +119,7 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) /** * Disable host events, enable guest events */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) { struct kvm_host_data *host; struct kvm_pmu_events *pmu; @@ -139,7 +139,7 @@ static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) /** * Disable guest events, enable host events */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) { struct kvm_host_data *host; struct kvm_pmu_events *pmu; @@ -155,7 +155,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) } /* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -242,7 +242,7 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) return exit_code; } -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 710cf28ab1ec..88a25fc8fcd3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -18,7 +18,7 @@ * Non-VHE: Both host and guest must save everything. */ -void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) { __sysreg_save_el1_state(ctxt); __sysreg_save_common_state(ctxt); @@ -26,7 +26,7 @@ void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_save_el2_return_state(ctxt); } -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) { __sysreg_restore_el1_state(ctxt); __sysreg_restore_common_state(ctxt); @@ -34,7 +34,7 @@ void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_restore_el2_return_state(ctxt); } -void __hyp_text __kvm_enable_ssbs(void) +void __kvm_enable_ssbs(void) { u64 tmp; diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index 42c8ed71d06e..9072e71693ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -19,7 +19,7 @@ void __kvm_timer_set_cntvoff(u64 cntvoff) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +void __timer_disable_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +void __timer_enable_traps(struct kvm_vcpu *vcpu) { u64 val; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index deb48c8c00ee..d4475f8340c4 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -12,8 +12,7 @@ struct tlb_inv_context { u64 tcr; }; -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) +static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) { if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -36,8 +35,7 @@ static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) +static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) { write_sysreg(0, vttbr_el2); @@ -49,7 +47,7 @@ static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, } } -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { struct tlb_inv_context cxt; @@ -103,7 +101,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) __tlb_switch_to_host(kvm, &cxt); } -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) +void __kvm_tlb_flush_vmid(struct kvm *kvm) { struct tlb_inv_context cxt; @@ -120,7 +118,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) __tlb_switch_to_host(kvm, &cxt); } -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); struct tlb_inv_context cxt; @@ -135,7 +133,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) __tlb_switch_to_host(kvm, &cxt); } -void __hyp_text __kvm_flush_vm_context(void) +void __kvm_flush_vm_context(void) { dsb(ishst); __tlbi(alle1is); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 4f3a087e36d5..bd1bab551d48 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -13,7 +13,7 @@ #include #include -static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +static bool __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); @@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) * 0: Not a GICV access * -1: Illegal GICV access successfully performed */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct vgic_dist *vgic = &kvm->arch.vgic; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 10ed539835c1..d31eb6266f2e 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -16,7 +16,7 @@ #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) -static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +static u64 __gic_v3_get_lr(unsigned int lr) { switch (lr & 0xf) { case 0: @@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) unreachable(); } -static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +static void __gic_v3_set_lr(u64 val, int lr) { switch (lr & 0xf) { case 0: @@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +static void __vgic_v3_write_ap0rn(u32 val, int n) { switch (n) { case 0: @@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) } } -static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +static void __vgic_v3_write_ap1rn(u32 val, int n) { switch (n) { case 0: @@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) } } -static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +static u32 __vgic_v3_read_ap0rn(int n) { u32 val; @@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n) return val; } -static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +static u32 __vgic_v3_read_ap1rn(int n) { u32 val; @@ -194,7 +194,7 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n) return val; } -void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; @@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; int i; @@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) { /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a @@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) { u64 val; @@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_init_lrs(void) +void __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); int i; @@ -403,28 +403,28 @@ void __hyp_text __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +u64 __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -u64 __hyp_text __vgic_v3_read_vmcr(void) +u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } -static int __hyp_text __vgic_v3_bpr_min(void) +static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } -static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; @@ -434,9 +434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) #define GICv3_IDLE_PRIORITY 0xff -static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, - u32 vmcr, - u64 *lr_val) +static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; u8 priority = GICv3_IDLE_PRIORITY; @@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } -static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, - int intid, u64 *lr_val) +static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; int i; @@ -494,7 +493,7 @@ static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, return -1; } -static int __hyp_text __vgic_v3_get_highest_active_priority(void) +static int __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; } -static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; @@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) * Convert a priority to a preemption level, taking the relevant BPR * into account by zeroing the sub-priority bits. */ -static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) { unsigned int bpr; @@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) * matter what the guest does with its BPR, we can always set/get the * same value of a priority. */ -static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) { u8 pre, ap; u32 val; @@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } -static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +static int __vgic_v3_clear_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; u8 lr_prio, pmr; @@ -661,7 +660,7 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } -static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +static void __vgic_v3_clear_active_lr(int lr, u64 lr_val) { lr_val &= ~ICH_LR_ACTIVE_BIT; if (lr_val & ICH_LR_HW) { @@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) __gic_v3_set_lr(lr_val, lr); } -static void __hyp_text __vgic_v3_bump_eoicount(void) +static void __vgic_v3_bump_eoicount(void) { u32 hcr; @@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void) write_gicreg(hcr, ICH_HCR_EL2); } -static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -707,7 +705,7 @@ static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); } -static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); } -static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); } -static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); } -static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min() - 1; @@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); @@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val; @@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val = vcpu_get_reg(vcpu, rt); @@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int __vgic_v3_write_ap1rn(val, n); } -static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; int lr, lr_grp, grp; @@ -915,16 +906,14 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } -static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vmcr &= ICH_VMCR_PMR_MASK; vmcr >>= ICH_VMCR_PMR_SHIFT; vcpu_set_reg(vcpu, rt, vmcr); } -static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = __vgic_v3_get_highest_active_priority(); vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vtr, val; @@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; u32 esr; -- cgit From f9a026e3d38ba81cd274725f1caaf64322a86aa5 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 25 Jun 2020 14:14:20 +0100 Subject: KVM: arm64: Lift instrumentation restrictions on VHE With VHE and nVHE executable code completely separated, remove build config that disabled GCOV/KASAN/UBSAN/KCOV instrumentation for VHE as these now execute under the same memory mappings as the rest of the kernel. No violations are currently being reported by either KASAN or UBSAN. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200625131420.71444-16-dbrazdil@google.com --- arch/arm64/kvm/hyp/vhe/Makefile | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 090fd1e14be2..461e97c375cc 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -9,11 +9,3 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n -- cgit From 731532176716e2775a5d21115bb9c5c61e0cb704 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 1 Jul 2020 16:02:06 +0200 Subject: KVM: arm64: vgic-its: Change default outer cacheability for {PEND, PROP}BASER PENDBASER and PROPBASER define the outer caching mode for LPI tables. The memory backing them may not be outer sharable, so we mark them as nC by default. This however, breaks Windows on ARM which only accepts SameAsInner or RaWaWb as values for outer cachability. We do today already allow the outer mode to be set to SameAsInner explicitly, so the easy fix is to default to that instead of nC for situations when an OS asks for a not fulfillable cachability request. This fixes booting Windows in KVM with vgicv3 and ITS enabled for me. Signed-off-by: Alexander Graf Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200701140206.8664-1-graf@amazon.com --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index d2339a2b9fb9..5c786b915cd3 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -389,7 +389,7 @@ u64 vgic_sanitise_outer_cacheability(u64 field) case GIC_BASER_CACHE_nC: return field; default: - return GIC_BASER_CACHE_nC; + return GIC_BASER_CACHE_SameAsInner; } } -- cgit From 95fa0ba83e66dea0d3af48ad69842ae8c1dd9af2 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Wed, 1 Jul 2020 08:07:09 -0400 Subject: KVM: arm64: Drop long gone function parameter documentation update_vmid() just has one parameter "vmid".The other parameter "kvm" is no longer used. Signed-off-by: Peng Hao Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200701120709.388377-1-richard.peng@oppo.com --- arch/arm64/kvm/arm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 90cb90561446..5bf9bf54b22c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -466,7 +466,6 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid) /** * update_vmid - Update the vmid with a valid VMID for the current generation - * @kvm: The guest that struct vmid belongs to * @vmid: The stage-2 VMID information struct */ static void update_vmid(struct kvm_vmid *vmid) -- cgit From 3a949f4c93548751a377691b2a208063da05e369 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Jun 2020 11:57:05 +1000 Subject: KVM: arm64: Rename HSR to ESR kvm/arm32 isn't supported since commit 541ad0150ca4 ("arm: Remove 32bit KVM host support"). So HSR isn't meaningful since then. This renames HSR to ESR accordingly. This shouldn't cause any functional changes: * Rename kvm_vcpu_get_hsr() to kvm_vcpu_get_esr() to make the function names self-explanatory. * Rename variables from @hsr to @esr to make them self-explanatory. Note that the renaming on uapi and tracepoint will cause ABI changes, which we should avoid. Specificly, there are 4 related source files in this regard: * arch/arm64/include/uapi/asm/kvm.h (struct kvm_debug_exit_arch::hsr) * arch/arm64/kvm/handle_exit.c (struct kvm_debug_exit_arch::hsr) * arch/arm64/kvm/trace_arm.h (tracepoints) * arch/arm64/kvm/trace_handle_exit.h (tracepoints) Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Acked-by: Andrew Scull Link: https://lore.kernel.org/r/20200630015705.103366-1-gshan@redhat.com --- arch/arm64/include/asm/kvm_emulate.h | 34 +++++++++++++++++----------------- arch/arm64/kvm/handle_exit.c | 32 ++++++++++++++++---------------- arch/arm64/kvm/hyp/aarch32.c | 2 +- arch/arm64/kvm/hyp/switch.c | 14 +++++++------- arch/arm64/kvm/hyp/vgic-v3-sr.c | 4 ++-- arch/arm64/kvm/mmu.c | 6 +++--- arch/arm64/kvm/sys_regs.c | 28 ++++++++++++++-------------- 7 files changed, 60 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4d0f8ea600ba..c9ba0df47f7d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -259,14 +259,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); if (esr & ESR_ELx_CV) return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; @@ -291,64 +291,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK; } static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV); } static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); + return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE); } static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF); } static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; + return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM); } static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL); } static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) @@ -358,12 +358,12 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) @@ -387,7 +387,7 @@ static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5a02d4c90559..98ab33139982 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,7 +89,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { + if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); @@ -119,13 +119,13 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int ret = 0; run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.hsr = hsr; + run->debug.arch.hsr = esr; - switch (ESR_ELx_EC(hsr)) { + switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -135,8 +135,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) case ESR_ELx_EC_BRK64: break; default: - kvm_err("%s: un-handled case hsr: %#08x\n", - __func__, (unsigned int) hsr); + kvm_err("%s: un-handled case esr: %#08x\n", + __func__, (unsigned int) esr); ret = -1; break; } @@ -146,10 +146,10 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); - kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); + kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n", + esr, esr_get_class_string(esr)); kvm_inject_undefined(vcpu); return 1; @@ -200,10 +200,10 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = ESR_ELx_EC(hsr); + u32 esr = kvm_vcpu_get_esr(vcpu); + u8 esr_ec = ESR_ELx_EC(esr); - return arm_exit_handlers[hsr_ec]; + return arm_exit_handlers[esr_ec]; } /* @@ -241,15 +241,15 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) { if (ARM_SERROR_PENDING(exception_index)) { - u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); /* * HVC/SMC already have an adjusted PC, which we need * to correct in order to return to after having * injected the SError. */ - if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 || - hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) { + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || + esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; *vcpu_pc(vcpu) -= adj; } @@ -307,5 +307,5 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, exception_index = ARM_EXCEPTION_CODE(exception_index); if (exception_index == ARM_EXCEPTION_EL1_SERROR) - kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); + kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index 25c0e47d57cb..1e948704d60f 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -51,7 +51,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) int cond; /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) + if (kvm_vcpu_get_esr(vcpu) >> 30) return true; /* Is condition field valid? */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index db1c4487d95d..5164074c1ae1 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -356,7 +356,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) { bool vhe, sve_guest, sve_host; - u8 hsr_ec; + u8 esr_ec; if (!system_supports_fpsimd()) return false; @@ -371,14 +371,14 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) vhe = has_vhe(); } - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) + esr_ec = kvm_vcpu_trap_get_class(vcpu); + if (esr_ec != ESR_ELx_EC_FP_ASIMD && + esr_ec != ESR_ELx_EC_SVE) return false; /* Don't handle SVE traps for non-SVE vcpus here: */ if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) + if (esr_ec != ESR_ELx_EC_FP_ASIMD) return false; /* Valid trap. Switch the context: */ @@ -437,7 +437,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) { - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); int rt = kvm_vcpu_sys_get_rt(vcpu); u64 val = vcpu_get_reg(vcpu, rt); @@ -529,7 +529,7 @@ static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) u64 val; if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) return false; ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 10ed539835c1..bee0a74671ca 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -426,7 +426,7 @@ static int __hyp_text __vgic_v3_bpr_min(void) static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; return crm != 8; @@ -992,7 +992,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) bool is_read; u32 sysreg; - esr = kvm_vcpu_get_hsr(vcpu); + esr = kvm_vcpu_get_esr(vcpu); if (vcpu_mode_is_32bit(vcpu)) { if (!kvm_condition_valid(vcpu)) { __kvm_skip_instr(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8c0035cab6b6..36506112480e 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2079,7 +2079,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. */ - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) return 1; if (unlikely(!is_iabt)) { @@ -2088,7 +2088,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) } } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ @@ -2097,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), - (unsigned long)kvm_vcpu_get_hsr(vcpu)); + (unsigned long)kvm_vcpu_get_esr(vcpu)); return -EFAULT; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index baf5ce9225ce..a96dd62a90ce 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2220,10 +2220,10 @@ static int emulate_cp(struct kvm_vcpu *vcpu, static void unhandled_cp_access(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); int cp = -1; - switch(hsr_ec) { + switch (esr_ec) { case ESR_ELx_EC_CP15_32: case ESR_ELx_EC_CP15_64: cp = 15; @@ -2254,17 +2254,17 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, size_t nr_specific) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - int Rt2 = (hsr >> 10) & 0x1f; + int Rt2 = (esr >> 10) & 0x1f; params.is_aarch32 = true; params.is_32bit = false; - params.CRm = (hsr >> 1) & 0xf; - params.is_write = ((hsr & 1) == 0); + params.CRm = (esr >> 1) & 0xf; + params.is_write = ((esr & 1) == 0); params.Op0 = 0; - params.Op1 = (hsr >> 16) & 0xf; + params.Op1 = (esr >> 16) & 0xf; params.Op2 = 0; params.CRn = 0; @@ -2311,18 +2311,18 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, size_t nr_specific) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; params.is_32bit = true; - params.CRm = (hsr >> 1) & 0xf; + params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = ((hsr & 1) == 0); - params.CRn = (hsr >> 10) & 0xf; + params.is_write = ((esr & 1) == 0); + params.CRn = (esr >> 10) & 0xf; params.Op0 = 0; - params.Op1 = (hsr >> 14) & 0x7; - params.Op2 = (hsr >> 17) & 0x7; + params.Op1 = (esr >> 14) & 0x7; + params.Op2 = (esr >> 17) & 0x7; if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || !emulate_cp(vcpu, ¶ms, global, nr_global)) { @@ -2421,7 +2421,7 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct sys_reg_params params; - unsigned long esr = kvm_vcpu_get_hsr(vcpu); + unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); int ret; -- cgit From 6fef0d4ea575c0561b38fd9149a05f1acb505bc0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 08:55:47 +0100 Subject: mfd: twl4030-irq: Fix incorrect type in assignment warning Silences Sparse warning: drivers/mfd/twl4030-irq.c:485:26: warning: incorrect type in assignment (different base types) drivers/mfd/twl4030-irq.c:485:26: expected unsigned int [usertype] word drivers/mfd/twl4030-irq.c:485:26: got restricted __le32 [usertype] Cc: Tony Lindgren Cc: Kai Svahn Cc: Syed Khasim Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/twl4030-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 910a304b397c..d05bc74daba3 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -477,7 +477,7 @@ static void twl4030_sih_bus_sync_unlock(struct irq_data *data) if (agent->imr_change_pending) { union { - u32 word; + __le32 word; u8 bytes[4]; } imr; -- cgit From b174015b1ddd0a7009acaf6aa7019e9883b0f75b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 09:01:07 +0100 Subject: mfd: twl4030-irq: Fix cast to restricted __le32 warning Silences Sparse warning(s): drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 drivers/mfd/twl4030-irq.c:573:40: warning: cast to restricted __le32 Cc: Tony Lindgren Cc: Kai Svahn Cc: Syed Khasim Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/twl4030-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index d05bc74daba3..ab417438d1fa 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -561,7 +561,7 @@ static inline int sih_read_isr(const struct sih *sih) int status; union { u8 bytes[4]; - u32 word; + __le32 word; } isr; /* FIXME need retry-on-error ... */ -- cgit From c504a2486ab67cd9fc38940ad2841075d6a3e9a4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 09:15:56 +0100 Subject: mfd: tps6586x: Fix cast to restricted __le32 warning Silences Sparse warning(s): drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 drivers/mfd/tps6586x.c:323:16: warning: cast to restricted __le32 Cc: Mike Rapoport Cc: Eric Miao Signed-off-by: Lee Jones --- drivers/mfd/tps6586x.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index c8aadd39324e..c36597797ddd 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -309,18 +309,19 @@ static const struct irq_domain_ops tps6586x_domain_ops = { static irqreturn_t tps6586x_irq(int irq, void *data) { struct tps6586x *tps6586x = data; - u32 acks; + uint32_t acks; + __le32 val; int ret = 0; ret = tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, - sizeof(acks), (uint8_t *)&acks); + sizeof(acks), (uint8_t *)&val); if (ret < 0) { dev_err(tps6586x->dev, "failed to read interrupt status\n"); return IRQ_NONE; } - acks = le32_to_cpu(acks); + acks = le32_to_cpu(val); while (acks) { int i = __ffs(acks); -- cgit From d9ca7801b6e5ffdfd7a4ed3f287b277895e63e90 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 09:55:05 +0100 Subject: mfd: altera-sysmgr: Fix physical address storing hacks Sparse reports: drivers/mfd/altera-sysmgr.c:150:30: warning: incorrect type in assignment (different address spaces) drivers/mfd/altera-sysmgr.c:150:30: expected unsigned int [usertype] *base drivers/mfd/altera-sysmgr.c:150:30: got void [noderef] * drivers/mfd/altera-sysmgr.c:156:26: warning: incorrect type in argument 3 (different address spaces) drivers/mfd/altera-sysmgr.c:156:26: expected void [noderef] *regs drivers/mfd/altera-sysmgr.c:156:26: got unsigned int [usertype] *base It appears as though the driver data property 'resource_size_t *base' was being used to store 2 different types of addresses (physical and IO-mapped) under a single declared type. Fortunately, no value is recalled from the driver data entry, so it can be easily omitted. Instead we can use the value obtained directly from the platform resource to pass through Regmap into the call-backs to be used for the SMCC call and use a local dedicated __iomem variable for IO-remapping. Cc: Thor Thayer Signed-off-by: Lee Jones --- drivers/mfd/altera-sysmgr.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index d2a13a547a3c..83f0765f819b 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -22,11 +22,9 @@ /** * struct altr_sysmgr - Altera SOCFPGA System Manager * @regmap: the regmap used for System Manager accesses. - * @base : the base address for the System Manager */ struct altr_sysmgr { struct regmap *regmap; - resource_size_t *base; }; static struct platform_driver altr_sysmgr_driver; @@ -127,6 +125,7 @@ static int sysmgr_probe(struct platform_device *pdev) struct regmap_config sysmgr_config = altr_sysmgr_regmap_cfg; struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; + void __iomem *base; sysmgr = devm_kzalloc(dev, sizeof(*sysmgr), GFP_KERNEL); if (!sysmgr) @@ -139,22 +138,19 @@ static int sysmgr_probe(struct platform_device *pdev) sysmgr_config.max_register = resource_size(res) - sysmgr_config.reg_stride; if (of_device_is_compatible(np, "altr,sys-mgr-s10")) { - /* Need physical address for SMCC call */ - sysmgr->base = (resource_size_t *)res->start; sysmgr_config.reg_read = s10_protected_reg_read; sysmgr_config.reg_write = s10_protected_reg_write; - regmap = devm_regmap_init(dev, NULL, sysmgr->base, + /* Need physical address for SMCC call */ + regmap = devm_regmap_init(dev, NULL, (void *)res->start, &sysmgr_config); } else { - sysmgr->base = devm_ioremap(dev, res->start, - resource_size(res)); - if (!sysmgr->base) + base = devm_ioremap(dev, res->start, resource_size(res)); + if (!base) return -ENOMEM; sysmgr_config.max_register = res->end - res->start - 3; - regmap = devm_regmap_init_mmio(dev, sysmgr->base, - &sysmgr_config); + regmap = devm_regmap_init_mmio(dev, base, &sysmgr_config); } if (IS_ERR(regmap)) { -- cgit From 3d4a87576f3762c8c986950af157bc9025808b5c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 10:08:41 +0100 Subject: mfd: sprd-sc27xx-spi: Fix symbol 'sprd_pmic_detect_charger_type' was not declared warning Sparse reports: drivers/mfd/sprd-sc27xx-spi.c:59:23: warning: symbol 'sprd_pmic_detect_charger_type' was not declared. Should it be static? ... due to a missing header file. Cc: Orson Zhai Cc: Chunyan Zhang Signed-off-by: Lee Jones Reviewed-by: Baolin Wang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index d030a5da5544..d3486c2a1b33 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include -- cgit From 54daa5d47c47662067115a78dd317fba3de9e828 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 10:26:11 +0100 Subject: mfd: ab3100-core: Fix incompatible types in comparison expression warning Smatch reports: drivers/mfd/ab3100-core.c:501:20: error: incompatible types in comparison expression (different type sizes): drivers/mfd/ab3100-core.c:501:20: unsigned int * drivers/mfd/ab3100-core.c:501:20: unsigned long * drivers/mfd/ab8500-debugfs.c:1804:20: error: incompatible types in comparison expression (different type sizes): drivers/mfd/ab8500-debugfs.c:1804:20: unsigned int * drivers/mfd/ab8500-debugfs.c:1804:20: unsigned long * Since the second min() argument can be less than 0 a signed variable is required for assignment. However, the non-sized type size_t is passed in from the userspace handlers. In order to firstly compare, then assign the smallest value, we firstly need to cast them both to the same as the receiving size_t typed variable. Cc: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/ab3100-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c index 57723f116bb5..ee71ae04b5e6 100644 --- a/drivers/mfd/ab3100-core.c +++ b/drivers/mfd/ab3100-core.c @@ -498,7 +498,7 @@ static ssize_t ab3100_get_set_reg(struct file *file, int i = 0; /* Get userspace string and assure termination */ - buf_size = min(count, (sizeof(buf)-1)); + buf_size = min((ssize_t)count, (ssize_t)(sizeof(buf)-1)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; buf[buf_size] = 0; -- cgit From ddb6b26c4102aab886277b2ca3c027b4976d819b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 11:05:23 +0100 Subject: mfd: ab8500-debugfs: Fix incompatible types in comparison expression issue Smatch reports: drivers/mfd/ab8500-debugfs.c:1804:20: error: incompatible types in comparison expression (different type sizes): drivers/mfd/ab8500-debugfs.c:1804:20: unsigned int * drivers/mfd/ab8500-debugfs.c:1804:20: unsigned long * This is due to mixed types being compared in a min() comparison. Fix this by treating values as signed and casting them to the same type as the receiving variable. Cc: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/ab8500-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 1a9a3414d4fa..6d1bf7c3ca3b 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1801,7 +1801,7 @@ static ssize_t ab8500_hwreg_write(struct file *file, int buf_size, ret; /* Get userspace string and assure termination */ - buf_size = min(count, (sizeof(buf)-1)); + buf_size = min((int)count, (int)(sizeof(buf)-1)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; buf[buf_size] = 0; -- cgit From 0dfae4a3205cb73e8ab2aafd4cbd0ff1fb082b5f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 13:36:30 +0100 Subject: mfd: tc3589x: Remove invalid use of kerneldoc syntax Kerneldoc is for documenting function arguments and return values. Prevents warnings like: drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35890' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35892' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35893' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35894' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35895' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_TC35896' not described in enum 'tc3589x_version' drivers/mfd/tc3589x.c:32: warning: Enum value 'TC3589X_UNKNOWN' not described in enum 'tc3589x_version' Signed-off-by: Lee Jones --- drivers/mfd/tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c index 67c9995bb1aa..7882a37ffc35 100644 --- a/drivers/mfd/tc3589x.c +++ b/drivers/mfd/tc3589x.c @@ -18,7 +18,7 @@ #include #include -/** +/* * enum tc3589x_version - indicates the TC3589x version */ enum tc3589x_version { -- cgit From f139ef70789a6e8ae1692e00cf0a3ef7d68bf6b1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 14:06:40 +0100 Subject: mfd: wm8400-core: Supply description for wm8400_reset_codec_reg_cache's arg Kerneldoc syntax is used, but not complete. Descriptions required. Prevents warnings like: drivers/mfd/wm8400-core.c:113: warning: Function parameter or member 'wm8400' not described in 'wm8400_reset_codec_reg_cache' Cc: Mark Brown Cc: patches@opensource.cirrus.com Signed-off-by: Lee Jones Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm8400-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c index 3055d6f47afc..0fe32a05421b 100644 --- a/drivers/mfd/wm8400-core.c +++ b/drivers/mfd/wm8400-core.c @@ -108,6 +108,8 @@ static const struct regmap_config wm8400_regmap_config = { /** * wm8400_reset_codec_reg_cache - Reset cached codec registers to * their default values. + * + * @wm8400: pointer to local driver data structure */ void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400) { -- cgit From 38ea9f47317d8e8a74b15b719d5c794a79346be8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 15:33:33 +0100 Subject: mfd: wm831x-core: Supply description wm831x_reg_{un}lock args Kerneldoc syntax is used, but not complete. Descriptions required. Prevents warnings like: drivers/mfd/wm831x-core.c:119: warning: Function parameter or member 'wm831x' not described in 'wm831x_reg_lock' drivers/mfd/wm831x-core.c:145: warning: Function parameter or member 'wm831x' not described in 'wm831x_reg_unlock' Cc: Mark Brown Cc: patches@opensource.cirrus.com Signed-off-by: Lee Jones Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm831x-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 02f879b23d9f..b0344e5353e4 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -114,6 +114,8 @@ static int wm831x_reg_locked(struct wm831x *wm831x, unsigned short reg) * The WM831x has a user key preventing writes to particularly * critical registers. This function locks those registers, * allowing writes to them. + * + * @wm831x: pointer to local driver data structure */ void wm831x_reg_lock(struct wm831x *wm831x) { @@ -140,6 +142,8 @@ EXPORT_SYMBOL_GPL(wm831x_reg_lock); * The WM831x has a user key preventing writes to particularly * critical registers. This function locks those registers, * preventing spurious writes. + * + * @wm831x: pointer to local driver data structure */ int wm831x_reg_unlock(struct wm831x *wm831x) { -- cgit From afb718a870ef20efcf18d6fbea45fa6576a54d27 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 15:44:48 +0100 Subject: mfd: wm8350-core: Supply description wm8350_reg_{un}lock args Kerneldoc syntax is used, but not complete. Descriptions required. Prevents warnings like: drivers/mfd/wm8350-core.c:136: warning: Function parameter or member 'wm8350' not described in 'wm8350_reg_lock' drivers/mfd/wm8350-core.c:165: warning: Function parameter or member 'wm8350' not described in 'wm8350_reg_unlock' Cc: patches@opensource.cirrus.com Signed-off-by: Lee Jones --- drivers/mfd/wm8350-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index 42b16503e6cd..fbc77b218215 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -131,6 +131,8 @@ EXPORT_SYMBOL_GPL(wm8350_block_write); * The WM8350 has a hardware lock which can be used to prevent writes to * some registers (generally those which can cause particularly serious * problems if misused). This function enables that lock. + * + * @wm8350: pointer to local driver data structure */ int wm8350_reg_lock(struct wm8350 *wm8350) { @@ -160,6 +162,8 @@ EXPORT_SYMBOL_GPL(wm8350_reg_lock); * problems if misused). This function disables that lock so updates * can be performed. For maximum safety this should be done only when * required. + * + * @wm8350: pointer to local driver data structure */ int wm8350_reg_unlock(struct wm8350 *wm8350) { -- cgit From 5a0ffef8b74fda62114d7bfba92f318fcc4b9283 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 23 Jun 2020 15:51:28 +0100 Subject: mfd: mfd-core: Complete kerneldoc header for devm_mfd_add_devices() Each function parameter should be documented in kerneldoc format. Squashes the following W=1 warnings: drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'dev' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'id' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'cells' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'n_devs' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'mem_base' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'irq_base' not described in 'devm_mfd_add_devices' drivers/mfd/mfd-core.c:326: warning: Function parameter or member 'domain' not described in 'devm_mfd_add_devices' Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index f5a73af60dd4..720e5c8b1588 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -318,6 +318,16 @@ static void devm_mfd_dev_release(struct device *dev, void *res) * Returns 0 on success or an appropriate negative error number on failure. * All child-devices of the MFD will automatically be removed when it gets * unbinded. + * + * @dev: Pointer to parent device. + * @id: Can be PLATFORM_DEVID_AUTO to let the Platform API take care + * of device numbering, or will be added to a device's cell_id. + * @cells: Array of (struct mfd_cell)s describing child devices. + * @n_devs: Number of child devices to register. + * @mem_base: Parent register range resource for child devices. + * @irq_base: Base of the range of virtual interrupt numbers allocated for + * this MFD device. Unused if @domain is specified. + * @domain: Interrupt domain to create mappings for hardware interrupts. */ int devm_mfd_add_devices(struct device *dev, int id, const struct mfd_cell *cells, int n_devs, -- cgit From 3ecbcd20e06f2c1254a85fc208ccc6aa20594314 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 09:34:00 +0100 Subject: mfd: db8500-prcmu: Add description for 'reset_reason' in kerneldoc Each function parameter should be documented in kerneldoc format. Squashes the following W=1 warnings: drivers/mfd/db8500-prcmu.c:2281: warning: Function parameter or member 'reset_code' not described in 'db8500_prcmu_system_reset' drivers/mfd/db8500-prcmu.c:3012: warning: Function parameter or member 'pdev' not described in 'db8500_prcmu_probe' Cc: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/db8500-prcmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 0452b43b0423..9b58b0296763 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2276,6 +2276,8 @@ bool db8500_prcmu_is_ac_wake_requested(void) * * Saves the reset reason code and then sets the APE_SOFTRST register which * fires interrupt to fw + * + * @reset_code: The reason for system reset */ void db8500_prcmu_system_reset(u16 reset_code) { -- cgit From 07d88c97aef826f1ba8a5026ea30700f85433057 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 09:35:24 +0100 Subject: mfd: db8500-prcmu: Remove incorrect function header from .probe() function Not only is the current header incorrect, the isn't actually a need to document the ubiquitous platform probe call. Cc: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/db8500-prcmu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 9b58b0296763..a9d9c1cdf546 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -3006,10 +3006,6 @@ static int db8500_prcmu_register_ab8500(struct device *parent) return mfd_add_devices(parent, 0, ab850x_cell, 1, NULL, 0, NULL); } -/** - * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic - * - */ static int db8500_prcmu_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; -- cgit From 5d36df75839d2dc6216f13ed86a60f801c38937f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 09:42:33 +0100 Subject: mfd: omap-usb-host: Remove invalid use of kerneldoc syntax Kerneldoc is for documenting function arguments and return values. Prevents warnings like: drivers/mfd/omap-usb-host.c:128: warning: cannot understand function prototype: 'const char * const port_modes[] = ' Cc: Tony Lindgren Cc: Keshava Munegowda Cc: Roger Quadros Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/omap-usb-host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 1f4f01b02d98..f56cdf3149dc 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -120,7 +120,7 @@ static inline u32 usbhs_read(void __iomem *base, u32 reg) /*-------------------------------------------------------------------------*/ -/** +/* * Map 'enum usbhs_omap_port_mode' found in * to the device tree binding portN-mode found in * 'Documentation/devicetree/bindings/mfd/omap-usb-host.txt' -- cgit From 3fc65627c81c700c2b12eb3b8ca8a47f4cb4dd20 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 09:45:56 +0100 Subject: mfd: omap-usb-host: Provide description for 'pdev' argument to .probe() Kerneldoc syntax is used, but not complete. Arg descriptions required. Prevents warnings like: drivers/mfd/omap-usb-host.c:531: warning: Function parameter or member 'pdev' not described in 'usbhs_omap_probe' Cc: Tony Lindgren Cc: Keshava Munegowda Cc: Roger Quadros Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/omap-usb-host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index f56cdf3149dc..aca5a160c1b2 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -526,6 +526,8 @@ static const struct of_device_id usbhs_child_match_table[] = { * usbhs_omap_probe - initialize TI-based HCDs * * Allocates basic resources for this USB host controller. + * + * @pdev: Pointer to this device's platform device structure */ static int usbhs_omap_probe(struct platform_device *pdev) { -- cgit From 55bbf5d42ee6c609de56702ae7f766c4e88efbed Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 09:48:17 +0100 Subject: mfd: omap-usb-tll: Provide description for 'pdev' argument to .probe() Kerneldoc syntax is used, but not complete. Arg descriptions required. Prevents warnings like: drivers/mfd/omap-usb-tll.c:204: warning: Function parameter or member 'pdev' not described in 'usbtll_omap_probe Cc: Tony Lindgren Cc: Keshava Munegowda Cc: Roger Quadros Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/omap-usb-tll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c index 4b7f73c317e8..04a444007cf4 100644 --- a/drivers/mfd/omap-usb-tll.c +++ b/drivers/mfd/omap-usb-tll.c @@ -199,6 +199,8 @@ static unsigned ohci_omap3_fslsmode(enum usbhs_omap_port_mode mode) * usbtll_omap_probe - initialize TI-based HCDs * * Allocates basic resources for this USB host controller. + * + * @pdev: Pointer to this device's platform device structure */ static int usbtll_omap_probe(struct platform_device *pdev) { -- cgit From 1574360a98cef9842d86e208944bb45243150440 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 12:29:02 +0100 Subject: mfd: atmel-smc: Add missing colon(s) for 'conf' arguments Kerneldoc valication gets confused if syntax isn't "@.*: ". Adding the missing colons squashes the following W=1 warnings: drivers/mfd/atmel-smc.c:247: warning: Function parameter or member 'conf' not described in 'atmel_smc_cs_conf_apply' drivers/mfd/atmel-smc.c:268: warning: Function parameter or member 'conf' not described in 'atmel_hsmc_cs_conf_apply' Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: Boris Brezillon Signed-off-by: Lee Jones --- drivers/mfd/atmel-smc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/atmel-smc.c b/drivers/mfd/atmel-smc.c index 1fa2ec950e7d..d96f1d689e7f 100644 --- a/drivers/mfd/atmel-smc.c +++ b/drivers/mfd/atmel-smc.c @@ -237,7 +237,7 @@ EXPORT_SYMBOL_GPL(atmel_smc_cs_conf_set_cycle); * atmel_smc_cs_conf_apply - apply an SMC CS conf * @regmap: the SMC regmap * @cs: the CS id - * @conf the SMC CS conf to apply + * @conf: the SMC CS conf to apply * * Applies an SMC CS configuration. * Only valid on at91sam9/avr32 SoCs. @@ -257,7 +257,7 @@ EXPORT_SYMBOL_GPL(atmel_smc_cs_conf_apply); * @regmap: the HSMC regmap * @cs: the CS id * @layout: the layout of registers - * @conf the SMC CS conf to apply + * @conf: the SMC CS conf to apply * * Applies an SMC CS configuration. * Only valid on post-sama5 SoCs. -- cgit From 0824c889e1a4b1ec0edf890689c7852363c4b055 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 12:56:11 +0100 Subject: mfd: altera-sysmgr: Supply descriptions for 'np' and 'property' function args Kerneldoc syntax is used, but not complete. Arg descriptions are required. Fixes the following W=1 build warnings: drivers/mfd/altera-sysmgr.c:95: warning: Function parameter or member 'np' not described in 'altr_sysmgr_regmap_lookup_by_phandle' drivers/mfd/altera-sysmgr.c:95: warning: Function parameter or member 'property' not described in 'altr_sysmgr_regmap_lookup_by_phandle' Cc: Thor Thayer Signed-off-by: Lee Jones --- drivers/mfd/altera-sysmgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 83f0765f819b..41076d121dd5 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -89,6 +89,9 @@ static struct regmap_config altr_sysmgr_regmap_cfg = { * altr_sysmgr_regmap_lookup_by_phandle * Find the sysmgr previous configured in probe() and return regmap property. * Return: regmap if found or error if not found. + * + * @np: Pointer to device's Device Tree node + * @property: Device Tree property name which references the sysmgr */ struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np, const char *property) -- cgit From 5ae3d1bcea485e3f0056c508d2e12d1a95c453ee Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:07:45 +0100 Subject: mfd: cros_ec_dev: Fix cros_feature_to_{name,cells} struct descriptions Kerneldoc expects kernel structures to be prefixed with 'struct'. Fixes the following W=1 level warnings: drivers/mfd/cros_ec_dev.c:32: warning: cannot understand function prototype: 'struct cros_feature_to_name ' drivers/mfd/cros_ec_dev.c:44: warning: cannot understand function prototype: 'struct cros_feature_to_cells ' Cc: Benson Leung Cc: Guenter Roeck Cc: Bill Richardson Signed-off-by: Lee Jones Acked-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 32c2b912b58b..d07b43d7c761 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -24,7 +24,7 @@ static struct class cros_class = { }; /** - * cros_feature_to_name - CrOS feature id to name/short description. + * struct cros_feature_to_name - CrOS feature id to name/short description. * @id: The feature identifier. * @name: Device name associated with the feature id. * @desc: Short name that will be displayed. @@ -36,7 +36,7 @@ struct cros_feature_to_name { }; /** - * cros_feature_to_cells - CrOS feature id to mfd cells association. + * struct cros_feature_to_cells - CrOS feature id to mfd cells association. * @id: The feature identifier. * @mfd_cells: Pointer to the array of mfd cells that needs to be added. * @num_cells: Number of mfd cells into the array. -- cgit From 9c3739ee293bc956739c9a90d357595f41f46d39 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:10:54 +0100 Subject: mfd: tps65218: Repair incorrect function argument name 's/tps65218/tps/' The kerneldocs for both tps65218_reg_write() and tps65218_update_bits() describe their first arguments as 'tps65218', when in reality these are simply called 'tps'. Fixes the following W=1 warnings: drivers/mfd/tps65218.c:58: warning: Function parameter or member 'tps' not described in 'tps65218_reg_write' drivers/mfd/tps65218.c:58: warning: Excess function parameter 'tps65218' description in 'tps65218_reg_write' drivers/mfd/tps65218.c:90: warning: Function parameter or member 'tps' not described in 'tps65218_update_bits' drivers/mfd/tps65218.c:90: warning: Excess function parameter 'tps65218' description in 'tps65218_update_bits' Cc: Tony Lindgren Cc: J Keerthy Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/tps65218.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index a62ea4cb8be7..d41dd864b472 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -48,7 +48,7 @@ static const struct mfd_cell tps65218_cells[] = { /** * tps65218_reg_write: Write a single tps65218 register. * - * @tps65218: Device to write to. + * @tps: Device to write to. * @reg: Register to write to. * @val: Value to write. * @level: Password protected level @@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(tps65218_reg_write); /** * tps65218_update_bits: Modify bits w.r.t mask, val and level. * - * @tps65218: Device to write to. + * @tps: Device to write to. * @reg: Register to read-write to. * @mask: Mask. * @val: Value to write. -- cgit From 4976bfb8d853a246f83da7ccdfb49fa34093ac0d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:14:51 +0100 Subject: mfd: tps65217: Repair incorrect function argument name 's/tps65217/tps/' The kerneldocs for both tps65217_reg_write() and tps65217_update_bits() describe their first arguments as 'tps65217', when in reality these are simply called 'tps'. Fixes the following W=1 warnings: drivers/mfd/tps65217.c:215: warning: Function parameter or member 'tps' not described in 'tps65217_reg_write' drivers/mfd/tps65217.c:215: warning: Excess function parameter 'tps65217' description in 'tps65217_reg_write' drivers/mfd/tps65217.c:261: warning: Function parameter or member 'tps' not described in 'tps65217_update_bits' drivers/mfd/tps65217.c:261: warning: Excess function parameter 'tps65217' description in 'tps65217_update_bits' Cc: Tony Lindgren Cc: AnilKumar Ch Cc: linux-omap@vger.kernel.org Signed-off-by: Lee Jones --- drivers/mfd/tps65217.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c index 7566ce4457a0..923602599549 100644 --- a/drivers/mfd/tps65217.c +++ b/drivers/mfd/tps65217.c @@ -205,7 +205,7 @@ EXPORT_SYMBOL_GPL(tps65217_reg_read); /** * tps65217_reg_write: Write a single tps65217 register. * - * @tps65217: Device to write to. + * @tps: Device to write to. * @reg: Register to write to. * @val: Value to write. * @level: Password protected level @@ -250,7 +250,7 @@ EXPORT_SYMBOL_GPL(tps65217_reg_write); /** * tps65217_update_bits: Modify bits w.r.t mask, val and level. * - * @tps65217: Device to write to. + * @tps: Device to write to. * @reg: Register to read-write to. * @mask: Mask. * @val: Value to write. -- cgit From 20d60f850d2d8b6f9536c1a499fc6d6a5c724a55 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:17:01 +0100 Subject: mfd: ab3100-otp: Add missing colon(s) for all documented kerneldoc arguments Kerneldoc validation gets confused if syntax isn't "@.*: ". Adding the missing colons squashes the following W=1 warnings: drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'dev' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'locked' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'freq' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'paf' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'imeich' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'cid' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'tac' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'fac' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'svn' not described in 'ab3100_otp' drivers/mfd/ab3100-otp.c:60: warning: Function parameter or member 'debugfs' not described in 'ab3100_otp' Cc: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/ab3100-otp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/ab3100-otp.c b/drivers/mfd/ab3100-otp.c index c4751fb9bc22..c393102e3a39 100644 --- a/drivers/mfd/ab3100-otp.c +++ b/drivers/mfd/ab3100-otp.c @@ -29,22 +29,22 @@ /** * struct ab3100_otp - * @dev containing device - * @locked whether the OTP is locked, after locking, no more bits + * @dev: containing device + * @locked: whether the OTP is locked, after locking, no more bits * can be changed but before locking it is still possible * to change bits from 1->0. - * @freq clocking frequency for the OTP, this frequency is either + * @freq: clocking frequency for the OTP, this frequency is either * 32768Hz or 1MHz/30 - * @paf product activation flag, indicates whether this is a real + * @paf: product activation flag, indicates whether this is a real * product (paf true) or a lab board etc (paf false) - * @imeich if this is set it is possible to override the + * @imeich: if this is set it is possible to override the * IMEI number found in the tac, fac and svn fields with * (secured) software - * @cid customer ID - * @tac type allocation code of the IMEI - * @fac final assembly code of the IMEI - * @svn software version number of the IMEI - * @debugfs a debugfs file used when dumping to file + * @cid: customer ID + * @tac: type allocation code of the IMEI + * @fac: final assembly code of the IMEI + * @svn: software version number of the IMEI + * @debugfs: a debugfs file used when dumping to file */ struct ab3100_otp { struct device *dev; -- cgit From 2fbd583443905d6f8c6d87aebdef2717ae039f0b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:31:31 +0100 Subject: mfd: tps65010: Remove delcared and set, but never used variable 'status' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'status' hasn't been checked since 2008. It's probably safe to remove it. Fixes W=1 warning: drivers/mfd/tps65010.c:407:7: warning: variable ‘status’ set but not used [-Wunused-but-set-variable] 407 | int status; | ^~~~~~ Signed-off-by: Lee Jones --- drivers/mfd/tps65010.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/tps65010.c b/drivers/mfd/tps65010.c index 65fcc58c02da..7e7dbee58ca9 100644 --- a/drivers/mfd/tps65010.c +++ b/drivers/mfd/tps65010.c @@ -404,7 +404,6 @@ static void tps65010_work(struct work_struct *work) tps65010_interrupt(tps); if (test_and_clear_bit(FLAG_VBUS_CHANGED, &tps->flags)) { - int status; u8 chgconfig, tmp; chgconfig = i2c_smbus_read_byte_data(tps->client, @@ -415,8 +414,8 @@ static void tps65010_work(struct work_struct *work) else if (tps->vbus >= 100) chgconfig |= TPS_VBUS_CHARGING; - status = i2c_smbus_write_byte_data(tps->client, - TPS_CHGCONFIG, chgconfig); + i2c_smbus_write_byte_data(tps->client, + TPS_CHGCONFIG, chgconfig); /* vbus update fails unless VBUS is connected! */ tmp = i2c_smbus_read_byte_data(tps->client, TPS_CHGCONFIG); -- cgit From 3c719388f6ff505239f8285a423571236abbd3c2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:36:23 +0100 Subject: mfd: si476x-cmd: Repair wrongly described function argument 's/response/resp' si476x_core_send_command()'s 5th argument has never been called response. This change must have occurred prior to the driver being Mainlined. We're also taking the opportunity to bring the first description back into line, making my OCD happy! This fixes the following W=1 warning(s): drivers/mfd/si476x-cmd.c:264: warning: Function parameter or member 'resp' not described in 'si476x_core_send_command drivers/mfd/si476x-cmd.c:264: warning: Excess function parameter 'response' description in 'si476x_core_send_command' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c index 4a09ce9609c9..316d44c2edcd 100644 --- a/drivers/mfd/si476x-cmd.c +++ b/drivers/mfd/si476x-cmd.c @@ -241,13 +241,13 @@ static int si476x_core_parse_and_nag_about_error(struct si476x_core *core) /** * si476x_core_send_command() - sends a command to si476x and waits its * response - * @core: si476x_device structure for the device we are + * @core: si476x_device structure for the device we are * communicating with * @command: command id * @args: command arguments we are sending * @argn: actual size of @args - * @response: buffer to place the expected response from the device - * @respn: actual size of @response + * @resp: buffer to place the expected response from the device + * @respn: actual size of @resp * @usecs: amount of time to wait before reading the response (in * usecs) * -- cgit From 9745ef7dcf86bbad277c4a6575393a4eaabca1b4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 13:43:25 +0100 Subject: mfd: si476x-cmd: Add missing colon(s) for all documented kerneldoc arguments Kerneldoc validation gets confused if syntax isn't "@.*: ". Adding the missing colons squashes the following W=1 warnings: drivers/mfd/si476x-cmd.c:525: warning: Function parameter or member 'dout' not described in 'si476x_core_cmd_dig_audio_pin_c drivers/mfd/si476x-cmd.c:525: warning: Function parameter or member 'xout' not described in 'si476x_core_cmd_dig_audio_pin_c drivers/mfd/si476x-cmd.c:574: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_zif_pin_cfg' drivers/mfd/si476x-cmd.c:574: warning: Function parameter or member 'iqclk' not described in 'si476x_core_cmd_zif_pin_cfg' drivers/mfd/si476x-cmd.c:574: warning: Function parameter or member 'iqfs' not described in 'si476x_core_cmd_zif_pin_cfg' drivers/mfd/si476x-cmd.c:574: warning: Function parameter or member 'iout' not described in 'si476x_core_cmd_zif_pin_cfg' drivers/mfd/si476x-cmd.c:574: warning: Function parameter or member 'qout' not described in 'si476x_core_cmd_zif_pin_cfg' drivers/mfd/si476x-i2c.c:550: warning: Function parameter or member 'func' not described in 'si476x_core_fwver_to_revision' drivers/mfd/si476x-cmd.c:631: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_ic_link_gpo_ctl drivers/mfd/si476x-cmd.c:631: warning: Function parameter or member 'icin' not described in 'si476x_core_cmd_ic_link_gpo_ctl drivers/mfd/si476x-cmd.c:631: warning: Function parameter or member 'icip' not described in 'si476x_core_cmd_ic_link_gpo_ctl drivers/mfd/si476x-cmd.c:631: warning: Function parameter or member 'icon' not described in 'si476x_core_cmd_ic_link_gpo_ctl drivers/mfd/si476x-cmd.c:631: warning: Function parameter or member 'icop' not described in 'si476x_core_cmd_ic_link_gpo_ctl drivers/mfd/si476x-cmd.c:662: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_ana_audio_pin_c drivers/mfd/si476x-cmd.c:662: warning: Function parameter or member 'lrout' not described in 'si476x_core_cmd_ana_audio_pin_ drivers/mfd/si476x-cmd.c:697: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_intb_pin_cfg_a1 drivers/mfd/si476x-cmd.c:697: warning: Function parameter or member 'intb' not described in 'si476x_core_cmd_intb_pin_cfg_a1 drivers/mfd/si476x-cmd.c:697: warning: Function parameter or member 'a1' not described in 'si476x_core_cmd_intb_pin_cfg_a10' drivers/mfd/si476x-cmd.c:746: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_am_rsq_status' drivers/mfd/si476x-cmd.c:746: warning: Function parameter or member 'rsqargs' not described in 'si476x_core_cmd_am_rsq_statu drivers/mfd/si476x-cmd.c:746: warning: Function parameter or member 'report' not described in 'si476x_core_cmd_am_rsq_status drivers/mfd/si476x-cmd.c:878: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_fm_seek_start' drivers/mfd/si476x-cmd.c:878: warning: Function parameter or member 'seekup' not described in 'si476x_core_cmd_fm_seek_start drivers/mfd/si476x-cmd.c:878: warning: Function parameter or member 'wrap' not described in 'si476x_core_cmd_fm_seek_start' drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_fm_rds_status' drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'status_only' not described in 'si476x_core_cmd_fm_rds_s drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'mtfifo' not described in 'si476x_core_cmd_fm_rds_status drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'intack' not described in 'si476x_core_cmd_fm_rds_status drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'report' not described in 'si476x_core_cmd_fm_rds_status drivers/mfd/si476x-cmd.c:1052: warning: Function parameter or member 'core' not described in 'si476x_core_cmd_am_seek_start' drivers/mfd/si476x-cmd.c:1052: warning: Function parameter or member 'seekup' not described in 'si476x_core_cmd_am_seek_star drivers/mfd/si476x-cmd.c:1052: warning: Function parameter or member 'wrap' not described in 'si476x_core_cmd_am_seek_start' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-cmd.c | 66 ++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c index 316d44c2edcd..5b42dfab431a 100644 --- a/drivers/mfd/si476x-cmd.c +++ b/drivers/mfd/si476x-cmd.c @@ -496,7 +496,7 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_get_property); * enable 1MOhm pulldown * SI476X_DFS_DAUDIO - set the pin to be a part of digital * audio interface - * @dout - DOUT pin function configuration: + * @dout: - DOUT pin function configuration: * SI476X_DOUT_NOOP - do not modify the behaviour * SI476X_DOUT_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_get_property); * port 1 * SI476X_DOUT_I2S_INPUT - set this pin to be digital in on I2S * port 1 - * @xout - XOUT pin function configuration: + * @xout: - XOUT pin function configuration: * SI476X_XOUT_NOOP - do not modify the behaviour * SI476X_XOUT_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -540,25 +540,25 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_dig_audio_pin_cfg); /** * si476x_cmd_zif_pin_cfg - send 'ZIF_PIN_CFG_COMMAND' - * @core - device to send the command to - * @iqclk - IQCL pin function configuration: + * @core: - device to send the command to + * @iqclk: - IQCL pin function configuration: * SI476X_IQCLK_NOOP - do not modify the behaviour * SI476X_IQCLK_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_IQCLK_IQ - set pin to be a part of I/Q interace * in master mode - * @iqfs - IQFS pin function configuration: + * @iqfs: - IQFS pin function configuration: * SI476X_IQFS_NOOP - do not modify the behaviour * SI476X_IQFS_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_IQFS_IQ - set pin to be a part of I/Q interace * in master mode - * @iout - IOUT pin function configuration: + * @iout: - IOUT pin function configuration: * SI476X_IOUT_NOOP - do not modify the behaviour * SI476X_IOUT_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_IOUT_OUTPUT - set pin to be I out - * @qout - QOUT pin function configuration: + * @qout: - QOUT pin function configuration: * SI476X_QOUT_NOOP - do not modify the behaviour * SI476X_QOUT_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -590,29 +590,29 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_zif_pin_cfg); /** * si476x_cmd_ic_link_gpo_ctl_pin_cfg - send * 'IC_LINK_GPIO_CTL_PIN_CFG' comand to the device - * @core - device to send the command to - * @icin - ICIN pin function configuration: + * @core: - device to send the command to + * @icin: - ICIN pin function configuration: * SI476X_ICIN_NOOP - do not modify the behaviour * SI476X_ICIN_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_ICIN_GPO1_HIGH - set pin to be an output, drive it high * SI476X_ICIN_GPO1_LOW - set pin to be an output, drive it low * SI476X_ICIN_IC_LINK - set the pin to be a part of Inter-Chip link - * @icip - ICIP pin function configuration: + * @icip: - ICIP pin function configuration: * SI476X_ICIP_NOOP - do not modify the behaviour * SI476X_ICIP_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_ICIP_GPO1_HIGH - set pin to be an output, drive it high * SI476X_ICIP_GPO1_LOW - set pin to be an output, drive it low * SI476X_ICIP_IC_LINK - set the pin to be a part of Inter-Chip link - * @icon - ICON pin function configuration: + * @icon: - ICON pin function configuration: * SI476X_ICON_NOOP - do not modify the behaviour * SI476X_ICON_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_ICON_I2S - set the pin to be a part of audio * interface in slave mode (DCLK) * SI476X_ICON_IC_LINK - set the pin to be a part of Inter-Chip link - * @icop - ICOP pin function configuration: + * @icop: - ICOP pin function configuration: * SI476X_ICOP_NOOP - do not modify the behaviour * SI476X_ICOP_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -647,8 +647,8 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_ic_link_gpo_ctl_pin_cfg); /** * si476x_cmd_ana_audio_pin_cfg - send 'ANA_AUDIO_PIN_CFG' to the * device - * @core - device to send the command to - * @lrout - LROUT pin function configuration: + * @core: - device to send the command to + * @lrout: - LROUT pin function configuration: * SI476X_LROUT_NOOP - do not modify the behaviour * SI476X_LROUT_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -675,15 +675,15 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_ana_audio_pin_cfg); /** * si476x_cmd_intb_pin_cfg - send 'INTB_PIN_CFG' command to the device - * @core - device to send the command to - * @intb - INTB pin function configuration: + * @core: - device to send the command to + * @intb: - INTB pin function configuration: * SI476X_INTB_NOOP - do not modify the behaviour * SI476X_INTB_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown * SI476X_INTB_DAUDIO - set pin to be a part of digital * audio interface in slave mode * SI476X_INTB_IRQ - set pin to be an interrupt request line - * @a1 - A1 pin function configuration: + * @a1: - A1 pin function configuration: * SI476X_A1_NOOP - do not modify the behaviour * SI476X_A1_TRISTATE - put the pin in tristate condition, * enable 1MOhm pulldown @@ -728,14 +728,14 @@ static int si476x_core_cmd_intb_pin_cfg_a20(struct si476x_core *core, /** * si476x_cmd_am_rsq_status - send 'AM_RSQ_STATUS' command to the * device - * @core - device to send the command to - * @rsqack - if set command clears RSQINT, SNRINT, SNRLINT, RSSIHINT, + * @core: - device to send the command to + * @rsqack: - if set command clears RSQINT, SNRINT, SNRLINT, RSSIHINT, * RSSSILINT, BLENDINT, MULTHINT and MULTLINT - * @attune - when set the values in the status report are the values + * @attune: - when set the values in the status report are the values * that were calculated at tune - * @cancel - abort ongoing seek/tune opertation - * @stcack - clear the STCINT bin in status register - * @report - all signal quality information retured by the command + * @cancel: - abort ongoing seek/tune opertation + * @stcack: - clear the STCINT bin in status register + * @report: - all signal quality information retured by the command * (if NULL then the output of the command is ignored) * * Function returns 0 on success and negative error code on failure @@ -862,9 +862,9 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_am_acf_status); /** * si476x_cmd_fm_seek_start - send 'FM_SEEK_START' command to the * device - * @core - device to send the command to - * @seekup - if set the direction of the search is 'up' - * @wrap - if set seek wraps when hitting band limit + * @core: - device to send the command to + * @seekup: - if set the direction of the search is 'up' + * @wrap: - if set seek wraps when hitting band limit * * This function begins search for a valid station. The station is * considered valid when 'FM_VALID_SNR_THRESHOLD' and @@ -890,12 +890,12 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_seek_start); /** * si476x_cmd_fm_rds_status - send 'FM_RDS_STATUS' command to the * device - * @core - device to send the command to - * @status_only - if set the data is not removed from RDSFIFO, + * @core: - device to send the command to + * @status_only: - if set the data is not removed from RDSFIFO, * RDSFIFOUSED is not decremented and data in all the * rest RDS data contains the last valid info received - * @mtfifo if set the command clears RDS receive FIFO - * @intack if set the command clards the RDSINT bit. + * @mtfifo: if set the command clears RDS receive FIFO + * @intack: if set the command clards the RDSINT bit. * * Function returns 0 on success and negative error code on failure */ @@ -1036,9 +1036,9 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_div_status); /** * si476x_cmd_am_seek_start - send 'FM_SEEK_START' command to the * device - * @core - device to send the command to - * @seekup - if set the direction of the search is 'up' - * @wrap - if set seek wraps when hitting band limit + * @core: - device to send the command to + * @seekup: - if set the direction of the search is 'up' + * @wrap: - if set seek wraps when hitting band limit * * This function begins search for a valid station. The station is * considered valid when 'FM_VALID_SNR_THRESHOLD' and -- cgit From c9b55f99fc679b3c7c9c1677b2fd2c86d6ac5e03 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 14:32:06 +0100 Subject: mfd: si476x-i2c: Add description for si476x_core_fwver_to_revision()'s arg 'func' Kerneldoc syntax is used, but not complete. Descriptions are required for all arguments. Fixes the following W=1 build warning: drivers/mfd/si476x-i2c.c:550: warning: Function parameter or member 'func' not described in 'si476x_core_fwver_to_revision' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-i2c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c index c8d28b844def..517d49bcd667 100644 --- a/drivers/mfd/si476x-i2c.c +++ b/drivers/mfd/si476x-i2c.c @@ -534,6 +534,11 @@ static irqreturn_t si476x_core_interrupt(int irq, void *dev) /** * si476x_firmware_version_to_revision() * @core: Core device structure + * @func: Selects the boot function of the device: + * *_BOOTLOADER - Boot loader + * *_FM_RECEIVER - FM receiver + * *_AM_RECEIVER - AM receiver + * *_WB_RECEIVER - Weatherband receiver * @major: Firmware major number * @minor1: Firmware first minor number * @minor2: Firmware second minor number -- cgit From b1ded80a61f3b7ee5337318f0dc1bada280f0605 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 14:32:33 +0100 Subject: mfd: si476x-i2c: Fix spelling mistake in case() statement's FALLTHROUGH comment 's/FALLTHROUG/FALLTHROUGH' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c index 517d49bcd667..c1d7b845244e 100644 --- a/drivers/mfd/si476x-i2c.c +++ b/drivers/mfd/si476x-i2c.c @@ -588,7 +588,7 @@ static int si476x_core_fwver_to_revision(struct si476x_core *core, goto unknown_revision; } case SI476X_FUNC_BOOTLOADER: - default: /* FALLTHROUG */ + default: /* FALLTHROUGH */ BUG(); return -1; } -- cgit From 748160e7718d1fb3c90c0df86c4b07e4b5e92621 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 14:26:16 +0100 Subject: mfd: si476x-cmd: Update si476x_cmd_am_rsq_status()'s kerneldoc 4 of the old arguments were grouped and moved into a struct which is now passed as a pointer instead of the arguments themselves. However, whoever carried out this work forgot to update the function's kerneldoc header. Fixes the following W=1 warnings: drivers/mfd/si476x-cmd.c:746: warning: Function parameter or member 'rsqargs' not described in 'si476x_core_cmd_am_rsq_s drivers/mfd/si476x-cmd.c:746: warning: Excess function parameter 'rsqack' description in 'si476x_core_cmd_am_rsq_status' drivers/mfd/si476x-cmd.c:746: warning: Excess function parameter 'attune' description in 'si476x_core_cmd_am_rsq_status' drivers/mfd/si476x-cmd.c:746: warning: Excess function parameter 'cancel' description in 'si476x_core_cmd_am_rsq_status' drivers/mfd/si476x-cmd.c:746: warning: Excess function parameter 'stcack' description in 'si476x_core_cmd_am_rsq_status' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-cmd.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c index 5b42dfab431a..0a40c5cb751d 100644 --- a/drivers/mfd/si476x-cmd.c +++ b/drivers/mfd/si476x-cmd.c @@ -729,12 +729,8 @@ static int si476x_core_cmd_intb_pin_cfg_a20(struct si476x_core *core, * si476x_cmd_am_rsq_status - send 'AM_RSQ_STATUS' command to the * device * @core: - device to send the command to - * @rsqack: - if set command clears RSQINT, SNRINT, SNRLINT, RSSIHINT, - * RSSSILINT, BLENDINT, MULTHINT and MULTLINT - * @attune: - when set the values in the status report are the values - * that were calculated at tune - * @cancel: - abort ongoing seek/tune opertation - * @stcack: - clear the STCINT bin in status register + * @rsqargs: - pointer to a structure containing a group of sub-args + * relevant to sending the RSQ status command * @report: - all signal quality information retured by the command * (if NULL then the output of the command is ignored) * -- cgit From 981b1261bfdf7cb3e3c2029ecc4a1510d46eff9e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 14:29:18 +0100 Subject: mfd: si476x-cmd: Add missing documentation for si476x_cmd_fm_rds_status()'s arg 'report' Kerneldoc syntax is used, but not complete. Descriptions are required for all arguments. Fixes the following W=1 build warning: drivers/mfd/si476x-cmd.c:907: warning: Function parameter or member 'report' not described in 'si476x_core_cmd_fm_rds_status' Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/si476x-cmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c index 0a40c5cb751d..d15b3e783369 100644 --- a/drivers/mfd/si476x-cmd.c +++ b/drivers/mfd/si476x-cmd.c @@ -892,6 +892,8 @@ EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_seek_start); * rest RDS data contains the last valid info received * @mtfifo: if set the command clears RDS receive FIFO * @intack: if set the command clards the RDSINT bit. + * @report: - all signal quality information retured by the command + * (if NULL then the output of the command is ignored) * * Function returns 0 on success and negative error code on failure */ -- cgit From 768c1e38dc43b1fc43f73409f58c58344f243892 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 24 Jun 2020 14:52:27 +0100 Subject: mfd: rave-sp: Fix mistake in 'struct rave_sp_deframer's kerneldoc Argument 'received' was incorrectly named by its struct type 'completion' instead of its name. Fixes the following W=1 warning: drivers/mfd/rave-sp.c:107: warning: Function parameter or member 'received' not described in 'rave_sp_reply' Cc: Andrey Vostrikov Cc: Nikita Yushchenko Cc: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/mfd/rave-sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/rave-sp.c b/drivers/mfd/rave-sp.c index 26c7b63e008a..abaab541df19 100644 --- a/drivers/mfd/rave-sp.c +++ b/drivers/mfd/rave-sp.c @@ -96,7 +96,7 @@ struct rave_sp_deframer { * @data: Buffer to store reply payload in * @code: Expected reply code * @ackid: Expected reply ACK ID - * @completion: Successful reply reception completion + * @received: Successful reply reception completion */ struct rave_sp_reply { size_t length; -- cgit From ec46855df33973ac6ed646e26cc8977b97cc7cd0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 25 Jun 2020 09:39:11 +0100 Subject: mfd: sprd-sc27xx-spi: Fix-up bogus IRQ register offset and mask setting 'i / pdata->num_irqs' always equates to 0 and 'BIT(i % pdata->num_irqs)' always ends up being BIT(i) here, so make that clearer in the code. If the code base needs to support more than 32 IRQs in the future, this will have to be reworked, but lets just keep it simple for as long as we can. This fixes the following W=1 warning: drivers/mfd/sprd-sc27xx-spi.c:255 sprd_pmic_probe() debug: sval_binop_unsigned: divide by zero Cc: Orson Zhai Cc: Chunyan Zhang Suggested-by: Baolin Wang Signed-off-by: Lee Jones Reviewed-by: Baolin Wang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index d3486c2a1b33..f8a8b918c60d 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -185,10 +185,8 @@ static int sprd_pmic_probe(struct spi_device *spi) return -ENOMEM; ddata->irq_chip.irqs = ddata->irqs; - for (i = 0; i < pdata->num_irqs; i++) { - ddata->irqs[i].reg_offset = i / pdata->num_irqs; - ddata->irqs[i].mask = BIT(i % pdata->num_irqs); - } + for (i = 0; i < pdata->num_irqs; i++) + ddata->irqs[i].mask = BIT(i); ret = devm_regmap_add_irq_chip(&spi->dev, ddata->regmap, ddata->irq, IRQF_ONESHOT | IRQF_NO_SUSPEND, 0, -- cgit From 21b2998d4aaeee0c54b6866aba591d8a91213d99 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 25 Jun 2020 10:03:55 +0100 Subject: mfd: axp20x-i2c: Do not define 'struct acpi_device_id' when !CONFIG_ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ACPI_PTR() is used to NULLify the value when !CONFIG_ACPI, struct axp20x_i2c_acpi_match becomes defined by unused. This squashes the current W=1 kernel builds warning: drivers/mfd/axp20x-i2c.c:82:36: warning: ‘axp20x_i2c_acpi_match’ defined but not used [-Wunused-const-variable=] Cc: Chen-Yu Tsai Cc: Carlo Caione Signed-off-by: Lee Jones --- drivers/mfd/axp20x-i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/axp20x-i2c.c b/drivers/mfd/axp20x-i2c.c index 14f9df74f855..3c930316d48b 100644 --- a/drivers/mfd/axp20x-i2c.c +++ b/drivers/mfd/axp20x-i2c.c @@ -79,6 +79,7 @@ static const struct i2c_device_id axp20x_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, axp20x_i2c_id); +#ifdef CONFIG_ACPI static const struct acpi_device_id axp20x_i2c_acpi_match[] = { { .id = "INT33F4", @@ -87,6 +88,7 @@ static const struct acpi_device_id axp20x_i2c_acpi_match[] = { { }, }; MODULE_DEVICE_TABLE(acpi, axp20x_i2c_acpi_match); +#endif static struct i2c_driver axp20x_i2c_driver = { .driver = { -- cgit From a0e50aa3f4a8a5c9fb791b8f32d624e2a1a735bd Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Jan 2019 21:09:05 +0100 Subject: KVM: arm64: Factor out stage 2 page table data from struct kvm As we are about to reuse our stage 2 page table manipulation code for shadow stage 2 page tables in the context of nested virtualization, we are going to manage multiple stage 2 page tables for a single VM. This requires some pretty invasive changes to our data structures, which moves the vmid and pgd pointers into a separate structure and change pretty much all of our mmu code to operate on this structure instead. The new structure is called struct kvm_s2_mmu. There is no intended functional change by this patch alone. Reviewed-by: James Morse Reviewed-by: Alexandru Elisei [Designed data structure layout in collaboration] Signed-off-by: Christoffer Dall Co-developed-by: Marc Zyngier [maz: Moved the last_vcpu_ran down to the S2 MMU structure as well] Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 7 +- arch/arm64/include/asm/kvm_host.h | 32 +++- arch/arm64/include/asm/kvm_mmu.h | 16 +- arch/arm64/kvm/arm.c | 36 ++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 33 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 26 +-- arch/arm64/kvm/mmu.c | 280 ++++++++++++++++++-------------- 10 files changed, 240 insertions(+), 198 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5a91aaae78d2..18d39b30d9c9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -77,6 +77,7 @@ struct kvm; struct kvm_vcpu; +struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); @@ -90,9 +91,9 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #endif extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e0920df1d0c1..85a529eeeae3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,19 +66,34 @@ struct kvm_vmid { u32 vmid; }; -struct kvm_arch { +struct kvm_s2_mmu { struct kvm_vmid vmid; - /* stage2 entry level table */ - pgd_t *pgd; - phys_addr_t pgd_phys; - - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * stage2 entry level table + * + * Two kvm_s2_mmu structures in the same VM can point to the same + * pgd here. This happens when running a guest using a + * translation regime that isn't affected by its own stage-2 + * translation, such as a non-VHE hypervisor running at vEL2, or + * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the + * canonical stage-2 page tables. + */ + pgd_t *pgd; + phys_addr_t pgd_phys; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; + struct kvm *kvm; +}; + +struct kvm_arch { + struct kvm_s2_mmu mmu; + + /* VTCR_EL2 value for this VM */ + u64 vtcr; + /* The maximum number of vCPUs depends on the used GIC model */ int max_vcpus; @@ -254,6 +269,9 @@ struct kvm_vcpu_arch { void *sve_state; unsigned int sve_max_vl; + /* Stage 2 paging state used by the hardware on next switch */ + struct kvm_s2_mmu *hw_mmu; + /* HYP configuration */ u64 hcr_el2; u32 mdcr_el2; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b12bfc1f051a..22157ded04ca 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -134,8 +134,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_alloc_stage2_pgd(struct kvm *kvm); -void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); @@ -577,13 +577,13 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) +static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { - struct kvm_vmid *vmid = &kvm->arch.vmid; + struct kvm_vmid *vmid = &mmu->vmid; u64 vmid_field, baddr; u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; - baddr = kvm->arch.pgd_phys; + baddr = mmu->pgd_phys; vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } @@ -592,10 +592,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_guest_stage2(struct kvm *kvm) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) { - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* * ARM errata 1165522 and 1530923 require the actual execution of the diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0bf2cf5614c6..beb0e68cccaa 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void) */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - int ret, cpu; + int ret; ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; - kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); - if (!kvm->arch.last_vcpu_ran) - return -ENOMEM; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; - - ret = kvm_alloc_stage2_pgd(kvm); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); if (ret) - goto out_fail_alloc; + return ret; ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) @@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); - /* Mark the initial VMID generation invalid */ - kvm->arch.vmid.vmid_gen = 0; - /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); return ret; out_free_stage2_pgd: - kvm_free_stage2_pgd(kvm); -out_fail_alloc: - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; + kvm_free_stage2_pgd(&kvm->arch.mmu); return ret; } @@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_vcpu_destroy(kvm->vcpus[i]); @@ -279,6 +263,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_arm_pvtime_vcpu_init(&vcpu->arch); + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + err = kvm_vgic_vcpu_init(vcpu); if (err) return err; @@ -334,16 +320,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvm_s2_mmu *mmu; int *last_ran; - last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + mmu = vcpu->arch.hw_mmu; + last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. */ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); *last_ran = vcpu->vcpu_id; } @@ -680,7 +668,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ cond_resched(); - update_vmid(&vcpu->kvm->arch.vmid); + update_vmid(&vcpu->arch.hw_mmu->vmid); check_vcpu_requests(vcpu); @@ -729,7 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || + if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 8f622688fa64..5c03441b5b6c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -122,9 +122,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline void __activate_vm(struct kvm *kvm) +static inline void __activate_vm(struct kvm_s2_mmu *mmu) { - __load_guest_stage2(kvm); + __load_guest_stage2(mmu); } static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a1dcf59bd45e..37321b2157d9 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -194,7 +194,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); - __activate_vm(kern_hyp_va(vcpu->kvm)); + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index d4475f8340c4..11dbe031f0ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -12,7 +12,8 @@ struct tlb_inv_context { u64 tcr; }; -static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) { if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -30,12 +31,10 @@ static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) isb(); } - /* __load_guest_stage2() includes an ISB for the workaround. */ - __load_guest_stage2(kvm); - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); + __load_guest_stage2(mmu); } -static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) { write_sysreg(0, vttbr_el2); @@ -47,15 +46,15 @@ static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) } } -void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) { struct tlb_inv_context cxt; dsb(ishst); /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -98,39 +97,39 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (icache_is_vpipt()) __flush_icache_all(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_vmid(struct kvm *kvm) +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; dsb(ishst); /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) { - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } void __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c0d33deba77e..c52d714e0d75 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -125,7 +125,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * stage 2 translation, and __activate_traps clear HCR_EL2.TGE * (among other things). */ - __activate_vm(vcpu->kvm); + __activate_vm(vcpu->arch.hw_mmu); __activate_traps(vcpu); sysreg_restore_guest_state_vhe(guest_ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b275101e9c9c..ada1d56f0709 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -16,7 +16,8 @@ struct tlb_inv_context { u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) { u64 val; @@ -52,14 +53,14 @@ static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt) * place before clearing TGE. __load_guest_stage2() already * has an ISB in order to deal with this. */ - __load_guest_stage2(kvm); + __load_guest_stage2(mmu); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); isb(); } -static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) { /* * We're done with the TLB operation, let's restore the host's @@ -78,14 +79,14 @@ static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt) local_irq_restore(cxt->flags); } -void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) { struct tlb_inv_context cxt; dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); + __tlb_switch_to_guest(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -106,38 +107,37 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_vmid(struct kvm *kvm) +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); + __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) { - struct kvm *kvm = vcpu->kvm; struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); + __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host(kvm, &cxt); + __tlb_switch_to_host(&cxt); } void __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8c0035cab6b6..2c6d59b509a5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -55,12 +55,12 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa); } /* @@ -90,37 +90,39 @@ static bool kvm_is_device_pfn(unsigned long pfn) /** * stage2_dissolve_pmd() - clear and flush huge PMD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pmd: pmd pointer for IPA * * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd) { if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); put_page(virt_to_page(pmd)); } /** * stage2_dissolve_pud() - clear and flush huge PUD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pud: pud pointer for IPA * * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp) { + struct kvm *kvm = mmu->kvm; + if (!stage2_pud_huge(kvm, *pudp)) return; stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); put_page(virt_to_page(pudp)); } @@ -156,40 +158,44 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); stage2_pgd_clear(kvm, pgd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); stage2_p4d_free(kvm, p4d_table); put_page(virt_to_page(pgd)); } -static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr) +static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); stage2_p4d_clear(kvm, p4d); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); stage2_pud_free(kvm, pud_table); put_page(virt_to_page(p4d)); } -static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } -static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } @@ -255,7 +261,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp) * we then fully enforce cacheability of RAM, no matter what the guest * does. */ -static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -267,7 +273,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, pte_t old_pte = *pte; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); /* No need to invalidate the cache for device mappings */ if (!kvm_is_device_pfn(pte_pfn(old_pte))) @@ -277,13 +283,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(kvm, start_pte)) - clear_stage2_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(mmu->kvm, start_pte)) + clear_stage2_pmd_entry(mmu, pmd, start_addr); } -static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; @@ -295,24 +302,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, pmd_t old_pmd = *pmd; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); kvm_flush_dcache_pmd(old_pmd); put_page(virt_to_page(pmd)); } else { - unmap_stage2_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(mmu, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); if (stage2_pmd_table_empty(kvm, start_pmd)) - clear_stage2_pud_entry(kvm, pud, start_addr); + clear_stage2_pud_entry(mmu, pud, start_addr); } -static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, +static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; @@ -324,22 +332,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, pud_t old_pud = *pud; stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); } else { - unmap_stage2_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); if (stage2_pud_table_empty(kvm, start_pud)) - clear_stage2_p4d_entry(kvm, p4d, start_addr); + clear_stage2_p4d_entry(mmu, p4d, start_addr); } -static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; p4d_t *p4d, *start_p4d; @@ -347,11 +356,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - unmap_stage2_puds(kvm, p4d, addr, next); + unmap_stage2_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); if (stage2_p4d_table_empty(kvm, start_p4d)) - clear_stage2_pgd_entry(kvm, pgd, start_addr); + clear_stage2_pgd_entry(mmu, pgd, start_addr); } /** @@ -365,8 +374,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. */ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; @@ -374,18 +384,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread * could have possibly freed the page table, while we released * the lock. */ - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - unmap_stage2_p4ds(kvm, pgd, addr, next); + unmap_stage2_p4ds(mmu, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. @@ -395,7 +405,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } -static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, +static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { pte_t *pte; @@ -407,9 +417,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -420,14 +431,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else - stage2_flush_ptes(kvm, pmd, addr, next); + stage2_flush_ptes(mmu, pmd, addr, next); } } while (pmd++, addr = next, addr != end); } -static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -438,14 +450,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else - stage2_flush_pmds(kvm, pud, addr, next); + stage2_flush_pmds(mmu, pud, addr, next); } } while (pud++, addr = next, addr != end); } -static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -453,23 +466,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_flush_puds(kvm, p4d, addr, next); + stage2_flush_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } static void stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - stage2_flush_p4ds(kvm, pgd, addr, next); + stage2_flush_p4ds(mmu, pgd, addr, next); if (next != end) cond_resched_lock(&kvm->mmu_lock); @@ -996,21 +1010,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, } /** - * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. - * @kvm: The KVM struct pointer for the VM. + * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure + * @kvm: The pointer to the KVM structure + * @mmu: The pointer to the s2 MMU structure * * Allocates only the stage-2 HW PGD level table(s) of size defined by - * stage2_pgd_size(kvm). + * stage2_pgd_size(mmu->kvm). * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_alloc_stage2_pgd(struct kvm *kvm) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) { phys_addr_t pgd_phys; pgd_t *pgd; + int cpu; - if (kvm->arch.pgd != NULL) { + if (mmu->pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } @@ -1024,8 +1040,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) return -EINVAL; - kvm->arch.pgd = pgd; - kvm->arch.pgd_phys = pgd_phys; + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); + if (!mmu->last_vcpu_ran) { + free_pages_exact(pgd, stage2_pgd_size(kvm)); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) + *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; + + mmu->kvm = kvm; + mmu->pgd = pgd; + mmu->pgd_phys = pgd_phys; + mmu->vmid.vmid_gen = 0; + return 0; } @@ -1064,7 +1092,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(kvm, gpa, vm_end - vm_start); + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); } hva = vm_end; } while (hva < reg_end); @@ -1096,39 +1124,34 @@ void stage2_unmap_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } -/** - * kvm_free_stage2_pgd - free all stage-2 tables - * @kvm: The KVM struct pointer for the VM. - * - * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all - * underlying level-2 and level-3 tables before freeing the actual level-1 table - * and setting the struct pointer to NULL. - */ -void kvm_free_stage2_pgd(struct kvm *kvm) +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { + struct kvm *kvm = mmu->kvm; void *pgd = NULL; spin_lock(&kvm->mmu_lock); - if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); - pgd = READ_ONCE(kvm->arch.pgd); - kvm->arch.pgd = NULL; - kvm->arch.pgd_phys = 0; + if (mmu->pgd) { + unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(mmu->pgd); + mmu->pgd = NULL; } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - if (pgd) + if (pgd) { free_pages_exact(pgd, stage2_pgd_size(kvm)); + free_percpu(mmu->last_vcpu_ran); + } } -static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; p4d_t *p4d; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; @@ -1140,13 +1163,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_p4d_offset(kvm, pgd, addr); } -static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; pud_t *pud; - p4d = stage2_get_p4d(kvm, cache, addr); + p4d = stage2_get_p4d(mmu, cache, addr); if (stage2_p4d_none(kvm, *p4d)) { if (!cache) return NULL; @@ -1158,13 +1182,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pud_offset(kvm, p4d, addr); } -static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud || stage2_pud_huge(kvm, *pud)) return NULL; @@ -1179,13 +1204,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pmd_offset(kvm, pud, addr); } -static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache - *cache, phys_addr_t addr, const pmd_t *new_pmd) +static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pmd_t *new_pmd) { pmd_t *pmd, old_pmd; retry: - pmd = stage2_get_pmd(kvm, cache, addr); + pmd = stage2_get_pmd(mmu, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; @@ -1218,7 +1244,7 @@ retry: * get handled accordingly. */ if (!pmd_thp_or_huge(old_pmd)) { - unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE); goto retry; } /* @@ -1234,7 +1260,7 @@ retry: */ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); } else { get_page(virt_to_page(pmd)); } @@ -1243,13 +1269,15 @@ retry: return 0; } -static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pud_t *new_pudp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp, old_pud; retry: - pudp = stage2_get_pud(kvm, cache, addr); + pudp = stage2_get_pud(mmu, cache, addr); VM_BUG_ON(!pudp); old_pud = *pudp; @@ -1268,13 +1296,13 @@ retry: * the range for this block and retry. */ if (!stage2_pud_huge(kvm, old_pud)) { - unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE); goto retry; } WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); } else { get_page(virt_to_page(pudp)); } @@ -1289,9 +1317,10 @@ retry: * leaf-entry is returned in the appropriate level variable - pudpp, * pmdpp, ptepp. */ -static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, +static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -1300,7 +1329,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, *pmdpp = NULL; *ptepp = NULL; - pudp = stage2_get_pud(kvm, NULL, addr); + pudp = stage2_get_pud(mmu, NULL, addr); if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) return false; @@ -1326,14 +1355,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, return true; } -static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) +static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr) { pud_t *pudp; pmd_t *pmdp; pte_t *ptep; bool found; - found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep); if (!found) return false; @@ -1345,10 +1374,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) return kvm_s2pte_exec(ptep); } -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pte(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; pte_t *pte, old_pte; @@ -1358,7 +1389,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, VM_BUG_ON(logging_active && !cache); /* Create stage-2 page table mapping - Levels 0 and 1 */ - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud) { /* * Ignore calls from kvm_set_spte_hva for unallocated @@ -1372,7 +1403,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * on to allocate page. */ if (logging_active) - stage2_dissolve_pud(kvm, addr, pud); + stage2_dissolve_pud(mmu, addr, pud); if (stage2_pud_none(kvm, *pud)) { if (!cache) @@ -1396,7 +1427,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * allocate page. */ if (logging_active) - stage2_dissolve_pmd(kvm, addr, pmd); + stage2_dissolve_pmd(mmu, addr, pmd); /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { @@ -1420,7 +1451,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr); } else { get_page(virt_to_page(pte)); } @@ -1486,8 +1517,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, &cache, addr, &pte, - KVM_S2PTE_FLAG_IS_IOMAP); + ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -1526,9 +1557,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -1549,13 +1581,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, /** * stage2_wp_puds - write protect P4D range - * @pgd: pointer to pgd entry + * @p4d: pointer to p4d entry * @addr: range start address * @end: range end address */ -static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -1567,7 +1600,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, if (!kvm_s2pud_readonly(pud)) kvm_set_s2pud_readonly(pud); } else { - stage2_wp_pmds(kvm, pud, addr, next); + stage2_wp_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); @@ -1579,9 +1612,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, * @addr: range start address * @end: range end address */ -static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -1589,7 +1623,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_wp_puds(kvm, p4d, addr, next); + stage2_wp_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } @@ -1599,12 +1633,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, * @addr: Start address of range * @end: End address of range */ -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1616,11 +1651,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * the lock. */ cond_resched_lock(&kvm->mmu_lock); - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (stage2_pgd_present(kvm, *pgd)) - stage2_wp_p4ds(kvm, pgd, addr, next); + stage2_wp_p4ds(mmu, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1650,7 +1685,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1674,7 +1709,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); } /* @@ -1837,6 +1872,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); unsigned long vma_pagesize, flags = 0; + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_iabt(vcpu); @@ -1958,7 +1994,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * execute permissions, and we preserve whatever we have. */ needs_exec = exec_fault || - (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa)); + (fault_status == FSC_PERM && stage2_is_exec(mmu, fault_ipa)); if (vma_pagesize == PUD_SIZE) { pud_t new_pud = kvm_pfn_pud(pfn, mem_type); @@ -1970,7 +2006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pud = kvm_s2pud_mkexec(new_pud); - ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud); } else if (vma_pagesize == PMD_SIZE) { pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); @@ -1982,7 +2018,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pmd = kvm_s2pmd_mkexec(new_pmd); - ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = kvm_pfn_pte(pfn, mem_type); @@ -1994,7 +2030,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pte = kvm_s2pte_mkexec(new_pte); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags); } out_unlock: @@ -2023,7 +2059,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) spin_lock(&vcpu->kvm->mmu_lock); - if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte)) goto out; if (pud) { /* HugeTLB */ @@ -2197,14 +2233,14 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); return 0; } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_unmap_hva_range(start, end); @@ -2224,7 +2260,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data * therefore stage2_set_pte() never needs to clear out a huge PMD * through this calling path. */ - stage2_set_pte(kvm, NULL, gpa, pte, 0); + stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0); return 0; } @@ -2235,7 +2271,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_pfn_t pfn = pte_pfn(pte); pte_t stage2_pte; - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_set_spte_hva(hva); @@ -2258,7 +2294,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2276,7 +2312,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2289,7 +2325,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); @@ -2297,7 +2333,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, @@ -2510,7 +2546,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) - unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); @@ -2529,7 +2565,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_free_stage2_pgd(kvm); + kvm_free_stage2_pgd(&kvm->arch.mmu); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -2539,7 +2575,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); spin_unlock(&kvm->mmu_lock); } -- cgit From efaa5b93afde088411b67a0dde34e67aedc6e72f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Jan 2019 12:34:25 +0000 Subject: KVM: arm64: Use TTL hint in when invalidating stage-2 translations Since we often have a precise idea of the level we're dealing with when invalidating TLBs, we can provide it to as a hint to our invalidation helper. Reviewed-by: James Morse Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 3 ++- arch/arm64/kvm/hyp/nvhe/tlb.c | 5 +++-- arch/arm64/kvm/hyp/vhe/tlb.c | 5 +++-- arch/arm64/kvm/mmu.c | 29 +++++++++++++++-------------- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 18d39b30d9c9..61f654a4ae00 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -91,7 +91,8 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #endif extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 11dbe031f0ba..69eae608d670 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -46,7 +46,8 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt) } } -void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) { struct tlb_inv_context cxt; @@ -62,7 +63,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) * whole of Stage-1. Weep... */ ipa >>= 12; - __tlbi(ipas2e1is, ipa); + __tlbi_level(ipas2e1is, ipa, level); /* * We have to ensure completion of the invalidation at Stage-2, diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index ada1d56f0709..fd7895945bbc 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -79,7 +79,8 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt) local_irq_restore(cxt->flags); } -void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) { struct tlb_inv_context cxt; @@ -94,7 +95,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) * whole of Stage-1. Weep... */ ipa >>= 12; - __tlbi(ipas2e1is, ipa); + __tlbi_level(ipas2e1is, ipa, level); /* * We have to ensure completion of the invalidation at Stage-2, diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2c6d59b509a5..41eedbfd961e 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -58,9 +58,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa) +static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level); } /* @@ -102,7 +103,7 @@ static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t return; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); put_page(virt_to_page(pmd)); } @@ -122,7 +123,7 @@ static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t return; stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); put_page(virt_to_page(pudp)); } @@ -163,7 +164,7 @@ static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr struct kvm *kvm = mmu->kvm; p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); stage2_pgd_clear(kvm, pgd); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_p4d_free(kvm, p4d_table); put_page(virt_to_page(pgd)); } @@ -173,7 +174,7 @@ static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr struct kvm *kvm = mmu->kvm; pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); stage2_p4d_clear(kvm, p4d); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pud_free(kvm, pud_table); put_page(virt_to_page(p4d)); } @@ -185,7 +186,7 @@ static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr VM_BUG_ON(stage2_pud_huge(kvm, *pud)); stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } @@ -195,7 +196,7 @@ static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } @@ -273,7 +274,7 @@ static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, pte_t old_pte = *pte; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); /* No need to invalidate the cache for device mappings */ if (!kvm_is_device_pfn(pte_pfn(old_pte))) @@ -302,7 +303,7 @@ static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, pmd_t old_pmd = *pmd; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); kvm_flush_dcache_pmd(old_pmd); @@ -332,7 +333,7 @@ static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, pud_t old_pud = *pud; stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); } else { @@ -1260,7 +1261,7 @@ retry: */ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); } else { get_page(virt_to_page(pmd)); } @@ -1302,7 +1303,7 @@ retry: WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); } else { get_page(virt_to_page(pudp)); } @@ -1451,7 +1452,7 @@ static int stage2_set_pte(struct kvm_s2_mmu *mmu, return 0; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(mmu, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); } else { get_page(virt_to_page(pte)); } -- cgit From 1b422dd7fc3c85556df656e3d8f1475ce99f93a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Jun 2019 19:57:41 +0100 Subject: KVM: arm64: Introduce accessor for ctxt->sys_reg In order to allow the disintegration of the per-vcpu sysreg array, let's introduce a new helper (ctxt_sys_reg()) that returns the in-memory copy of a system register, picked from a given context. __vcpu_sys_reg() is rewritten to use this helper. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 85a529eeeae3..5b168226088e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -405,12 +405,17 @@ struct kvm_vcpu_arch { #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) /* - * Only use __vcpu_sys_reg if you know you want the memory backed version of a - * register, and not the one most recently accessed by a running VCPU. For - * example, for userspace access or for system registers that are never context - * switched, but only emulated. + * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the + * memory backed version of a register, and not the one most recently + * accessed by a running VCPU. For example, for userspace access or + * for system registers that are never context switched, but only + * emulated. */ -#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) + +#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) + +#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); -- cgit From 71071acfd392c51d9c0b286067033534b59f6be4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 12 Apr 2020 14:00:43 +0100 Subject: KVM: arm64: hyp: Use ctxt_sys_reg/__vcpu_sys_reg instead of raw sys_regs access Switch the hypervisor code to using ctxt_sys_reg/__vcpu_sys_reg instead of raw sys_regs accesses. No intended functionnal change. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 7 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 110 ++++++++++++++--------------- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +- 5 files changed, 62 insertions(+), 65 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5b168226088e..d9360f91ef44 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -561,7 +561,7 @@ DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); + ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } static inline bool kvm_arch_requires_vhe(void) diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 24e8acf9ec10..5499d6c1fd9f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -104,7 +104,7 @@ static inline void __debug_save_state(struct kvm_vcpu *vcpu, save_debug(dbg->dbg_wcr, dbgwcr, wrps); save_debug(dbg->dbg_wvr, dbgwvr, wrps); - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } static inline void __debug_restore_state(struct kvm_vcpu *vcpu, @@ -124,7 +124,7 @@ static inline void __debug_restore_state(struct kvm_vcpu *vcpu, restore_debug(dbg->dbg_wcr, dbgwcr, wrps); restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); } static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 5c03441b5b6c..7cf14e4f9f77 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -53,7 +53,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -268,15 +268,14 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) sve_load_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); } else { __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); } /* Skip restoring fpexc32 for AArch64 guests */ if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 6e04e061f762..37ef3e2cdbef 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -17,34 +17,34 @@ static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); } static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); @@ -57,55 +57,55 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); } static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); } static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); } static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); if (has_vhe() || !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { /* * Must only be done for guest registers, hence the context * test. We're coming from the host, so SCTLR.M is already * set. Pairs with nVHE's __activate_traps(). */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | TCR_EPD1_MASK | TCR_EPD0_MASK), SYS_TCR); isb(); } - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); if (!has_vhe() && cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && @@ -120,9 +120,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) * deconfigured and disabled. We can now restore the host's * S1 configuration: SCTLR, and only then TCR. */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); } write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); @@ -153,51 +153,49 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx write_sysreg_el2(pstate, SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); } static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) { - u64 *spsr, *sysreg; + u64 *spsr; if (!vcpu_el1_is_32bit(vcpu)) return; spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) { - u64 *spsr, *sysreg; + u64 *spsr; if (!vcpu_el1_is_32bit(vcpu)) return; spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); write_sysreg(spsr[KVM_SPSR_UND], spsr_und); write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); } #endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 37321b2157d9..341be2f2f312 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -52,9 +52,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) * configured and enabled. We can now restore the guest's S1 * configuration: SCTLR, and only then TCR. */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); } } -- cgit From 308472c69213cea9b22a2156a19ea9ec06ea3bc1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 4 Jul 2019 17:25:09 +0100 Subject: KVM: arm64: sve: Use __vcpu_sys_reg() instead of raw sys_regs access Now that we have a wrapper for the sysreg accesses, let's use that consistently. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e329a36b2bee..e503caff14d1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -109,12 +109,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) - *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12); } else if (host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we -- cgit From 2c3db77c62b681af60757378219edcc93f658ba1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Jul 2019 11:21:27 +0100 Subject: KVM: arm64: pauth: Use ctxt_sys_reg() instead of raw sys_regs access Now that we have a wrapper for the sysreg accesses, let's use that consistently. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 7cf14e4f9f77..70367699d69a 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -364,11 +364,14 @@ static inline bool esr_is_ptrauth_trap(u32 esr) return false; } -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) { @@ -380,11 +383,11 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) return false; ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); + __ptrauth_save_key(ctxt, APIA); + __ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); vcpu_ptrauth_enable(vcpu); -- cgit From 5b78077a00f8109f7a4717b19309ef0a76d73372 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Jul 2019 11:25:10 +0100 Subject: KVM: arm64: debug: Drop useless vpcu parameter As part of the ongoing spring cleanup, remove the now useless vcpu parameter that is passed around (host and guest contexts give us everything we need). Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 5499d6c1fd9f..0297dc63988c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -88,9 +88,8 @@ default: write_debug(ptr[0], reg, 0); \ } -static inline void __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -107,9 +106,8 @@ static inline void __debug_save_state(struct kvm_vcpu *vcpu, ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } -static inline void __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -142,8 +140,8 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); } static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) @@ -161,8 +159,8 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } -- cgit From e47c2055c68e06977d7d8b7dbc2f7468a36d3658 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 22:40:58 +0100 Subject: KVM: arm64: Make struct kvm_regs userspace-only struct kvm_regs is used by userspace to indicate which register gets accessed by the {GET,SET}_ONE_REG API. But as we're about to refactor the layout of the in-kernel register structures, we need the kernel to move away from it. Let's make kvm_regs userspace only, and let the kernel map it to its own internal representation. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 18 ++++---- arch/arm64/include/asm/kvm_host.h | 12 ++++- arch/arm64/kernel/asm-offsets.c | 3 +- arch/arm64/kvm/fpsimd.c | 2 +- arch/arm64/kvm/guest.c | 70 ++++++++++++++++++++++++------ arch/arm64/kvm/hyp/entry.S | 3 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 24 +++++----- arch/arm64/kvm/regmap.c | 6 +-- arch/arm64/kvm/reset.c | 2 +- 10 files changed, 96 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 269a76cd51ff..cd607999abc2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -124,12 +124,12 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; } static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; + return (unsigned long *)&vcpu->arch.ctxt.elr_el1; } static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) @@ -150,7 +150,7 @@ static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; } static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) @@ -179,14 +179,14 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num]; } static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) - vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; + vcpu_gp_regs(vcpu)->regs[reg_num] = val; } static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) @@ -197,7 +197,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + return vcpu->arch.ctxt.spsr[KVM_SPSR_EL1]; } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -210,7 +210,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; + vcpu->arch.ctxt.spsr[KVM_SPSR_EL1] = v; } /* @@ -519,11 +519,11 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d9360f91ef44..bc1e91573d00 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -236,7 +236,15 @@ enum vcpu_sysreg { #define NR_COPRO_REGS (NR_SYS_REGS * 2) struct kvm_cpu_context { - struct kvm_regs gp_regs; + struct user_pt_regs regs; /* sp = sp_el0 */ + + u64 sp_el1; + u64 elr_el1; + + u64 spsr[KVM_NR_SPSR]; + + struct user_fpsimd_state fp_regs; + union { u64 sys_regs[NR_SYS_REGS]; u32 copro[NR_COPRO_REGS]; @@ -402,7 +410,7 @@ struct kvm_vcpu_arch { system_supports_generic_auth()) && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)) -#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0577e2142284..7d32fc959b1a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -102,13 +102,12 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1])); DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); - DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); #endif diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e503caff14d1..3e081d556e81 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -85,7 +85,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs, + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index aea43ec60f37..9dd5bbeefae6 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -101,19 +101,60 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) return size; } -static int validate_core_offset(const struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { u64 off = core_reg_offset_from_id(reg->id); int size = core_reg_size_from_offset(vcpu, off); if (size < 0) - return -EINVAL; + return NULL; if (KVM_REG_SIZE(reg->id) != size) - return -EINVAL; + return NULL; - return 0; + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + off -= KVM_REG_ARM_CORE_REG(regs.regs[0]); + off /= 2; + return &vcpu->arch.ctxt.regs.regs[off]; + + case KVM_REG_ARM_CORE_REG(regs.sp): + return &vcpu->arch.ctxt.regs.sp; + + case KVM_REG_ARM_CORE_REG(regs.pc): + return &vcpu->arch.ctxt.regs.pc; + + case KVM_REG_ARM_CORE_REG(regs.pstate): + return &vcpu->arch.ctxt.regs.pstate; + + case KVM_REG_ARM_CORE_REG(sp_el1): + return &vcpu->arch.ctxt.sp_el1; + + case KVM_REG_ARM_CORE_REG(elr_el1): + return &vcpu->arch.ctxt.elr_el1; + + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + off -= KVM_REG_ARM_CORE_REG(spsr[0]); + off /= 2; + return &vcpu->arch.ctxt.spsr[off]; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]); + off /= 4; + return &vcpu->arch.ctxt.fp_regs.vregs[off]; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return &vcpu->arch.ctxt.fp_regs.fpsr; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return &vcpu->arch.ctxt.fp_regs.fpcr; + + default: + return NULL; + } } static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -125,8 +166,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * off the index in the "array". */ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); + void *addr; u32 off; /* Our ID is an index into the kvm_regs struct. */ @@ -135,10 +176,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; - if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; @@ -147,10 +189,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); __uint128_t tmp; - void *valp = &tmp; + void *valp = &tmp, *addr; u64 off; int err = 0; @@ -160,7 +201,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) @@ -198,7 +240,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } } - memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { int i; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index dfb4e6d359ab..ee32a7743389 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -16,8 +16,7 @@ #include #include -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 70367699d69a..784a581071fc 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -266,11 +266,11 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) if (sve_guest) { sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, + &vcpu->arch.ctxt.fp_regs.fpsr, sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); } /* Skip restoring fpexc32 for AArch64 guests */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 37ef3e2cdbef..50938093cc5d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -46,15 +46,15 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); + ctxt->sp_el1 = read_sysreg(sp_el1); + ctxt->elr_el1 = read_sysreg_el1(SYS_ELR); + ctxt->spsr[KVM_SPSR_EL1] = read_sysreg_el1(SYS_SPSR); } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); @@ -125,14 +125,14 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); } - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); + write_sysreg(ctxt->sp_el1, sp_el1); + write_sysreg_el1(ctxt->elr_el1, SYS_ELR); + write_sysreg_el1(ctxt->spsr[KVM_SPSR_EL1], SYS_SPSR); } static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { - u64 pstate = ctxt->gp_regs.regs.pstate; + u64 pstate = ctxt->regs.pstate; u64 mode = pstate & PSR_AA32_MODE_MASK; /* @@ -149,7 +149,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) pstate = PSR_MODE_EL2h | PSR_IL_BIT; - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); write_sysreg_el2(pstate, SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) @@ -163,7 +163,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - spsr = vcpu->arch.ctxt.gp_regs.spsr; + spsr = vcpu->arch.ctxt.spsr; spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); @@ -184,7 +184,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - spsr = vcpu->arch.ctxt.gp_regs.spsr; + spsr = vcpu->arch.ctxt.spsr; write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); write_sysreg(spsr[KVM_SPSR_UND], spsr_und); diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index a900181e3867..b1596f314087 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -100,7 +100,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { */ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) { - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; switch (mode) { @@ -148,7 +148,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) int spsr_idx = vcpu_spsr32_mode(vcpu); if (!vcpu->arch.sysregs_loaded_on_cpu) - return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; + return vcpu->arch.ctxt.spsr[spsr_idx]; switch (spsr_idx) { case KVM_SPSR_SVC: @@ -171,7 +171,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) int spsr_idx = vcpu_spsr32_mode(vcpu); if (!vcpu->arch.sysregs_loaded_on_cpu) { - vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; + vcpu->arch.ctxt.spsr[spsr_idx] = v; return; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d3b209023727..8ca8607f5a9f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -288,7 +288,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset core registers */ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); - vcpu_gp_regs(vcpu)->regs.pstate = pstate; + vcpu_gp_regs(vcpu)->pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); -- cgit From 98909e6d1c811b6368c9c84fa6b3f0817c32ac2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 23:05:38 +0100 Subject: KVM: arm64: Move ELR_EL1 to the system register array As ELR-EL1 is a VNCR-capable register with ARMv8.4-NV, let's move it to the sys_regs array and repaint the accessors. While we're at it, let's kill the now useless accessors used only on the fault injection path. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 21 --------------------- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/arm64/kvm/guest.c | 2 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 4 ++-- arch/arm64/kvm/inject_fault.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 ++ 6 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index cd607999abc2..a12b5dc5db0d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -127,27 +127,6 @@ static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; } -static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) -{ - return (unsigned long *)&vcpu->arch.ctxt.elr_el1; -} - -static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_ELR); - else - return *__vcpu_elr_el1(vcpu); -} - -static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_ELR); - else - *__vcpu_elr_el1(vcpu) = v; -} - static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bc1e91573d00..f255507dd916 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -185,6 +185,8 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, + ELR_EL1, + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -239,7 +241,6 @@ struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ u64 sp_el1; - u64 elr_el1; u64 spsr[KVM_NR_SPSR]; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9dd5bbeefae6..99ff09ad24e8 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -132,7 +132,7 @@ static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return &vcpu->arch.ctxt.sp_el1; case KVM_REG_ARM_CORE_REG(elr_el1): - return &vcpu->arch.ctxt.elr_el1; + return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 50938093cc5d..9ebbd626d4ab 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -47,7 +47,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); ctxt->sp_el1 = read_sysreg(sp_el1); - ctxt->elr_el1 = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt->spsr[KVM_SPSR_EL1] = read_sysreg_el1(SYS_SPSR); } @@ -126,7 +126,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) } write_sysreg(ctxt->sp_el1, sp_el1); - write_sysreg_el1(ctxt->elr_el1, SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt->spsr[KVM_SPSR_EL1], SYS_SPSR); } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e21fdd93027a..ebfdfc27b2bd 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -64,7 +64,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, case PSR_MODE_EL1h: vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); break; default: /* Don't do that */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index baf5ce9225ce..6657b83c0647 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -94,6 +94,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -133,6 +134,7 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; -- cgit From 1bded23ea71cee2053fe1dd55c5d36d35bec56aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 23:05:38 +0100 Subject: KVM: arm64: Move SP_EL1 to the system register array SP_EL1 being a VNCR-capable register with ARMv8.4-NV, move it to the system register array and update the accessors. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 +-- arch/arm64/kvm/guest.c | 2 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f255507dd916..2bd6285eaf4c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -186,6 +186,7 @@ enum vcpu_sysreg { APGAKEYHI_EL1, ELR_EL1, + SP_EL1, /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ @@ -240,8 +241,6 @@ enum vcpu_sysreg { struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ - u64 sp_el1; - u64 spsr[KVM_NR_SPSR]; struct user_fpsimd_state fp_regs; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 99ff09ad24e8..d614716e073b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -129,7 +129,7 @@ static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return &vcpu->arch.ctxt.regs.pstate; case KVM_REG_ARM_CORE_REG(sp_el1): - return &vcpu->arch.ctxt.sp_el1; + return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1); case KVM_REG_ARM_CORE_REG(elr_el1): return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 9ebbd626d4ab..4c26ba72120e 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -46,7 +46,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); - ctxt->sp_el1 = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt->spsr[KVM_SPSR_EL1] = read_sysreg_el1(SYS_SPSR); } @@ -125,7 +125,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); } - write_sysreg(ctxt->sp_el1, sp_el1); + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt->spsr[KVM_SPSR_EL1], SYS_SPSR); } -- cgit From fd85b66789aaa594a158f0a8aa4482c3ed0fed3d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 23:36:42 +0100 Subject: KVM: arm64: Disintegrate SPSR array As we're about to move SPSR_EL1 into the VNCR page, we need to disassociate it from the rest of the 32bit cruft. Let's break the array into individual fields. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_host.h | 6 ++++- arch/arm64/kvm/guest.c | 19 +++++++++++----- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 28 +++++++++--------------- arch/arm64/kvm/regmap.c | 35 +++++++++++++++++++++++++++--- 5 files changed, 63 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a12b5dc5db0d..5f959fedff09 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -176,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu->arch.ctxt.spsr[KVM_SPSR_EL1]; + return vcpu->arch.ctxt.spsr_el1; } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -189,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu->arch.ctxt.spsr[KVM_SPSR_EL1] = v; + vcpu->arch.ctxt.spsr_el1 = v; } /* diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2bd6285eaf4c..dfb97ed2f680 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -241,7 +241,11 @@ enum vcpu_sysreg { struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ - u64 spsr[KVM_NR_SPSR]; + u64 spsr_el1; /* aka spsr_svc */ + u64 spsr_abt; + u64 spsr_und; + u64 spsr_irq; + u64 spsr_fiq; struct user_fpsimd_state fp_regs; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d614716e073b..70215f3a6f89 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -134,11 +134,20 @@ static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM_CORE_REG(elr_el1): return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); - case KVM_REG_ARM_CORE_REG(spsr[0]) ... - KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): - off -= KVM_REG_ARM_CORE_REG(spsr[0]); - off /= 2; - return &vcpu->arch.ctxt.spsr[off]; + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): + return &vcpu->arch.ctxt.spsr_el1; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): + return &vcpu->arch.ctxt.spsr_abt; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]): + return &vcpu->arch.ctxt.spsr_und; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]): + return &vcpu->arch.ctxt.spsr_irq; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]): + return &vcpu->arch.ctxt.spsr_fiq; case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4c26ba72120e..fb4bc42e72c5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -48,7 +48,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); - ctxt->spsr[KVM_SPSR_EL1] = read_sysreg_el1(SYS_SPSR); + ctxt->spsr_el1 = read_sysreg_el1(SYS_SPSR); } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -127,7 +127,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); - write_sysreg_el1(ctxt->spsr[KVM_SPSR_EL1], SYS_SPSR); + write_sysreg_el1(ctxt->spsr_el1, SYS_SPSR); } static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) @@ -158,17 +158,13 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) { - u64 *spsr; - if (!vcpu_el1_is_32bit(vcpu)) return; - spsr = vcpu->arch.ctxt.spsr; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); @@ -179,17 +175,13 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) { - u64 *spsr; - if (!vcpu_el1_is_32bit(vcpu)) return; - spsr = vcpu->arch.ctxt.spsr; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index b1596f314087..97c110810527 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -147,8 +147,20 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) { int spsr_idx = vcpu_spsr32_mode(vcpu); - if (!vcpu->arch.sysregs_loaded_on_cpu) - return vcpu->arch.ctxt.spsr[spsr_idx]; + if (!vcpu->arch.sysregs_loaded_on_cpu) { + switch (spsr_idx) { + case KVM_SPSR_SVC: + return vcpu->arch.ctxt.spsr_el1; + case KVM_SPSR_ABT: + return vcpu->arch.ctxt.spsr_abt; + case KVM_SPSR_UND: + return vcpu->arch.ctxt.spsr_und; + case KVM_SPSR_IRQ: + return vcpu->arch.ctxt.spsr_irq; + case KVM_SPSR_FIQ: + return vcpu->arch.ctxt.spsr_fiq; + } + } switch (spsr_idx) { case KVM_SPSR_SVC: @@ -171,7 +183,24 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) int spsr_idx = vcpu_spsr32_mode(vcpu); if (!vcpu->arch.sysregs_loaded_on_cpu) { - vcpu->arch.ctxt.spsr[spsr_idx] = v; + switch (spsr_idx) { + case KVM_SPSR_SVC: + vcpu->arch.ctxt.spsr_el1 = v; + break; + case KVM_SPSR_ABT: + vcpu->arch.ctxt.spsr_abt = v; + break; + case KVM_SPSR_UND: + vcpu->arch.ctxt.spsr_und = v; + break; + case KVM_SPSR_IRQ: + vcpu->arch.ctxt.spsr_irq = v; + break; + case KVM_SPSR_FIQ: + vcpu->arch.ctxt.spsr_fiq = v; + break; + } + return; } -- cgit From 710f1982187afd3f25413d92a0804ccd780634f7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 23:05:38 +0100 Subject: KVM: arm64: Move SPSR_EL1 to the system register array SPSR_EL1 being a VNCR-capable register with ARMv8.4-NV, move it to the sysregs array and update the accessors. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/guest.c | 2 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 4 ++-- arch/arm64/kvm/regmap.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5f959fedff09..57f9042e7270 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -176,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu->arch.ctxt.spsr_el1; + return __vcpu_sys_reg(vcpu, SPSR_EL1); } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -189,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu->arch.ctxt.spsr_el1 = v; + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; } /* diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index dfb97ed2f680..91b1adb6789c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -187,6 +187,7 @@ enum vcpu_sysreg { ELR_EL1, SP_EL1, + SPSR_EL1, /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ @@ -241,7 +242,6 @@ enum vcpu_sysreg { struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ - u64 spsr_el1; /* aka spsr_svc */ u64 spsr_abt; u64 spsr_und; u64 spsr_irq; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 70215f3a6f89..dfb5218137ca 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -135,7 +135,7 @@ static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): - return &vcpu->arch.ctxt.spsr_el1; + return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1); case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): return &vcpu->arch.ctxt.spsr_abt; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index fb4bc42e72c5..7a986030145f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -48,7 +48,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); - ctxt->spsr_el1 = read_sysreg_el1(SYS_SPSR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -127,7 +127,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); - write_sysreg_el1(ctxt->spsr_el1, SYS_SPSR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); } static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index 97c110810527..accc1d5fba61 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -150,7 +150,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) if (!vcpu->arch.sysregs_loaded_on_cpu) { switch (spsr_idx) { case KVM_SPSR_SVC: - return vcpu->arch.ctxt.spsr_el1; + return __vcpu_sys_reg(vcpu, SPSR_EL1); case KVM_SPSR_ABT: return vcpu->arch.ctxt.spsr_abt; case KVM_SPSR_UND: @@ -185,7 +185,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) if (!vcpu->arch.sysregs_loaded_on_cpu) { switch (spsr_idx) { case KVM_SPSR_SVC: - vcpu->arch.ctxt.spsr_el1 = v; + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; break; case KVM_SPSR_ABT: vcpu->arch.ctxt.spsr_abt = v; -- cgit From 3c5ff0c60f2febb170bc4096d4b8a8390ebb0bad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Apr 2020 08:58:22 +0100 Subject: KVM: arm64: timers: Rename kvm_timer_sync_hwstate to kvm_timer_sync_user kvm_timer_sync_hwstate() has nothing to do with the timer HW state, but more to do with the state of a userspace interrupt controller. Change the suffix from _hwstate to_user, in keeping with the rest of the code. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/arm.c | 4 ++-- include/kvm/arm_arch_timer.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index a1fe0ea3254e..33d85a504720 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -615,7 +615,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) } } -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +void kvm_timer_sync_user(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index beb0e68cccaa..e52f2b2305b5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -723,7 +723,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); preempt_enable(); @@ -768,7 +768,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * timer virtual interrupt state. */ if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_arch_vcpu_ctxsync_fp(vcpu); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d120e6c323e7..a821dd1df0cf 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -71,7 +71,7 @@ int kvm_timer_hyp_init(bool); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_sync_user(struct kvm_vcpu *vcpu); bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); -- cgit From 41ce82f63c0624556b94b90551e4f70a0c913879 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 28 Jun 2019 15:23:43 +0100 Subject: KVM: arm64: timers: Move timer registers to the sys_regs file Move the timer gsisters to the sysreg file. This will further help when they are directly changed by a nesting hypervisor in the VNCR page. This requires moving the initialisation of the timer struct so that some of the helpers (such as arch_timer_ctx_index) can work correctly at an early stage. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 6 ++ arch/arm64/kvm/arch_timer.c | 155 ++++++++++++++++++++++++++++++-------- arch/arm64/kvm/trace_arm.h | 8 +- include/kvm/arm_arch_timer.h | 11 +-- 4 files changed, 136 insertions(+), 44 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 91b1adb6789c..e1a32c0707bb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -189,6 +189,12 @@ enum vcpu_sysreg { SP_EL1, SPSR_EL1, + CNTVOFF_EL2, + CNTV_CVAL_EL0, + CNTV_CTL_EL0, + CNTP_CVAL_EL0, + CNTP_CTL_EL0, + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 33d85a504720..32ba6fbc3814 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -51,6 +51,93 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +u32 timer_get_ctl(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +u64 timer_get_cval(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +static u64 timer_get_offset(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); + default: + return 0; + } +} + +static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; + break; + default: + WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); + } +} + u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -124,8 +211,8 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) { u64 cval, now; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); if (now < cval) { u64 ns; @@ -144,8 +231,8 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { WARN_ON(timer_ctx && timer_ctx->loaded); return timer_ctx && - !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); + ((timer_get_ctl(timer_ctx) & + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); } /* @@ -256,8 +343,8 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) if (!kvm_timer_irq_can_fire(timer_ctx)) return false; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); return cval <= now; } @@ -350,8 +437,8 @@ static void timer_save_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTV_CTL); @@ -359,8 +446,8 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case TIMER_PTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); @@ -429,14 +516,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; case NR_KVM_TIMERS: BUG(); @@ -528,7 +615,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) kvm_timer_vcpu_load_nogic(vcpu); } - set_cntvoff(map.direct_vtimer->cntvoff); + set_cntvoff(timer_get_offset(map.direct_vtimer)); kvm_timer_unblocking(vcpu); @@ -639,8 +726,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - vcpu_vtimer(vcpu)->cnt_ctl = 0; - vcpu_ptimer(vcpu)->cnt_ctl = 0; + timer_set_ctl(vcpu_vtimer(vcpu), 0); + timer_set_ctl(vcpu_ptimer(vcpu), 0); if (timer->enabled) { kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); @@ -668,13 +755,13 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, tmp, kvm) - vcpu_vtimer(tmp)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(tmp), cntvoff); /* * When called from the vcpu create path, the CPU being created is not * included in the loop above, so we just set it here as well. */ - vcpu_vtimer(vcpu)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(vcpu), cntvoff); mutex_unlock(&kvm->lock); } @@ -684,9 +771,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; + /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); - ptimer->cntvoff = 0; + timer_set_offset(ptimer, 0); hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; @@ -704,9 +794,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->host_timer_irq_flags = host_vtimer_irq_flags; ptimer->host_timer_irq_flags = host_ptimer_irq_flags; - - vtimer->vcpu = vcpu; - ptimer->vcpu = vcpu; } static void kvm_timer_init_interrupt(void *info) @@ -756,10 +843,12 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit * regardless of ENABLE bit for our implementation convenience. */ + u32 ctl = timer_get_ctl(timer); + if (!kvm_timer_compute_delta(timer)) - return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; - else - return timer->cnt_ctl; + ctl |= ARCH_TIMER_CTRL_IT_STAT; + + return ctl; } u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) @@ -795,8 +884,8 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: - val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; - val &= lower_32_bits(val); + val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer); + val = lower_32_bits(val); break; case TIMER_REG_CTL: @@ -804,11 +893,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, break; case TIMER_REG_CVAL: - val = timer->cnt_cval; + val = timer_get_cval(timer); break; case TIMER_REG_CNT: - val = kvm_phys_timer_read() - timer->cntvoff; + val = kvm_phys_timer_read() - timer_get_offset(timer); break; default: @@ -842,15 +931,15 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; + timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val); break; case TIMER_REG_CTL: - timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT); break; case TIMER_REG_CVAL: - timer->cnt_cval = val; + timer_set_cval(timer, val); break; default: diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 4c71270cc097..4691053c5ee4 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -301,8 +301,8 @@ TRACE_EVENT(kvm_timer_save_state, ), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + __entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), @@ -323,8 +323,8 @@ TRACE_EVENT(kvm_timer_restore_state, ), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + __entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index a821dd1df0cf..51c19381108c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -26,16 +26,9 @@ enum kvm_arch_timer_regs { struct arch_timer_context { struct kvm_vcpu *vcpu; - /* Registers: control register, timer value */ - u32 cnt_ctl; - u64 cnt_cval; - /* Timer IRQ */ struct kvm_irq_level irq; - /* Virtual offset */ - u64 cntvoff; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; @@ -109,4 +102,8 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timer_regs treg, u64 val); +/* Needed for tracing */ +u32 timer_get_ctl(struct arch_timer_context *ctxt); +u64 timer_get_cval(struct arch_timer_context *ctxt); + #endif -- cgit From 6de7dd31ded078b31a3812a46d8874f871b4e82f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 7 Jul 2020 15:34:06 +0100 Subject: KVM: arm64: Don't use has_vhe() for CHOOSE_HYP_SYM() The recently introduced CHOOSE_HYP_SYM() macro picks one symbol or another, depending on whether the kernel run as a VHE hypervisor or not. For that, it uses the has_vhe() helper, which is itself implemented as a final capability. Unfortunately, __copy_hyp_vect_bpi now indirectly uses CHOOSE_HYP_SYM to get the __bp_harden_hyp_vecs symbol, using has_vhe() in the process. At this stage, the capability isn't final and things explode: [ 0.000000] ACPI: SRAT not present [ 0.000000] percpu: Embedded 34 pages/cpu s101264 r8192 d29808 u139264 [ 0.000000] Detected PIPT I-cache on CPU0 [ 0.000000] ------------[ cut here ]------------ [ 0.000000] kernel BUG at arch/arm64/include/asm/cpufeature.h:459! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc4-00080-gd630681366e5 #1388 [ 0.000000] pstate: 80000085 (Nzcv daIf -PAN -UAO BTYPE=--) [ 0.000000] pc : check_branch_predictor+0x3a4/0x408 [ 0.000000] lr : check_branch_predictor+0x2a4/0x408 [ 0.000000] sp : ffff800011693e90 [ 0.000000] x29: ffff800011693e90 x28: ffff8000116a1530 [ 0.000000] x27: ffff8000112c1008 x26: ffff800010ca6ff8 [ 0.000000] x25: ffff8000112c1000 x24: ffff8000116a1320 [ 0.000000] x23: 0000000000000000 x22: ffff8000112c1000 [ 0.000000] x21: ffff800010177120 x20: ffff8000116ae108 [ 0.000000] x19: 0000000000000000 x18: ffff800011965c90 [ 0.000000] x17: 0000000000022000 x16: 0000000000000003 [ 0.000000] x15: 00000000ffffffff x14: ffff8000118c3a38 [ 0.000000] x13: 0000000000000021 x12: 0000000000000022 [ 0.000000] x11: d37a6f4de9bd37a7 x10: 000000000000001d [ 0.000000] x9 : 0000000000000000 x8 : ffff800011f8dad8 [ 0.000000] x7 : ffff800011965ad0 x6 : 0000000000000003 [ 0.000000] x5 : 0000000000000000 x4 : 0000000000000000 [ 0.000000] x3 : 0000000000000100 x2 : 0000000000000004 [ 0.000000] x1 : ffff8000116ae148 x0 : 0000000000000000 [ 0.000000] Call trace: [ 0.000000] check_branch_predictor+0x3a4/0x408 [ 0.000000] update_cpu_capabilities+0x84/0x138 [ 0.000000] init_cpu_features+0x2c0/0x2d8 [ 0.000000] cpuinfo_store_boot_cpu+0x54/0x64 [ 0.000000] smp_prepare_boot_cpu+0x2c/0x60 [ 0.000000] start_kernel+0x16c/0x574 [ 0.000000] Code: 17ffffc7 91010281 14000198 17ffffca (d4210000) This is addressed using a two-fold process: - Replace has_vhe() with is_kernel_in_hyp_mode(), which tests whether we are running at EL2. - Make CHOOSE_HYP_SYM() return an *undefined* symbol when compiled in the nVHE hypervisor, as we really should never use this helper in the nVHE-specific code. With this in place, we're back to a bootable kernel again. Fixes: b877e9849d41 ("KVM: arm64: Build hyp-entry.S separately for VHE/nVHE") Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5a91aaae78d2..1a66815ea01b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -62,8 +62,26 @@ #define CHOOSE_VHE_SYM(sym) sym #define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) -#define CHOOSE_HYP_SYM(sym) (has_vhe() ? CHOOSE_VHE_SYM(sym) \ + +#ifndef __KVM_NVHE_HYPERVISOR__ +/* + * BIG FAT WARNINGS: + * + * - Don't be tempted to change the following is_kernel_in_hyp_mode() + * to has_vhe(). has_vhe() is implemented as a *final* capability, + * while this is used early at boot time, when the capabilities are + * not final yet.... + * + * - Don't let the nVHE hypervisor have access to this, as it will + * pick the *wrong* symbol (yes, it runs at EL2...). + */ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \ : CHOOSE_NVHE_SYM(sym)) +#else +/* The nVHE hypervisor shouldn't even try to access anything */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol +#endif /* Translate a kernel address @ptr into its equivalent linear mapping */ #define kvm_ksym_ref(ptr) \ -- cgit From 465931e70881476a210d44705102ef8b6ee6cdb0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 18 Jun 2020 18:56:29 +0100 Subject: clk: rockchip: Revert "fix wrong mmc sample phase shift for rk3328" This reverts commit 82f4b67f018c88a7cc9337f0067ed3d6ec352648. According to a subsequent revert in the vendor kernel, the original change was based on unclear documentation and was in fact incorrect. Emprically, my board's HS200 eMMC at 200MHZ apparently gets lucky with a phase where this had no impact, but limiting max-frequency to 150MHz to match the nominal capability of the I/O pins made it virtually unusable, constantly throwing errors and retuning. With this revert, it starts behaving perfectly at 150MHz too. Fixes: 82f4b67f018c ("clk: rockchip: fix wrong mmc sample phase shift for rk3328") Signed-off-by: Robin Murphy Reviewed-by: Shawn Lin Link: https://lore.kernel.org/r/c80eb52e34c03f817586b6b7912fbd4e31be9079.1589475794.git.robin.murphy@arm.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3328.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c index c186a1985bf4..2429b7c2a8b3 100644 --- a/drivers/clk/rockchip/clk-rk3328.c +++ b/drivers/clk/rockchip/clk-rk3328.c @@ -808,22 +808,22 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3328_SDMMC_CON0, 1), MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", - RK3328_SDMMC_CON1, 0), + RK3328_SDMMC_CON1, 1), MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3328_SDIO_CON0, 1), MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", - RK3328_SDIO_CON1, 0), + RK3328_SDIO_CON1, 1), MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK3328_EMMC_CON0, 1), MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", - RK3328_EMMC_CON1, 0), + RK3328_EMMC_CON1, 1), MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext", RK3328_SDMMC_EXT_CON0, 1), MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext", - RK3328_SDMMC_EXT_CON1, 0), + RK3328_SDMMC_EXT_CON1, 1), }; static const char *const rk3328_critical_clocks[] __initconst = { -- cgit From ac7cbb221159efbf4491de0177b4e20a5d62e5ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2020 16:20:06 -0400 Subject: NFS: Allow applications to speed up readdir+statx() using AT_STATX_DONT_SYNC If the application uses the AT_STATX_DONT_SYNC flag after doing readdir(), then we should still mark the parent inode as seeing a readdirplus hit. That ensures that we continue to use readdirplus in the 'ls -l' type of workflow to do fast lookups of the dentries. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0bf1f835de01..90f77fd0d1ca 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -794,8 +794,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat, trace_nfs_getattr_enter(inode); - if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) + if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { + nfs_readdirplus_parent_cache_hit(path->dentry); goto out_no_update; + } /* Flush out writes to the server in order to update c/mtime. */ if ((request_mask & (STATX_CTIME|STATX_MTIME)) && -- cgit From f97ff92bd16644c8d6f579bc6728974057de3ca5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Jun 2020 13:22:04 -0400 Subject: pNFS/flexfiles: Clean up redundant calls to pnfs_put_lseg() Both nfs_pageio_reset_read_mds() and nfs_pageio_reset_write_mds() do call pnfs_generic_pg_cleanup() for us. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index de03e440b7ee..b3ec12e5fde1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -844,8 +844,7 @@ retry: if (!ds) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; + pnfs_generic_pg_cleanup(pgio); /* Sleep for 1 second before retrying */ ssleep(1); goto retry; @@ -871,8 +870,6 @@ out_mds: 0, NFS4_MAX_UINT64, IOMODE_READ, NFS_I(pgio->pg_inode)->layout, pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; pgio->pg_maxretrans = 0; nfs_pageio_reset_read_mds(pgio); } @@ -916,8 +913,7 @@ retry: if (!ds) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; + pnfs_generic_pg_cleanup(pgio); /* Sleep for 1 second before retrying */ ssleep(1); goto retry; @@ -939,8 +935,6 @@ out_mds: 0, NFS4_MAX_UINT64, IOMODE_RW, NFS_I(pgio->pg_inode)->layout, pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; pgio->pg_maxretrans = 0; nfs_pageio_reset_write_mds(pgio); pgio->pg_error = -EAGAIN; -- cgit From 18eb87f4443a0a23757e8a1ce3b3fa03714702fd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Jun 2020 13:30:39 -0400 Subject: pNFS/flexfiles: The mirror count could depend on the layout segment range Make sure we specify the layout segment range when calculating the mirror count. In theory, that number could depend on the range to which we're writing. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b3ec12e5fde1..b26173d72735 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -947,8 +947,8 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_RW, false, GFP_NOFS); -- cgit From 9a67fcc8f3fd1e294922f28f20003c31d7f6cfeb Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:53 +0000 Subject: nfs: add client side only definitions for user xattrs Add client-side only definitions for user extended attributes (RFC8276). These are the access bits as used by the client code, and the CLNT procedure number definition. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 5 +++++ include/linux/nfs_fs.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e6ca9d1d2e76..db13026ac7d1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -553,6 +553,11 @@ enum { NFSPROC4_CLNT_LAYOUTERROR, NFSPROC4_CLNT_COPY_NOTIFY, + + NFSPROC4_CLNT_GETXATTR, + NFSPROC4_CLNT_SETXATTR, + NFSPROC4_CLNT_LISTXATTRS, + NFSPROC4_CLNT_REMOVEXATTR, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6ee9119acc5d..b743988fcbd0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -212,6 +212,9 @@ struct nfs4_copy_state { #define NFS_ACCESS_EXTEND 0x0008 #define NFS_ACCESS_DELETE 0x0010 #define NFS_ACCESS_EXECUTE 0x0020 +#define NFS_ACCESS_XAREAD 0x0040 +#define NFS_ACCESS_XAWRITE 0x0080 +#define NFS_ACCESS_XALIST 0x0100 /* * Cache validity bit flags -- cgit From 04a5da690e8f2da23c2ac940f2921e3aa622db82 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:54 +0000 Subject: NFSv4.2: define limits and sizes for user xattr handling Set limits for extended attributes (attribute value size and listxattr buffer size), based on the fs-independent limits (XATTR_*_MAX). Define the maximum XDR sizes for the RFC 8276 XATTR operations. In the case of operations that carry a larger payload (SETXATTR, GETXATTR, LISTXATTR), these exclude that payload, which is added as separate pages, like other operations do. Define, much like for read and write operations, the maximum overhead sizes for get/set/listxattr, and use them to limit the maximum payload size for those operations, in combination with the channel attributes. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 19 ++++++++++-- fs/nfs/nfs42.h | 16 ++++++++++ fs/nfs/nfs42xdr.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 6 ++++ fs/nfs/nfs4client.c | 31 ++++++++++++++++++++ include/linux/nfs_fs_sb.h | 5 ++++ 6 files changed, 149 insertions(+), 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f1ff3076e4a4..055040bf1a8e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -50,6 +50,7 @@ #include "nfs.h" #include "netns.h" #include "sysfs.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -749,7 +750,7 @@ error: static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) { - unsigned long max_rpc_payload; + unsigned long max_rpc_payload, raw_max_rpc_payload; /* Work out a lot of parameters */ if (server->rsize == 0) @@ -762,7 +763,9 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) server->wsize = nfs_block_size(fsinfo->wtmax, NULL); - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + raw_max_rpc_payload = rpc_max_payload(server->client); + max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL); + if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; if (server->rsize > NFS_MAX_FILE_IO_SIZE) @@ -795,6 +798,18 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + +#ifdef CONFIG_NFS_V4_2 + /* + * Defaults until limited by the session parameters. + */ + server->gxasize = min_t(unsigned int, raw_max_rpc_payload, + XATTR_SIZE_MAX); + server->sxasize = min_t(unsigned int, raw_max_rpc_payload, + XATTR_SIZE_MAX); + server->lxasize = min_t(unsigned int, raw_max_rpc_payload, + nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); +#endif } /* diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index c891af949886..51de8ddc7d88 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +#include + /* * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support * more? Need to consider not to pre-alloc too much for a compound. @@ -36,5 +38,19 @@ static inline bool nfs42_files_from_same_server(struct file *in, return nfs4_check_serverowner_major_id(c_in->cl_serverowner, c_out->cl_serverowner); } + +/* + * Maximum XDR buffer size needed for a listxattr buffer of buflen size. + * + * The upper boundary is a buffer with all 1-byte sized attribute names. + * They would be 7 bytes long in the eventual buffer ("user.x\0"), and + * 8 bytes long XDR-encoded. + * + * Include the trailing eof word as well. + */ +static inline u32 nfs42_listxattr_xdrsize(u32 buflen) +{ + return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; +} #endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c03f3246d6c5..6712daa9d85b 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -169,6 +169,80 @@ decode_clone_maxsz + \ decode_getattr_maxsz) +#ifdef CONFIG_NFS_V4_2 +/* Not limited by NFS itself, limited by the generic xattr code */ +#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) + +#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_xattr_name_maxsz + 1) +#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) +#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1) +#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) + +#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getxattr_maxsz) +#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getxattr_maxsz) +#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setxattr_maxsz) +#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setxattr_maxsz) +#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_listxattrs_maxsz) +#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_listxattrs_maxsz) +#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_removexattr_maxsz) +#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_removexattr_maxsz) + +/* + * These values specify the maximum amount of data that is not + * associated with the extended attribute name or extended + * attribute list in the SETXATTR, GETXATTR and LISTXATTR + * respectively. + */ +const u32 nfs42_maxsetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_encode_hdr_maxsz + + encode_sequence_maxsz + + encode_putfh_maxsz + 1 + + nfs4_xattr_name_maxsz) + * XDR_UNIT); + +const u32 nfs42_maxgetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz + 1) * XDR_UNIT); + +const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz + 3) * XDR_UNIT); +#endif + static void encode_fallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args) { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2b7f6dcd2eb8..526b3e70d57c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -557,6 +557,12 @@ static inline void nfs4_unregister_sysctl(void) /* nfs4xdr.c */ extern const struct rpc_procinfo nfs4_procedures[]; +#ifdef CONFIG_NFS_V4_2 +extern const u32 nfs42_maxsetxattr_overhead; +extern const u32 nfs42_maxgetxattr_overhead; +extern const u32 nfs42_maxlistxattrs_overhead; +#endif + struct nfs4_mount_data; /* callback_xdr.c */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0bd77cc1f639..c41cbd86612c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -992,6 +992,36 @@ static void nfs4_session_limit_rwsize(struct nfs_server *server) #endif /* CONFIG_NFS_V4_1 */ } +/* + * Limit xattr sizes using the channel attributes. + */ +static void nfs4_session_limit_xasize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_2 + struct nfs4_session *sess; + u32 server_gxa_sz; + u32 server_sxa_sz; + u32 server_lxa_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + + sess = server->nfs_client->cl_session; + + server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead; + server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead; + server_lxa_sz = sess->fc_attrs.max_resp_sz - + nfs42_maxlistxattrs_overhead; + + if (server->gxasize > server_gxa_sz) + server->gxasize = server_gxa_sz; + if (server->sxasize > server_sxa_sz) + server->sxasize = server_sxa_sz; + if (server->lxasize > server_lxa_sz) + server->lxasize = server_lxa_sz; +#endif +} + static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1039,6 +1069,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, goto out; nfs4_session_limit_rwsize(server); + nfs4_session_limit_xasize(server); if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 465fa98258a3..128e01acb4ca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -163,6 +163,11 @@ struct nfs_server { unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ unsigned int bsize; /* server block size */ +#ifdef CONFIG_NFS_V4_2 + unsigned int gxasize; /* getxattr size */ + unsigned int sxasize; /* setxattr size */ + unsigned int lxasize; /* listxattr size */ +#endif unsigned int acregmin; /* attr cache timeouts */ unsigned int acregmax; unsigned int acdirmin; -- cgit From b78ef845c35dbae25e57b598901a65b13d940c81 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:55 +0000 Subject: NFSv4.2: query the server for extended attribute support Query the server for extended attribute support, and record it as the NFS_CAP_XATTR flag in the server capabilities. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +++ fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4xdr.c | 25 +++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 055040bf1a8e..4b8cc93913f7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -809,6 +809,9 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, XATTR_SIZE_MAX); server->lxasize = min_t(unsigned int, raw_max_rpc_payload, nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); + + if (fsinfo->xattr_support) + server->caps |= NFS_CAP_XATTR; #endif } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e32717fd1169..64e081459327 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -256,6 +256,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_CLONE_BLKSIZE + | FATTR4_WORD2_XATTR_SUPPORT }; const u32 nfs4_fs_locations_bitmap[3] = { @@ -3740,7 +3741,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 47817ef0aadb..9e1b07640e9a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4201,6 +4201,26 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } +static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[2] & (FATTR4_WORD2_XATTR_SUPPORT - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_XATTR_SUPPORT)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; + } + dprintk("%s: XATTR support=%s\n", __func__, + *res == 0 ? "false" : "true"); + return 0; +} + static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); @@ -4855,6 +4875,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status) goto xdr_error; + status = decode_attr_xattrsupport(xdr, bitmap, + &fsinfo->xattr_support); + if (status) + goto xdr_error; + status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 128e01acb4ca..7eae72a8762e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -286,5 +286,6 @@ struct nfs_server { #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) +#define NFS_CAP_XATTR (1U << 28) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5fd0a9ef425f..dee9b1cfa972 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -150,6 +150,7 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + __u32 xattr_support; /* User xattrs supported */ }; struct nfs_fsstat { -- cgit From 3e1f02123fba086d32dfd5729e6f4e2b54654acc Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:56 +0000 Subject: NFSv4.2: add client side XDR handling for extended attributes Define the argument and response structures that will be used for RFC 8276 extended attribute RPC calls, and implement the necessary functions to encode/decode the extended attribute operations. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 6 + include/linux/nfs_xdr.h | 59 +++++++- 3 files changed, 430 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6712daa9d85b..cc50085e151c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -169,7 +169,6 @@ decode_clone_maxsz + \ decode_getattr_maxsz) -#ifdef CONFIG_NFS_V4_2 /* Not limited by NFS itself, limited by the generic xattr code */ #define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) @@ -241,7 +240,6 @@ const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_decode_hdr_maxsz + decode_sequence_maxsz + decode_putfh_maxsz + 3) * XDR_UNIT); -#endif static void encode_fallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args) @@ -407,6 +405,210 @@ static void encode_layouterror(struct xdr_stream *xdr, encode_device_error(xdr, &args->errors[0]); } +static void encode_setxattr(struct xdr_stream *xdr, + const struct nfs42_setxattrargs *arg, + struct compound_hdr *hdr) +{ + __be32 *p; + + BUILD_BUG_ON(XATTR_CREATE != SETXATTR4_CREATE); + BUILD_BUG_ON(XATTR_REPLACE != SETXATTR4_REPLACE); + + encode_op_hdr(xdr, OP_SETXATTR, decode_setxattr_maxsz, hdr); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(arg->xattr_flags); + encode_string(xdr, strlen(arg->xattr_name), arg->xattr_name); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(arg->xattr_len); + if (arg->xattr_len) + xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len); +} + +static int decode_setxattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_SETXATTR); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + + +static void encode_getxattr(struct xdr_stream *xdr, const char *name, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_GETXATTR, decode_getxattr_maxsz, hdr); + encode_string(xdr, strlen(name), name); +} + +static int decode_getxattr(struct xdr_stream *xdr, + struct nfs42_getxattrres *res, + struct rpc_rqst *req) +{ + int status; + __be32 *p; + u32 len, rdlen; + + status = decode_op_hdr(xdr, OP_GETXATTR); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > req->rq_rcv_buf.page_len) + return -ERANGE; + + res->xattr_len = len; + + if (len > 0) { + rdlen = xdr_read_pages(xdr, len); + if (rdlen < len) + return -EIO; + } + + return 0; +} + +static void encode_removexattr(struct xdr_stream *xdr, const char *name, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_REMOVEXATTR, decode_removexattr_maxsz, hdr); + encode_string(xdr, strlen(name), name); +} + + +static int decode_removexattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVEXATTR); + if (status) + goto out; + + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static void encode_listxattrs(struct xdr_stream *xdr, + const struct nfs42_listxattrsargs *arg, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr); + + p = reserve_space(xdr, 12); + if (unlikely(!p)) + return; + + p = xdr_encode_hyper(p, arg->cookie); + /* + * RFC 8276 says to specify the full max length of the LISTXATTRS + * XDR reply. Count is set to the XDR length of the names array + * plus the EOF marker. So, add the cookie and the names count. + */ + *p = cpu_to_be32(arg->count + 8 + 4); +} + +static int decode_listxattrs(struct xdr_stream *xdr, + struct nfs42_listxattrsres *res) +{ + int status; + __be32 *p; + u32 count, len, ulen; + size_t left, copied; + char *buf; + + status = decode_op_hdr(xdr, OP_LISTXATTRS); + if (status) { + /* + * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL + * should be translated to ERANGE. + */ + if (status == -ETOOSMALL) + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + return -EIO; + + xdr_decode_hyper(p, &res->cookie); + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + left = res->xattr_len; + buf = res->xattr_buf; + + count = be32_to_cpup(p); + copied = 0; + + /* + * We have asked for enough room to encode the maximum number + * of possible attribute names, so everything should fit. + * + * But, don't rely on that assumption. Just decode entries + * until they don't fit anymore, just in case the server did + * something odd. + */ + while (count--) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EIO; + + ulen = len + XATTR_USER_PREFIX_LEN + 1; + if (buf) { + if (ulen > left) { + status = -ERANGE; + goto out; + } + + memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); + + buf[ulen - 1] = 0; + buf += ulen; + left -= ulen; + } + copied += ulen; + } + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + res->eof = be32_to_cpup(p); + res->copied = copied; + +out: + if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) + status = -E2BIG; + + return status; +} + /* * Encode ALLOCATE request */ @@ -1062,4 +1264,166 @@ out: return status; } +#ifdef CONFIG_NFS_V4_2 +static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_setxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setxattr(xdr, args, &hdr); + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + void *data) +{ + struct nfs42_setxattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, req); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + + status = decode_setxattr(xdr, &res->cinfo); +out: + return status; +} + +static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_getxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + size_t plen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getxattr(xdr, args->xattr_name, &hdr); + + plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, + hdr.replen); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_getxattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_getxattr(xdr, res, rqstp); +out: + return status; +} + +static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_listxattrsargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_listxattrs(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, + hdr.replen); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_listxattrsres *res = data; + struct compound_hdr hdr; + int status; + + xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_listxattrs(xdr, res); +out: + return status; +} + +static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_removexattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_removexattr(xdr, args->xattr_name, &hdr); + encode_nops(&hdr); +} + +static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, void *data) +{ + struct nfs42_removexattrres *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, req); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + + status = decode_removexattr(xdr, &res->cinfo); +out: + return status; +} +#endif #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9e1b07640e9a..388ac520b104 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7481,6 +7481,8 @@ static struct { { NFS4ERR_SYMLINK, -ELOOP }, { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, { NFS4ERR_DEADLOCK, -EDEADLK }, + { NFS4ERR_NOXATTR, -ENODATA }, + { NFS4ERR_XATTR2BIG, -E2BIG }, { -1, -EIO } }; @@ -7609,6 +7611,10 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), + PROC42(GETXATTR, enc_getxattr, dec_getxattr), + PROC42(SETXATTR, enc_setxattr, dec_setxattr), + PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), + PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dee9b1cfa972..9408f3252c8e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1498,7 +1498,64 @@ struct nfs42_seek_res { u32 sr_eof; u64 sr_offset; }; -#endif + +struct nfs42_setxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + u32 xattr_flags; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_setxattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +struct nfs42_getxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_getxattrres { + struct nfs4_sequence_res seq_res; + size_t xattr_len; +}; + +struct nfs42_listxattrsargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + u32 count; + u64 cookie; + struct page **xattr_pages; +}; + +struct nfs42_listxattrsres { + struct nfs4_sequence_res seq_res; + struct page *scratch; + void *xattr_buf; + size_t xattr_len; + u64 cookie; + bool eof; + size_t copied; +}; + +struct nfs42_removexattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; +}; + +struct nfs42_removexattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +#endif /* CONFIG_NFS_V4_2 */ struct nfs_page; -- cgit From d2ae4f8b21c111bb795c557588d89dccd005828d Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:57 +0000 Subject: nfs: define nfs_access_get_cached function The only consumer of nfs_access_get_cached_rcu and nfs_access_cached calls these static functions in order to first try RCU access, and then locked access. Combine them in to a single function, and call that. Make this function available to the rest of the NFS code. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 20 ++++++++++++++++---- include/linux/nfs_fs.h | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5a331da5f55a..f04fc0f7843b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2460,7 +2460,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co return NULL; } -static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) +static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -2533,6 +2533,20 @@ out: return err; } +int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct +nfs_access_entry *res, bool may_block) +{ + int status; + + status = nfs_access_get_cached_rcu(inode, cred, res); + if (status != 0) + status = nfs_access_get_cached_locked(inode, cred, res, + may_block); + + return status; +} +EXPORT_SYMBOL_GPL(nfs_access_get_cached); + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2647,9 +2661,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached_rcu(inode, cred, &cache); - if (status != 0) - status = nfs_access_get_cached(inode, cred, &cache, may_block); + status = nfs_access_get_cached(inode, cred, &cache, may_block); if (status == 0) goto out_cached; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b743988fcbd0..714b577dce19 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -493,6 +493,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, + bool may_block); /* * linux/fs/nfs/symlink.c -- cgit From 72832a2453d9ca752beedb3a4fb2fc82e375c46c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:58 +0000 Subject: NFSv4.2: query the extended attribute access bits RFC 8276 defines separate ACCESS bits for extended attribute checking. Query them in nfs_do_access and opendata. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ fs/nfs/nfs4proc.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f04fc0f7843b..a12f42e7d8c7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2673,6 +2673,10 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) * Determine which access bits we want to ask for... */ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; + if (nfs_server_capable(inode, NFS_CAP_XATTR)) { + cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE | + NFS_ACCESS_XALIST; + } if (S_ISDIR(inode->i_mode)) cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; else diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 64e081459327..c88351f8b18d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1341,6 +1341,12 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; +#ifdef CONFIG_NFS_V4_2 + if (server->caps & NFS_CAP_XATTR) + p->o_arg.access |= NFS4_ACCESS_XAREAD | + NFS4_ACCESS_XAWRITE | + NFS4_ACCESS_XALIST; +#endif } } p->o_arg.clientid = server->nfs_client->cl_clientid; -- cgit From 1b523ca972edba2025e45080b08385928a29aa30 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:59 +0000 Subject: nfs: modify update_changeattr to deal with regular files Until now, change attributes in change_info form were only returned by directory operations. However, they are also used for the RFC 8276 extended attribute operations, which work on both directories and regular files. Modify update_changeattr to deal: * Rename it to nfs4_update_changeattr and make it non-static. * Don't always use INO_INVALID_DATA, this isn't needed for a directory that only had its extended attributes changed by us. * Existing callers now always pass in INO_INVALID_DATA. For the current callers of this function, behavior is unchanged. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++++ fs/nfs/nfs4proc.c | 70 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 526b3e70d57c..c16ec860b8b3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -324,6 +324,11 @@ extern int update_open_stateid(struct nfs4_state *state, extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); +extern void nfs4_update_changeattr(struct inode *dir, + struct nfs4_change_info *cinfo, + unsigned long timestamp, + unsigned long cache_validity); + #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c88351f8b18d..6540071cb228 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1158,37 +1158,48 @@ nfs4_dec_nlink_locked(struct inode *inode) } static void -update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo, +nfs4_update_changeattr_locked(struct inode *inode, + struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity) { - struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME - | NFS_INO_INVALID_DATA | cache_validity; - if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { + + if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; } else { - nfs_force_lookup_revalidate(dir); - if (cinfo->before != inode_peek_iversion_raw(dir)) + if (S_ISDIR(inode->i_mode)) { + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_force_lookup_revalidate(inode); + } else { + if (!NFS_PROTO(inode)->have_delegation(inode, + FMODE_READ)) + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + } + + if (cinfo->before != inode_peek_iversion_raw(inode)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + NFS_INO_INVALID_ACL; } - inode_set_iversion_raw(dir, cinfo->after); + inode_set_iversion_raw(inode, cinfo->after); nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; - nfs_fscache_invalidate(dir); + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); } -static void -update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, +void +nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, cinfo, timestamp, cache_validity); + nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity); spin_unlock(&dir->i_lock); } @@ -2644,8 +2655,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, data->file_created = true; if (data->file_created || inode_peek_iversion_raw(dir) != o_res->cinfo.after) - update_changeattr(dir, &o_res->cinfo, - o_res->f_attr->time_start, 0); + nfs4_update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start, + NFS_INO_INVALID_DATA); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; @@ -4531,7 +4543,8 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype) status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); if (status == 0) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, &res.cinfo, timestamp, 0); + nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp, + NFS_INO_INVALID_DATA); /* Removing a directory decrements nlink in the parent */ if (ftype == NF4DIR && dir->i_nlink > 2) nfs4_dec_nlink_locked(dir); @@ -4615,8 +4628,9 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) &data->timeout) == -EAGAIN) return 0; if (task->tk_status == 0) - update_changeattr(dir, &res->cinfo, - res->dir_attr->time_start, 0); + nfs4_update_changeattr(dir, &res->cinfo, + res->dir_attr->time_start, + NFS_INO_INVALID_DATA); return 1; } @@ -4660,16 +4674,18 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, if (task->tk_status == 0) { if (new_dir != old_dir) { /* Note: If we moved a directory, nlink will change */ - update_changeattr(old_dir, &res->old_cinfo, + nfs4_update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start, - NFS_INO_INVALID_OTHER); - update_changeattr(new_dir, &res->new_cinfo, + NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_DATA); + nfs4_update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start, - NFS_INO_INVALID_OTHER); + NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_DATA); } else - update_changeattr(old_dir, &res->old_cinfo, + nfs4_update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start, - 0); + NFS_INO_INVALID_DATA); } return 1; } @@ -4710,7 +4726,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0); + nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, + NFS_INO_INVALID_DATA); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) nfs_setsecurity(inode, res.fattr, res.label); @@ -4788,8 +4805,9 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { spin_lock(&dir->i_lock); - update_changeattr_locked(dir, &data->res.dir_cinfo, - data->res.fattr->time_start, 0); + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, + data->res.fattr->time_start, + NFS_INO_INVALID_DATA); /* Creating a directory bumps nlink in the parent */ if (data->arg.ftype == NF4DIR) nfs4_inc_nlink_locked(dir); -- cgit From 0f44da51aeef9c974ea744c0d9e24d54eec4e94c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:00 +0000 Subject: nfs: define and use the NFS_INO_INVALID_XATTR flag Define the NFS_INO_INVALID_XATTR flag, to be used for the NFSv4.2 xattr cache, and use it where appropriate. No functional change as yet. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++++++- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfstrace.h | 3 ++- include/linux/nfs_fs.h | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0bf1f835de01..629af798dfc9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -204,7 +204,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) flags &= ~NFS_INO_INVALID_OTHER; flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_XATTR); } if (inode->i_mapping->nrpages == 0) @@ -542,6 +543,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + if (nfs_server_capable(inode, NFS_CAP_XATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1375,6 +1378,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + else if (nfs_server_capable(inode, NFS_CAP_XATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ ts = inode->i_ctime; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6540071cb228..0d123fe0a423 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1183,7 +1183,8 @@ nfs4_update_changeattr_locked(struct inode *inode, if (cinfo->before != inode_peek_iversion_raw(inode)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR; } inode_set_iversion_raw(inode, cinfo->after); nfsi->read_cache_jiffies = timestamp; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 547cec79899f..5a59dcdce0b2 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -59,7 +59,8 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ - { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ + { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }) TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); TRACE_DEFINE_ENUM(NFS_INO_STALE); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 714b577dce19..943ee750d68c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -234,6 +234,7 @@ struct nfs4_copy_state { #define NFS_INO_DATA_INVAL_DEFER \ BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ +#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ -- cgit From ccde1e9c0130b4182ae91adac3908f6f3277580a Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:01 +0000 Subject: nfs: make the buf_to_pages_noslab function available to the nfs code Make the buf_to_pages_noslab function available to the rest of the NFS code. Rename it to nfs4_buf_to_pages_noslab to be consistent. This will be used later in the NFSv4.2 xattr code. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c16ec860b8b3..2fa9e4ea98d2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -328,6 +328,8 @@ extern void nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, unsigned long timestamp, unsigned long cache_validity); +extern int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, + struct page **pages); #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d123fe0a423..0fbd2925a828 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5541,7 +5541,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) -static int buf_to_pages_noslab(const void *buf, size_t buflen, +int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages) { struct page *newpage, **spages; @@ -5783,7 +5783,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); + i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_make_writeable(inode); -- cgit From c10a75145febb588d96c6adda6eb5ae2338e4e32 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:02 +0000 Subject: NFSv4.2: add the extended attribute proc functions. Implement the extended attribute procedures for NFSv4.2 extended attribute support (RFC 8276). Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 8 ++ fs/nfs/nfs42proc.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 51de8ddc7d88..0fe5aacbcfdf 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -39,6 +39,14 @@ static inline bool nfs42_files_from_same_server(struct file *in, c_out->cl_serverowner); } +ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen); +int nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags); +ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp); +int nfs42_proc_removexattr(struct inode *inode, const char *name); + /* * Maximum XDR buffer size needed for a listxattr buffer of buflen size. * diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index e2ae54b35dfe..8c2e52bc986a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1088,3 +1088,239 @@ out_put_src_lock: nfs_put_lock_context(src_lock); return err; } + +#define NFS4XATTR_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) + +static int _nfs42_proc_removexattr(struct inode *inode, const char *name) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs42_removexattrargs args = { + .fh = NFS_FH(inode), + .xattr_name = name, + }; + struct nfs42_removexattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int ret; + unsigned long timestamp = jiffies; + + ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 1); + if (!ret) + nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + + return ret; +} + +static int _nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4XATTR_MAXPAGES]; + struct nfs42_setxattrargs arg = { + .fh = NFS_FH(inode), + .xattr_pages = pages, + .xattr_len = buflen, + .xattr_name = name, + .xattr_flags = flags, + }; + struct nfs42_setxattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret, np; + unsigned long timestamp = jiffies; + + if (buflen > server->sxasize) + return -ERANGE; + + if (buflen > 0) { + np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages); + if (np < 0) + return np; + } else + np = 0; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 1); + + for (; np > 0; np--) + put_page(pages[np - 1]); + + if (!ret) + nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + + return ret; +} + +static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4XATTR_MAXPAGES] = {}; + struct nfs42_getxattrargs arg = { + .fh = NFS_FH(inode), + .xattr_pages = pages, + .xattr_len = buflen, + .xattr_name = name, + }; + struct nfs42_getxattrres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETXATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret, np; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 0); + if (ret < 0) + return ret; + + if (buflen) { + if (res.xattr_len > buflen) + return -ERANGE; + _copy_from_pages(buf, pages, 0, res.xattr_len); + } + + np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE); + while (--np >= 0) + __free_page(pages[np]); + + return res.xattr_len; +} + +static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page **pages; + struct nfs42_listxattrsargs arg = { + .fh = NFS_FH(inode), + .cookie = *cookiep, + }; + struct nfs42_listxattrsres res = { + .eof = false, + .xattr_buf = buf, + .xattr_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LISTXATTRS], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + u32 xdrlen; + int ret, np; + + + res.scratch = alloc_page(GFP_KERNEL); + if (!res.scratch) + return -ENOMEM; + + xdrlen = nfs42_listxattr_xdrsize(buflen); + if (xdrlen > server->lxasize) + xdrlen = server->lxasize; + np = xdrlen / PAGE_SIZE + 1; + + pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + __free_page(res.scratch); + return -ENOMEM; + } + + arg.xattr_pages = pages; + arg.count = xdrlen; + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, + &res.seq_res, 0); + + if (ret >= 0) { + ret = res.copied; + *cookiep = res.cookie; + *eofp = res.eof; + } + + while (--np >= 0) { + if (pages[np]) + __free_page(pages[np]); + } + + __free_page(res.scratch); + kfree(pages); + + return ret; + +} + +ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t err; + + do { + err = _nfs42_proc_getxattr(inode, name, buf, buflen); + if (err >= 0) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_setxattr(struct inode *inode, const char *name, + const void *buf, size_t buflen, int flags) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_setxattr(inode, name, buf, buflen, flags); + if (!err) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, + size_t buflen, u64 *cookiep, bool *eofp) +{ + struct nfs4_exception exception = { }; + ssize_t err; + + do { + err = _nfs42_proc_listxattrs(inode, buf, buflen, + cookiep, eofp); + if (err >= 0) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_removexattr(struct inode *inode, const char *name) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_removexattr(inode, name); + if (!err) + break; + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + + return err; +} -- cgit From 012a211abd5db098094ce429de5f046368391e68 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:03 +0000 Subject: NFSv4.2: hook in the user extended attribute handlers Now that all the lower level code is there to make the RPC calls, hook it in to the xattr handlers and the listxattr entry point, to make them available. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0fbd2925a828..92a07956f07b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -66,6 +66,7 @@ #include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" +#include "nfs42.h" #include "nfs4trace.h" @@ -7440,6 +7441,103 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) #endif +#ifdef CONFIG_NFS_V4_2 +static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + /* + * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA* + * flags right now. Handling of xattr operations use the normal + * file read/write permissions. + * + * Just in case the server has other ideas (which RFC 8276 allows), + * do a cached access check for the XA* flags to possibly avoid + * doing an RPC and getting EACCES back. + */ + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XAWRITE)) + return -EACCES; + } + + if (buf == NULL) + return nfs42_proc_removexattr(inode, key); + else + return nfs42_proc_setxattr(inode, key, buf, buflen, flags); +} + +static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XAREAD)) + return -EACCES; + } + + return nfs42_proc_getxattr(inode, key, buf, buflen); +} + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + u64 cookie; + bool eof; + int ret, size; + char *buf; + size_t buflen; + struct nfs_access_entry cache; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return 0; + + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { + if (!(cache.mask & NFS_ACCESS_XALIST)) + return 0; + } + + cookie = 0; + eof = false; + buflen = list_len ? list_len : XATTR_LIST_MAX; + buf = list_len ? list : NULL; + size = 0; + + while (!eof) { + ret = nfs42_proc_listxattrs(inode, buf, buflen, + &cookie, &eof); + if (ret < 0) + return ret; + + if (list_len) { + buf += ret; + buflen -= ret; + } + size += ret; + } + + return size; +} + +#else + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_2 */ + /* * nfs_fhget will use either the mounted_on_fileid or the fileid */ @@ -10045,7 +10143,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { - ssize_t error, error2; + ssize_t error, error2, error3; error = generic_listxattr(dentry, list, size); if (error < 0) @@ -10058,7 +10156,17 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); if (error2 < 0) return error2; - return error + error2; + + if (list) { + list += error2; + size -= error2; + } + + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); + if (error3 < 0) + return error3; + + return error + error2 + error3; } static const struct inode_operations nfs4_dir_inode_operations = { @@ -10146,10 +10254,21 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { .set = nfs4_xattr_set_nfs4_acl, }; +#ifdef CONFIG_NFS_V4_2 +static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = nfs4_xattr_get_nfs4_user, + .set = nfs4_xattr_set_nfs4_user, +}; +#endif + const struct xattr_handler *nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, #ifdef CONFIG_NFS_V4_SECURITY_LABEL &nfs4_xattr_nfs4_label_handler, +#endif +#ifdef CONFIG_NFS_V4_2 + &nfs4_xattr_nfs4_user_handler, #endif NULL }; -- cgit From 95ad37f90c338e3fd4abf61cecfe02b6f3e080f0 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:04 +0000 Subject: NFSv4.2: add client side xattr caching. Implement client side caching for NFSv4.2 extended attributes. The cache is a per-inode hashtable, with name/value entries. There is one special entry for the listxattr cache. NFS inodes have a pointer to a cache structure. The cache structure is allocated on demand, freed when the cache is invalidated. Memory shrinkers keep the size in check. Large entries (> PAGE_SIZE) are collected by a separate shrinker, and freed more aggressively than others. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/inode.c | 9 +- fs/nfs/nfs42proc.c | 12 + fs/nfs/nfs42xattr.c | 1083 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 22 + fs/nfs/nfs4proc.c | 42 +- fs/nfs/nfs4super.c | 10 + include/linux/nfs_fs.h | 6 + include/uapi/linux/nfs_fs.h | 1 + 9 files changed, 1179 insertions(+), 8 deletions(-) create mode 100644 fs/nfs/nfs42xattr.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 2433c3e03cfa..22d11fdc6deb 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -30,7 +30,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o -nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o +nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o nfs42xattr.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 629af798dfc9..10048eed485d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -193,6 +193,7 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) return nfs_check_cache_invalid_not_delegated(inode, flags); } +EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { @@ -234,11 +235,13 @@ static void nfs_zap_caches_locked(struct inode *inode) | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR | NFS_INO_REVAL_PAGECACHE); } else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR | NFS_INO_REVAL_PAGECACHE); nfs_zap_label_cache_locked(nfsi); } @@ -1897,7 +1900,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(have_writers || have_delegation)) { invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR; /* Force revalidate of all attributes */ save_cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME @@ -2100,6 +2104,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb) #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ +#ifdef CONFIG_NFS_V4_2 + nfsi->xattr_cache = NULL; +#endif return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 8c2e52bc986a..e200522469af 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1182,6 +1182,18 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, if (ret < 0) return ret; + /* + * Normally, the caching is done one layer up, but for successful + * RPCS, always cache the result here, even if the caller was + * just querying the length, or if the reply was too big for + * the caller. This avoids a second RPC in the case of the + * common query-alloc-retrieve cycle for xattrs. + * + * Note that xattr_len is always capped to XATTR_SIZE_MAX. + */ + + nfs4_xattr_cache_add(inode, name, NULL, pages, res.xattr_len); + if (buflen) { if (res.xattr_len > buflen) return -ERANGE; diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c new file mode 100644 index 000000000000..23fdab977a2a --- /dev/null +++ b/fs/nfs/nfs42xattr.c @@ -0,0 +1,1083 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * User extended attribute client side cache functions. + * + * Author: Frank van der Linden + */ +#include +#include +#include +#include +#include + +#include "nfs4_fs.h" +#include "internal.h" + +/* + * User extended attributes client side caching is implemented by having + * a cache structure attached to NFS inodes. This structure is allocated + * when needed, and freed when the cache is zapped. + * + * The cache structure contains as hash table of entries, and a pointer + * to a special-cased entry for the listxattr cache. + * + * Accessing and allocating / freeing the caches is done via reference + * counting. The cache entries use a similar refcounting scheme. + * + * This makes freeing a cache, both from the shrinker and from the + * zap cache path, easy. It also means that, in current use cases, + * the large majority of inodes will not waste any memory, as they + * will never have any user extended attributes assigned to them. + * + * Attribute entries are hashed in to a simple hash table. They are + * also part of an LRU. + * + * There are three shrinkers. + * + * Two shrinkers deal with the cache entries themselves: one for + * large entries (> PAGE_SIZE), and one for smaller entries. The + * shrinker for the larger entries works more aggressively than + * those for the smaller entries. + * + * The other shrinker frees the cache structures themselves. + */ + +/* + * 64 buckets is a good default. There is likely no reasonable + * workload that uses more than even 64 user extended attributes. + * You can certainly add a lot more - but you get what you ask for + * in those circumstances. + */ +#define NFS4_XATTR_HASH_SIZE 64 + +#define NFSDBG_FACILITY NFSDBG_XATTRCACHE + +struct nfs4_xattr_cache; +struct nfs4_xattr_entry; + +struct nfs4_xattr_bucket { + spinlock_t lock; + struct hlist_head hlist; + struct nfs4_xattr_cache *cache; + bool draining; +}; + +struct nfs4_xattr_cache { + struct kref ref; + spinlock_t hash_lock; /* protects hashtable and lru */ + struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; + struct list_head lru; + struct list_head dispose; + atomic_long_t nent; + spinlock_t listxattr_lock; + struct inode *inode; + struct nfs4_xattr_entry *listxattr; + struct work_struct work; +}; + +struct nfs4_xattr_entry { + struct kref ref; + struct hlist_node hnode; + struct list_head lru; + struct list_head dispose; + char *xattr_name; + void *xattr_value; + size_t xattr_size; + struct nfs4_xattr_bucket *bucket; + uint32_t flags; +}; + +#define NFS4_XATTR_ENTRY_EXTVAL 0x0001 + +/* + * LRU list of NFS inodes that have xattr caches. + */ +static struct list_lru nfs4_xattr_cache_lru; +static struct list_lru nfs4_xattr_entry_lru; +static struct list_lru nfs4_xattr_large_entry_lru; + +static struct kmem_cache *nfs4_xattr_cache_cachep; + +static struct workqueue_struct *nfs4_xattr_cache_wq; + +/* + * Hashing helper functions. + */ +static void +nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache) +{ + unsigned int i; + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&cache->buckets[i].hlist); + spin_lock_init(&cache->buckets[i].lock); + cache->buckets[i].cache = cache; + cache->buckets[i].draining = false; + } +} + +/* + * Locking order: + * 1. inode i_lock or bucket lock + * 2. list_lru lock (taken by list_lru_* functions) + */ + +/* + * Wrapper functions to add a cache entry to the right LRU. + */ +static bool +nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry) +{ + struct list_lru *lru; + + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + return list_lru_add(lru, &entry->lru); +} + +static bool +nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry) +{ + struct list_lru *lru; + + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + return list_lru_del(lru, &entry->lru); +} + +/* + * This function allocates cache entries. They are the normal + * extended attribute name/value pairs, but may also be a listxattr + * cache. Those allocations use the same entry so that they can be + * treated as one by the memory shrinker. + * + * xattr cache entries are allocated together with names. If the + * value fits in to one page with the entry structure and the name, + * it will also be part of the same allocation (kmalloc). This is + * expected to be the vast majority of cases. Larger allocations + * have a value pointer that is allocated separately by kvmalloc. + * + * Parameters: + * + * @name: Name of the extended attribute. NULL for listxattr cache + * entry. + * @value: Value of attribute, or listxattr cache. NULL if the + * value is to be copied from pages instead. + * @pages: Pages to copy the value from, if not NULL. Passed in to + * make it easier to copy the value after an RPC, even if + * the value will not be passed up to application (e.g. + * for a 'query' getxattr with NULL buffer). + * @len: Length of the value. Can be 0 for zero-length attribues. + * @value and @pages will be NULL if @len is 0. + */ +static struct nfs4_xattr_entry * +nfs4_xattr_alloc_entry(const char *name, const void *value, + struct page **pages, size_t len) +{ + struct nfs4_xattr_entry *entry; + void *valp; + char *namep; + size_t alloclen, slen; + char *buf; + uint32_t flags; + + BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) + + XATTR_NAME_MAX + 1 > PAGE_SIZE); + + alloclen = sizeof(struct nfs4_xattr_entry); + if (name != NULL) { + slen = strlen(name) + 1; + alloclen += slen; + } else + slen = 0; + + if (alloclen + len <= PAGE_SIZE) { + alloclen += len; + flags = 0; + } else { + flags = NFS4_XATTR_ENTRY_EXTVAL; + } + + buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (buf == NULL) + return NULL; + entry = (struct nfs4_xattr_entry *)buf; + + if (name != NULL) { + namep = buf + sizeof(struct nfs4_xattr_entry); + memcpy(namep, name, slen); + } else { + namep = NULL; + } + + + if (flags & NFS4_XATTR_ENTRY_EXTVAL) { + valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (valp == NULL) { + kfree(buf); + return NULL; + } + } else if (len != 0) { + valp = buf + sizeof(struct nfs4_xattr_entry) + slen; + } else + valp = NULL; + + if (valp != NULL) { + if (value != NULL) + memcpy(valp, value, len); + else + _copy_from_pages(valp, pages, 0, len); + } + + entry->flags = flags; + entry->xattr_value = valp; + kref_init(&entry->ref); + entry->xattr_name = namep; + entry->xattr_size = len; + entry->bucket = NULL; + INIT_LIST_HEAD(&entry->lru); + INIT_LIST_HEAD(&entry->dispose); + INIT_HLIST_NODE(&entry->hnode); + + return entry; +} + +static void +nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry) +{ + if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) + kvfree(entry->xattr_value); + kfree(entry); +} + +static void +nfs4_xattr_free_entry_cb(struct kref *kref) +{ + struct nfs4_xattr_entry *entry; + + entry = container_of(kref, struct nfs4_xattr_entry, ref); + + if (WARN_ON(!list_empty(&entry->lru))) + return; + + nfs4_xattr_free_entry(entry); +} + +static void +nfs4_xattr_free_cache_cb(struct kref *kref) +{ + struct nfs4_xattr_cache *cache; + int i; + + cache = container_of(kref, struct nfs4_xattr_cache, ref); + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist))) + return; + cache->buckets[i].draining = false; + } + + cache->listxattr = NULL; + + kmem_cache_free(nfs4_xattr_cache_cachep, cache); + +} + +static struct nfs4_xattr_cache * +nfs4_xattr_alloc_cache(void) +{ + struct nfs4_xattr_cache *cache; + + cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, + GFP_KERNEL_ACCOUNT | GFP_NOFS); + if (cache == NULL) + return NULL; + + kref_init(&cache->ref); + atomic_long_set(&cache->nent, 0); + + return cache; +} + +/* + * Set the listxattr cache, which is a special-cased cache entry. + * The special value ERR_PTR(-ESTALE) is used to indicate that + * the cache is being drained - this prevents a new listxattr + * cache from being added to what is now a stale cache. + */ +static int +nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache, + struct nfs4_xattr_entry *new) +{ + struct nfs4_xattr_entry *old; + int ret = 1; + + spin_lock(&cache->listxattr_lock); + + old = cache->listxattr; + + if (old == ERR_PTR(-ESTALE)) { + ret = 0; + goto out; + } + + cache->listxattr = new; + if (new != NULL && new != ERR_PTR(-ESTALE)) + nfs4_xattr_entry_lru_add(new); + + if (old != NULL) { + nfs4_xattr_entry_lru_del(old); + kref_put(&old->ref, nfs4_xattr_free_entry_cb); + } +out: + spin_unlock(&cache->listxattr_lock); + + return ret; +} + +/* + * Unlink a cache from its parent inode, clearing out an invalid + * cache. Must be called with i_lock held. + */ +static struct nfs4_xattr_cache * +nfs4_xattr_cache_unlink(struct inode *inode) +{ + struct nfs_inode *nfsi; + struct nfs4_xattr_cache *oldcache; + + nfsi = NFS_I(inode); + + oldcache = nfsi->xattr_cache; + if (oldcache != NULL) { + list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru); + oldcache->inode = NULL; + } + nfsi->xattr_cache = NULL; + nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR; + + return oldcache; + +} + +/* + * Discard a cache. Usually called by a worker, since walking all + * the entries can take up some cycles that we don't want to waste + * in the I/O path. Can also be called from the shrinker callback. + * + * The cache is dead, it has already been unlinked from its inode, + * and no longer appears on the cache LRU list. + * + * Mark all buckets as draining, so that no new entries are added. This + * could still happen in the unlikely, but possible case that another + * thread had grabbed a reference before it was unlinked from the inode, + * and is still holding it for an add operation. + * + * Remove all entries from the LRU lists, so that there is no longer + * any way to 'find' this cache. Then, remove the entries from the hash + * table. + * + * At that point, the cache will remain empty and can be freed when the final + * reference drops, which is very likely the kref_put at the end of + * this function, or the one called immediately afterwards in the + * shrinker callback. + */ +static void +nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache) +{ + unsigned int i; + struct nfs4_xattr_entry *entry; + struct nfs4_xattr_bucket *bucket; + struct hlist_node *n; + + nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE)); + + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { + bucket = &cache->buckets[i]; + + spin_lock(&bucket->lock); + bucket->draining = true; + hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) { + nfs4_xattr_entry_lru_del(entry); + hlist_del_init(&entry->hnode); + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } + spin_unlock(&bucket->lock); + } + + atomic_long_set(&cache->nent, 0); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +static void +nfs4_xattr_discard_cache_worker(struct work_struct *work) +{ + struct nfs4_xattr_cache *cache = container_of(work, + struct nfs4_xattr_cache, work); + + nfs4_xattr_discard_cache(cache); +} + +static void +nfs4_xattr_reap_cache(struct nfs4_xattr_cache *cache) +{ + queue_work(nfs4_xattr_cache_wq, &cache->work); +} + +/* + * Get a referenced copy of the cache structure. Avoid doing allocs + * while holding i_lock. Which means that we do some optimistic allocation, + * and might have to free the result in rare cases. + * + * This function only checks the NFS_INO_INVALID_XATTR cache validity bit + * and acts accordingly, replacing the cache when needed. For the read case + * (!add), this means that the caller must make sure that the cache + * is valid before caling this function. getxattr and listxattr call + * revalidate_inode to do this. The attribute cache timeout (for the + * non-delegated case) is expected to be dealt with in the revalidate + * call. + */ + +static struct nfs4_xattr_cache * +nfs4_xattr_get_cache(struct inode *inode, int add) +{ + struct nfs_inode *nfsi; + struct nfs4_xattr_cache *cache, *oldcache, *newcache; + + nfsi = NFS_I(inode); + + cache = oldcache = NULL; + + spin_lock(&inode->i_lock); + + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) + oldcache = nfs4_xattr_cache_unlink(inode); + else + cache = nfsi->xattr_cache; + + if (cache != NULL) + kref_get(&cache->ref); + + spin_unlock(&inode->i_lock); + + if (add && cache == NULL) { + newcache = NULL; + + cache = nfs4_xattr_alloc_cache(); + if (cache == NULL) + goto out; + + spin_lock(&inode->i_lock); + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) { + /* + * The cache was invalidated again. Give up, + * since what we want to enter is now likely + * outdated anyway. + */ + spin_unlock(&inode->i_lock); + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + cache = NULL; + goto out; + } + + /* + * Check if someone beat us to it. + */ + if (nfsi->xattr_cache != NULL) { + newcache = nfsi->xattr_cache; + kref_get(&newcache->ref); + } else { + kref_get(&cache->ref); + nfsi->xattr_cache = cache; + cache->inode = inode; + list_lru_add(&nfs4_xattr_cache_lru, &cache->lru); + } + + spin_unlock(&inode->i_lock); + + /* + * If there was a race, throw away the cache we just + * allocated, and use the new one allocated by someone + * else. + */ + if (newcache != NULL) { + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + cache = newcache; + } + } + +out: + /* + * Discarding an old cache is done via a workqueue. + */ + if (oldcache != NULL) + nfs4_xattr_reap_cache(oldcache); + + return cache; +} + +static inline struct nfs4_xattr_bucket * +nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name) +{ + return &cache->buckets[jhash(name, strlen(name), 0) & + (ARRAY_SIZE(cache->buckets) - 1)]; +} + +static struct nfs4_xattr_entry * +nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name) +{ + struct nfs4_xattr_entry *entry; + + entry = NULL; + + hlist_for_each_entry(entry, &bucket->hlist, hnode) { + if (!strcmp(entry->xattr_name, name)) + break; + } + + return entry; +} + +static int +nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache, + struct nfs4_xattr_entry *entry) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *oldentry = NULL; + int ret = 1; + + bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name); + entry->bucket = bucket; + + spin_lock(&bucket->lock); + + if (bucket->draining) { + ret = 0; + goto out; + } + + oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name); + if (oldentry != NULL) { + hlist_del_init(&oldentry->hnode); + nfs4_xattr_entry_lru_del(oldentry); + } else { + atomic_long_inc(&cache->nent); + } + + hlist_add_head(&entry->hnode, &bucket->hlist); + nfs4_xattr_entry_lru_add(entry); + +out: + spin_unlock(&bucket->lock); + + if (oldentry != NULL) + kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb); + + return ret; +} + +static void +nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *entry; + + bucket = nfs4_xattr_hash_bucket(cache, name); + + spin_lock(&bucket->lock); + + entry = nfs4_xattr_get_entry(bucket, name); + if (entry != NULL) { + hlist_del_init(&entry->hnode); + nfs4_xattr_entry_lru_del(entry); + atomic_long_dec(&cache->nent); + } + + spin_unlock(&bucket->lock); + + if (entry != NULL) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); +} + +static struct nfs4_xattr_entry * +nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name) +{ + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_entry *entry; + + bucket = nfs4_xattr_hash_bucket(cache, name); + + spin_lock(&bucket->lock); + + entry = nfs4_xattr_get_entry(bucket, name); + if (entry != NULL) + kref_get(&entry->ref); + + spin_unlock(&bucket->lock); + + return entry; +} + +/* + * Entry point to retrieve an entry from the cache. + */ +ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf, + ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + ssize_t ret; + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return -ENOENT; + + ret = 0; + entry = nfs4_xattr_hash_find(cache, name); + + if (entry != NULL) { + dprintk("%s: cache hit '%s', len %lu\n", __func__, + entry->xattr_name, (unsigned long)entry->xattr_size); + if (buflen == 0) { + /* Length probe only */ + ret = entry->xattr_size; + } else if (buflen < entry->xattr_size) + ret = -ERANGE; + else { + memcpy(buf, entry->xattr_value, entry->xattr_size); + ret = entry->xattr_size; + } + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } else { + dprintk("%s: cache miss '%s'\n", __func__, name); + ret = -ENOENT; + } + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + + return ret; +} + +/* + * Retrieve a cached list of xattrs from the cache. + */ +ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + ssize_t ret; + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return -ENOENT; + + spin_lock(&cache->listxattr_lock); + + entry = cache->listxattr; + + if (entry != NULL && entry != ERR_PTR(-ESTALE)) { + if (buflen == 0) { + /* Length probe only */ + ret = entry->xattr_size; + } else if (entry->xattr_size > buflen) + ret = -ERANGE; + else { + memcpy(buf, entry->xattr_value, entry->xattr_size); + ret = entry->xattr_size; + } + } else { + ret = -ENOENT; + } + + spin_unlock(&cache->listxattr_lock); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + + return ret; +} + +/* + * Add an xattr to the cache. + * + * This also invalidates the xattr list cache. + */ +void nfs4_xattr_cache_add(struct inode *inode, const char *name, + const char *buf, struct page **pages, ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + + dprintk("%s: add '%s' len %lu\n", __func__, + name, (unsigned long)buflen); + + cache = nfs4_xattr_get_cache(inode, 1); + if (cache == NULL) + return; + + entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen); + if (entry == NULL) + goto out; + + (void)nfs4_xattr_set_listcache(cache, NULL); + + if (!nfs4_xattr_hash_add(cache, entry)) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + +out: + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + + +/* + * Remove an xattr from the cache. + * + * This also invalidates the xattr list cache. + */ +void nfs4_xattr_cache_remove(struct inode *inode, const char *name) +{ + struct nfs4_xattr_cache *cache; + + dprintk("%s: remove '%s'\n", __func__, name); + + cache = nfs4_xattr_get_cache(inode, 0); + if (cache == NULL) + return; + + (void)nfs4_xattr_set_listcache(cache, NULL); + nfs4_xattr_hash_remove(cache, name); + + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +/* + * Cache listxattr output, replacing any possible old one. + */ +void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, + ssize_t buflen) +{ + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry; + + cache = nfs4_xattr_get_cache(inode, 1); + if (cache == NULL) + return; + + entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen); + if (entry == NULL) + goto out; + + /* + * This is just there to be able to get to bucket->cache, + * which is obviously the same for all buckets, so just + * use bucket 0. + */ + entry->bucket = &cache->buckets[0]; + + if (!nfs4_xattr_set_listcache(cache, entry)) + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + +out: + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); +} + +/* + * Zap the entire cache. Called when an inode is evicted. + */ +void nfs4_xattr_cache_zap(struct inode *inode) +{ + struct nfs4_xattr_cache *oldcache; + + spin_lock(&inode->i_lock); + oldcache = nfs4_xattr_cache_unlink(inode); + spin_unlock(&inode->i_lock); + + if (oldcache) + nfs4_xattr_discard_cache(oldcache); +} + +/* + * The entry LRU is shrunk more aggressively than the cache LRU, + * by settings @seeks to 1. + * + * Cache structures are freed only when they've become empty, after + * pruning all but one entry. + */ + +static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, + struct shrink_control *sc); + +static struct shrinker nfs4_xattr_cache_shrinker = { + .count_objects = nfs4_xattr_cache_count, + .scan_objects = nfs4_xattr_cache_scan, + .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static struct shrinker nfs4_xattr_entry_shrinker = { + .count_objects = nfs4_xattr_entry_count, + .scan_objects = nfs4_xattr_entry_scan, + .seeks = DEFAULT_SEEKS, + .batch = 512, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static struct shrinker nfs4_xattr_large_entry_shrinker = { + .count_objects = nfs4_xattr_entry_count, + .scan_objects = nfs4_xattr_entry_scan, + .seeks = 1, + .batch = 512, + .flags = SHRINKER_MEMCG_AWARE, +}; + +static enum lru_status +cache_lru_isolate(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) +{ + struct list_head *dispose = arg; + struct inode *inode; + struct nfs4_xattr_cache *cache = container_of(item, + struct nfs4_xattr_cache, lru); + + if (atomic_long_read(&cache->nent) > 1) + return LRU_SKIP; + + /* + * If a cache structure is on the LRU list, we know that + * its inode is valid. Try to lock it to break the link. + * Since we're inverting the lock order here, only try. + */ + inode = cache->inode; + + if (!spin_trylock(&inode->i_lock)) + return LRU_SKIP; + + kref_get(&cache->ref); + + cache->inode = NULL; + NFS_I(inode)->xattr_cache = NULL; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR; + list_lru_isolate(lru, &cache->lru); + + spin_unlock(&inode->i_lock); + + list_add_tail(&cache->dispose, dispose); + return LRU_REMOVED; +} + +static unsigned long +nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + struct nfs4_xattr_cache *cache; + + freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc, + cache_lru_isolate, &dispose); + while (!list_empty(&dispose)) { + cache = list_first_entry(&dispose, struct nfs4_xattr_cache, + dispose); + list_del_init(&cache->dispose); + nfs4_xattr_discard_cache(cache); + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); + } + + return freed; +} + + +static unsigned long +nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long count; + + count = list_lru_count(&nfs4_xattr_cache_lru); + return vfs_pressure_ratio(count); +} + +static enum lru_status +entry_lru_isolate(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) +{ + struct list_head *dispose = arg; + struct nfs4_xattr_bucket *bucket; + struct nfs4_xattr_cache *cache; + struct nfs4_xattr_entry *entry = container_of(item, + struct nfs4_xattr_entry, lru); + + bucket = entry->bucket; + cache = bucket->cache; + + /* + * Unhook the entry from its parent (either a cache bucket + * or a cache structure if it's a listxattr buf), so that + * it's no longer found. Then add it to the isolate list, + * to be freed later. + * + * In both cases, we're reverting lock order, so use + * trylock and skip the entry if we can't get the lock. + */ + if (entry->xattr_name != NULL) { + /* Regular cache entry */ + if (!spin_trylock(&bucket->lock)) + return LRU_SKIP; + + kref_get(&entry->ref); + + hlist_del_init(&entry->hnode); + atomic_long_dec(&cache->nent); + list_lru_isolate(lru, &entry->lru); + + spin_unlock(&bucket->lock); + } else { + /* Listxattr cache entry */ + if (!spin_trylock(&cache->listxattr_lock)) + return LRU_SKIP; + + kref_get(&entry->ref); + + cache->listxattr = NULL; + list_lru_isolate(lru, &entry->lru); + + spin_unlock(&cache->listxattr_lock); + } + + list_add_tail(&entry->dispose, dispose); + return LRU_REMOVED; +} + +static unsigned long +nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + struct nfs4_xattr_entry *entry; + struct list_lru *lru; + + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); + + while (!list_empty(&dispose)) { + entry = list_first_entry(&dispose, struct nfs4_xattr_entry, + dispose); + list_del_init(&entry->dispose); + + /* + * Drop two references: the one that we just grabbed + * in entry_lru_isolate, and the one that was set + * when the entry was first allocated. + */ + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); + } + + return freed; +} + +static unsigned long +nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long count; + struct list_lru *lru; + + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; + + count = list_lru_count(lru); + return vfs_pressure_ratio(count); +} + + +static void nfs4_xattr_cache_init_once(void *p) +{ + struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p; + + spin_lock_init(&cache->listxattr_lock); + atomic_long_set(&cache->nent, 0); + nfs4_xattr_hash_init(cache); + cache->listxattr = NULL; + INIT_WORK(&cache->work, nfs4_xattr_discard_cache_worker); + INIT_LIST_HEAD(&cache->lru); + INIT_LIST_HEAD(&cache->dispose); +} + +int __init nfs4_xattr_cache_init(void) +{ + int ret = 0; + + nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", + sizeof(struct nfs4_xattr_cache), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), + nfs4_xattr_cache_init_once); + if (nfs4_xattr_cache_cachep == NULL) + return -ENOMEM; + + ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru, + &nfs4_xattr_large_entry_shrinker); + if (ret) + goto out4; + + ret = list_lru_init_memcg(&nfs4_xattr_entry_lru, + &nfs4_xattr_entry_shrinker); + if (ret) + goto out3; + + ret = list_lru_init_memcg(&nfs4_xattr_cache_lru, + &nfs4_xattr_cache_shrinker); + if (ret) + goto out2; + + nfs4_xattr_cache_wq = alloc_workqueue("nfs4_xattr", WQ_MEM_RECLAIM, 0); + if (nfs4_xattr_cache_wq == NULL) + goto out1; + + ret = register_shrinker(&nfs4_xattr_cache_shrinker); + if (ret) + goto out0; + + ret = register_shrinker(&nfs4_xattr_entry_shrinker); + if (ret) + goto out; + + ret = register_shrinker(&nfs4_xattr_large_entry_shrinker); + if (!ret) + return 0; + + unregister_shrinker(&nfs4_xattr_entry_shrinker); +out: + unregister_shrinker(&nfs4_xattr_cache_shrinker); +out0: + destroy_workqueue(nfs4_xattr_cache_wq); +out1: + list_lru_destroy(&nfs4_xattr_cache_lru); +out2: + list_lru_destroy(&nfs4_xattr_entry_lru); +out3: + list_lru_destroy(&nfs4_xattr_large_entry_lru); +out4: + kmem_cache_destroy(nfs4_xattr_cache_cachep); + + return ret; +} + +void nfs4_xattr_cache_exit(void) +{ + unregister_shrinker(&nfs4_xattr_entry_shrinker); + unregister_shrinker(&nfs4_xattr_cache_shrinker); + list_lru_destroy(&nfs4_xattr_entry_lru); + list_lru_destroy(&nfs4_xattr_cache_lru); + kmem_cache_destroy(nfs4_xattr_cache_cachep); + destroy_workqueue(nfs4_xattr_cache_wq); +} diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2fa9e4ea98d2..7e16a586f3fc 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -626,12 +626,34 @@ static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state * nfs4_stateid_match_other(&state->open_stateid, stateid); } +/* nfs42xattr.c */ +#ifdef CONFIG_NFS_V4_2 +extern int __init nfs4_xattr_cache_init(void); +extern void nfs4_xattr_cache_exit(void); +extern void nfs4_xattr_cache_add(struct inode *inode, const char *name, + const char *buf, struct page **pages, + ssize_t buflen); +extern void nfs4_xattr_cache_remove(struct inode *inode, const char *name); +extern ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, + char *buf, ssize_t buflen); +extern void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, + ssize_t buflen); +extern ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, + ssize_t buflen); +extern void nfs4_xattr_cache_zap(struct inode *inode); #else +static inline void nfs4_xattr_cache_zap(struct inode *inode) +{ +} +#endif /* CONFIG_NFS_V4_2 */ + +#else /* CONFIG_NFS_V4 */ #define nfs4_close_state(a, b) do { } while (0) #define nfs4_close_sync(a, b) do { } while (0) #define nfs4_state_protect(a, b, c, d) do { } while (0) #define nfs4_state_protect_write(a, b, c, d) do { } while (0) + #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92a07956f07b..f670ff64b31e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7448,6 +7448,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, size_t buflen, int flags) { struct nfs_access_entry cache; + int ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; @@ -7466,10 +7467,17 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, return -EACCES; } - if (buf == NULL) - return nfs42_proc_removexattr(inode, key); - else - return nfs42_proc_setxattr(inode, key, buf, buflen, flags); + if (buf == NULL) { + ret = nfs42_proc_removexattr(inode, key); + if (!ret) + nfs4_xattr_cache_remove(inode, key); + } else { + ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags); + if (!ret) + nfs4_xattr_cache_add(inode, key, buf, NULL, buflen); + } + + return ret; } static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, @@ -7477,6 +7485,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, const char *key, void *buf, size_t buflen) { struct nfs_access_entry cache; + ssize_t ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; @@ -7486,7 +7495,17 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, return -EACCES; } - return nfs42_proc_getxattr(inode, key, buf, buflen); + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret) + return ret; + + ret = nfs4_xattr_cache_get(inode, key, buf, buflen); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + + ret = nfs42_proc_getxattr(inode, key, buf, buflen); + + return ret; } static ssize_t @@ -7494,7 +7513,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) { u64 cookie; bool eof; - int ret, size; + ssize_t ret, size; char *buf; size_t buflen; struct nfs_access_entry cache; @@ -7507,6 +7526,14 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) return 0; } + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret) + return ret; + + ret = nfs4_xattr_cache_list(inode, list, list_len); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + cookie = 0; eof = false; buflen = list_len ? list_len : XATTR_LIST_MAX; @@ -7526,6 +7553,9 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) size += ret; } + if (list_len) + nfs4_xattr_cache_set_list(inode, list, size); + return size; } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1475f932d7da..0c1ab846b83d 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -69,6 +69,7 @@ static void nfs4_evict_inode(struct inode *inode) pnfs_destroy_layout(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); + nfs4_xattr_cache_zap(inode); } struct nfs_referral_count { @@ -268,6 +269,12 @@ static int __init init_nfs_v4(void) if (err) goto out1; +#ifdef CONFIG_NFS_V4_2 + err = nfs4_xattr_cache_init(); + if (err) + goto out2; +#endif + err = nfs4_register_sysctl(); if (err) goto out2; @@ -288,6 +295,9 @@ static void __exit exit_nfs_v4(void) nfs4_pnfs_v3_ds_connect_unload(); unregister_nfs_version(&nfs_v4); +#ifdef CONFIG_NFS_V4_2 + nfs4_xattr_cache_exit(); +#endif nfs4_unregister_sysctl(); nfs_idmap_quit(); nfs_dns_resolver_destroy(); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 943ee750d68c..a2c6455ea3fa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -102,6 +102,8 @@ struct nfs_delegation; struct posix_acl; +struct nfs4_xattr_cache; + /* * nfs fs inode data in memory */ @@ -188,6 +190,10 @@ struct nfs_inode { struct fscache_cookie *fscache; #endif struct inode vfs_inode; + +#ifdef CONFIG_NFS_V4_2 + struct nfs4_xattr_cache *xattr_cache; +#endif }; struct nfs4_copy_state { diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 7bcc8cd6831d..3afe3767c55d 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -56,6 +56,7 @@ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 +#define NFSDBG_XATTRCACHE 0x8000 #define NFSDBG_ALL 0xFFFF -- cgit From 46b5780688c0d825b6b8d49b267b13102bea512d Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Fri, 6 Mar 2020 16:55:13 +0800 Subject: dt-bindings: mfd: Convert ChromeOS EC bindings to json-schema Convert the ChromeOS EC bindings to json-schema. Signed-off-by: Ikjoon Jang Reviewed-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/cros-ec.txt | 76 ------------ .../devicetree/bindings/mfd/google,cros-ec.yaml | 129 +++++++++++++++++++++ 2 files changed, 129 insertions(+), 76 deletions(-) delete mode 100644 Documentation/devicetree/bindings/mfd/cros-ec.txt create mode 100644 Documentation/devicetree/bindings/mfd/google,cros-ec.yaml diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt deleted file mode 100644 index 4860eabd0f72..000000000000 --- a/Documentation/devicetree/bindings/mfd/cros-ec.txt +++ /dev/null @@ -1,76 +0,0 @@ -ChromeOS Embedded Controller - -Google's ChromeOS EC is a Cortex-M device which talks to the AP and -implements various function such as keyboard and battery charging. - -The EC can be connect through various means (I2C, SPI, LPC, RPMSG) and the -compatible string used depends on the interface. Each connection method has -its own driver which connects to the top level interface-agnostic EC driver. -Other Linux driver (such as cros-ec-keyb for the matrix keyboard) connect to -the top-level driver. - -Required properties (I2C): -- compatible: "google,cros-ec-i2c" -- reg: I2C slave address - -Required properties (SPI): -- compatible: "google,cros-ec-spi" -- reg: SPI chip select - -Required properties (RPMSG): -- compatible: "google,cros-ec-rpmsg" - -Optional properties (SPI): -- google,cros-ec-spi-pre-delay: Some implementations of the EC need a little - time to wake up from sleep before they can receive SPI transfers at a high - clock rate. This property specifies the delay, in usecs, between the - assertion of the CS to the start of the first clock pulse. -- google,cros-ec-spi-msg-delay: Some implementations of the EC require some - additional processing time in order to accept new transactions. If the delay - between transactions is not long enough the EC may not be able to respond - properly to subsequent transactions and cause them to hang. This property - specifies the delay, in usecs, introduced between transactions to account - for the time required by the EC to get back into a state in which new data - can be accepted. - -Required properties (LPC): -- compatible: "google,cros-ec-lpc" -- reg: List of (IO address, size) pairs defining the interface uses - -Optional properties (all): -- google,has-vbc-nvram: Some implementations of the EC include a small - nvram space used to store verified boot context data. This boolean flag - is used to specify whether this nvram is present or not. - -Example for I2C: - -i2c@12ca0000 { - cros-ec@1e { - reg = <0x1e>; - compatible = "google,cros-ec-i2c"; - interrupts = <14 0>; - interrupt-parent = <&wakeup_eint>; - wakeup-source; - }; - - -Example for SPI: - -spi@131b0000 { - ec@0 { - compatible = "google,cros-ec-spi"; - reg = <0x0>; - interrupts = <14 0>; - interrupt-parent = <&wakeup_eint>; - wakeup-source; - spi-max-frequency = <5000000>; - controller-data { - cs-gpio = <&gpf0 3 4 3 0>; - samsung,spi-cs; - samsung,spi-feedback-delay = <2>; - }; - }; -}; - - -Example for LPC is not supplied as it is not yet implemented. diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml new file mode 100644 index 000000000000..6a7279a85ec1 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/google,cros-ec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ChromeOS Embedded Controller + +maintainers: + - Benson Leung + - Enric Balletbo i Serra + - Guenter Roeck + +description: + Google's ChromeOS EC is a microcontroller which talks to the AP and + implements various functions such as keyboard and battery charging. + The EC can be connected through various interfaces (I2C, SPI, and others) + and the compatible string specifies which interface is being used. + +properties: + compatible: + oneOf: + - description: + For implementations of the EC is connected through I2C. + const: google,cros-ec-i2c + - description: + For implementations of the EC is connected through SPI. + const: google,cros-ec-spi + - description: + For implementations of the EC is connected through RPMSG. + const: google,cros-ec-rpmsg + + google,cros-ec-spi-pre-delay: + description: + This property specifies the delay in usecs between the + assertion of the CS and the first clock pulse. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - default: 0 + - minimum: 0 + + google,cros-ec-spi-msg-delay: + description: + This property specifies the delay in usecs between messages. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - default: 0 + - minimum: 0 + + google,has-vbc-nvram: + description: + Some implementations of the EC include a small nvram space used to + store verified boot context data. This boolean flag is used to specify + whether this nvram is present or not. + type: boolean + + spi-max-frequency: + description: Maximum SPI frequency of the device in Hz. + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + +if: + properties: + compatible: + contains: + enum: + - google,cros-ec-i2c + - google,cros-ec-rpmsg +then: + properties: + google,cros-ec-spi-pre-delay: false + google,cros-ec-spi-msg-delay: false + spi-max-frequency: false + +additionalProperties: false + +examples: + # Example for I2C + - | + #include + #include + + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + + cros-ec@1e { + compatible = "google,cros-ec-i2c"; + reg = <0x1e>; + interrupts = <6 0>; + interrupt-parent = <&gpio0>; + }; + }; + + # Example for SPI + - | + #include + #include + + spi0 { + #address-cells = <1>; + #size-cells = <0>; + + cros-ec@0 { + compatible = "google,cros-ec-spi"; + reg = <0x0>; + google,cros-ec-spi-msg-delay = <30>; + google,cros-ec-spi-pre-delay = <10>; + interrupts = <99 0>; + interrupt-parent = <&gpio7>; + spi-max-frequency = <5000000>; + }; + }; + + # Example for RPMSG + - | + scp0 { + cros-ec { + compatible = "google,cros-ec-rpmsg"; + }; + }; +... -- cgit From d8f090dbeafdcc3d30761aa0062f19d1adf9ef08 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 15 Jun 2020 18:14:55 +0200 Subject: rtc: max77686: Do not allow interrupt to fire before system resume The rtc-max77686 device shares the main interrupt line with parent MFD device (max77686 driver). During the system suspend, the parent MFD device disables this IRQ to prevent an early event happening before resuming I2C bus controller. The same should be done by rtc-max77686 driver because otherwise the interrupt handler max77686_rtc_alarm_irq() will be called before its resume function (max77686_rtc_resume()). Such issue is not fatal but disabling shared IRQ by all users ensures correct behavior. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200615161455.4420-1-krzk@kernel.org --- drivers/rtc/rtc-max77686.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 03ebcf1c0f3d..645de5af707b 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -805,17 +805,33 @@ static int max77686_rtc_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int max77686_rtc_suspend(struct device *dev) { + struct max77686_rtc_info *info = dev_get_drvdata(dev); + int ret = 0; + if (device_may_wakeup(dev)) { struct max77686_rtc_info *info = dev_get_drvdata(dev); - return enable_irq_wake(info->virq); + ret = enable_irq_wake(info->virq); } - return 0; + /* + * Main IRQ (not virtual) must be disabled during suspend because if it + * happens while suspended it will be handled before resuming I2C. + * + * Since Main IRQ is shared, all its users should disable it to be sure + * it won't fire while one of them is still suspended. + */ + disable_irq(info->rtc_irq); + + return ret; } static int max77686_rtc_resume(struct device *dev) { + struct max77686_rtc_info *info = dev_get_drvdata(dev); + + enable_irq(info->rtc_irq); + if (device_may_wakeup(dev)) { struct max77686_rtc_info *info = dev_get_drvdata(dev); -- cgit From 22f8d5a1bf230cf8567a4121fc3789babb46336d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 20 Jun 2020 20:04:43 +0800 Subject: rtc: goldfish: Enable interrupt in set_alarm() when necessary When use goldfish rtc, the "hwclock" command fails with "select() to /dev/rtc to wait for clock tick timed out". This is because "hwclock" need the set_alarm() hook to enable interrupt when alrm->enabled is true. This operation is missing in goldfish rtc (but other rtc drivers, such as cmos rtc, enable interrupt here), so add it. Signed-off-by: Huacai Chen Signed-off-by: Jiaxun Yang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1592654683-31314-1-git-send-email-chenhc@lemote.com --- drivers/rtc/rtc-goldfish.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-goldfish.c b/drivers/rtc/rtc-goldfish.c index 27797157fcb3..6349d2cd3680 100644 --- a/drivers/rtc/rtc-goldfish.c +++ b/drivers/rtc/rtc-goldfish.c @@ -73,6 +73,7 @@ static int goldfish_rtc_set_alarm(struct device *dev, rtc_alarm64 = rtc_tm_to_time64(&alrm->time) * NSEC_PER_SEC; writel((rtc_alarm64 >> 32), base + TIMER_ALARM_HIGH); writel(rtc_alarm64, base + TIMER_ALARM_LOW); + writel(1, base + TIMER_IRQ_ENABLED); } else { /* * if this function was called with enabled=0 -- cgit From 3567d3d1477289d2ee988ee007507fca69f97eb8 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 6 Jul 2020 08:27:27 +0200 Subject: rtc: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200706062727.18481-1-grandmaster@al2klimov.de --- drivers/rtc/rtc-ab-b5ze-s3.c | 2 +- drivers/rtc/rtc-bq32k.c | 2 +- drivers/rtc/rtc-mcp795.c | 2 +- drivers/rtc/rtc-pcf85063.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c index 811fe2005488..2370ac0cdb5f 100644 --- a/drivers/rtc/rtc-ab-b5ze-s3.c +++ b/drivers/rtc/rtc-ab-b5ze-s3.c @@ -7,7 +7,7 @@ * * Detailed datasheet of the chip is available here: * - * http://www.abracon.com/realtimeclock/AB-RTCMC-32.768kHz-B5ZE-S3-Application-Manual.pdf + * https://www.abracon.com/realtimeclock/AB-RTCMC-32.768kHz-B5ZE-S3-Application-Manual.pdf * * This work is based on ISL12057 driver (drivers/rtc/rtc-isl12057.c). * diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index 4a63f0cd2321..933e4237237d 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -6,7 +6,7 @@ * Copyright (C) 2014 Pavel Machek * * You can get hardware description at - * http://www.ti.com/lit/ds/symlink/bq32000.pdf + * https://www.ti.com/lit/ds/symlink/bq32000.pdf */ #include diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c index 1660d5e79582..21cbf7f892e8 100644 --- a/drivers/rtc/rtc-mcp795.c +++ b/drivers/rtc/rtc-mcp795.c @@ -7,7 +7,7 @@ * based on other Linux RTC drivers * * Device datasheet: - * http://ww1.microchip.com/downloads/en/DeviceDoc/22280A.pdf + * https://ww1.microchip.com/downloads/en/DeviceDoc/22280A.pdf */ #include diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 7a87f461bec8..ca55ba975aeb 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -21,8 +21,8 @@ /* * Information for this driver was pulled from the following datasheets. * - * http://www.nxp.com/documents/data_sheet/PCF85063A.pdf - * http://www.nxp.com/documents/data_sheet/PCF85063TP.pdf + * https://www.nxp.com/documents/data_sheet/PCF85063A.pdf + * https://www.nxp.com/documents/data_sheet/PCF85063TP.pdf * * PCF85063A -- Rev. 6 — 18 November 2015 * PCF85063TP -- Rev. 4 — 6 May 2015 -- cgit From d3de4beb14a887754b83843bf23b6dbe8d022764 Mon Sep 17 00:00:00 2001 From: "Johnson CH Chen (陳昭勳)" Date: Thu, 9 Jul 2020 06:34:06 +0000 Subject: rtc: ds1374: wdt: Use watchdog core for watchdog part Let ds1374 watchdog use watchdog core functions. It also includes improving watchdog timer setting and nowayout, and just uses ioctl() of watchdog core. Signed-off-by: Johnson Chen Signed-off-by: Alexandre Belloni Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/HK2PR01MB328182D5A54BFAA8A22E448AFA640@HK2PR01MB3281.apcprd01.prod.exchangelabs.com --- drivers/rtc/Kconfig | 1 + drivers/rtc/rtc-ds1374.c | 258 +++++++++++------------------------------------ 2 files changed, 62 insertions(+), 197 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b54d87d45c89..c25d51f35f0c 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -282,6 +282,7 @@ config RTC_DRV_DS1374 config RTC_DRV_DS1374_WDT bool "Dallas/Maxim DS1374 watchdog timer" depends on RTC_DRV_DS1374 + select WATCHDOG_CORE if WATCHDOG help If you say Y here you will get support for the watchdog timer in the Dallas Semiconductor DS1374 diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 9c51a12cf70f..c71065d26cd2 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -46,6 +46,7 @@ #define DS1374_REG_WDALM2 0x06 #define DS1374_REG_CR 0x07 /* Control */ #define DS1374_REG_CR_AIE 0x01 /* Alarm Int. Enable */ +#define DS1374_REG_CR_WDSTR 0x08 /* 1=INT, 0=RST */ #define DS1374_REG_CR_WDALM 0x20 /* 1=Watchdog, 0=Alarm */ #define DS1374_REG_CR_WACE 0x40 /* WD/Alarm counter enable */ #define DS1374_REG_SR 0x08 /* Status */ @@ -71,7 +72,9 @@ struct ds1374 { struct i2c_client *client; struct rtc_device *rtc; struct work_struct work; - +#ifdef CONFIG_RTC_DRV_DS1374_WDT + struct watchdog_device wdt; +#endif /* The mutex protects alarm operations, and prevents a race * between the enable_irq() in the workqueue and the free_irq() * in the remove function. @@ -369,238 +372,98 @@ static const struct rtc_class_ops ds1374_rtc_ops = { * ***************************************************************************** */ -static struct i2c_client *save_client; /* Default margin */ -#define WD_TIMO 131762 +#define TIMER_MARGIN_DEFAULT 32 +#define TIMER_MARGIN_MIN 1 +#define TIMER_MARGIN_MAX 4095 /* 24-bit value */ #define DRV_NAME "DS1374 Watchdog" -static int wdt_margin = WD_TIMO; -static unsigned long wdt_is_open; +static int wdt_margin; module_param(wdt_margin, int, 0); MODULE_PARM_DESC(wdt_margin, "Watchdog timeout in seconds (default 32s)"); +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default =" + __MODULE_STRING(WATCHDOG_NOWAYOUT)")"); + static const struct watchdog_info ds1374_wdt_info = { .identity = "DS1374 WTD", .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, }; -static int ds1374_wdt_settimeout(unsigned int timeout) +static int ds1374_wdt_settimeout(struct watchdog_device *wdt, unsigned int timeout) { - int ret = -ENOIOCTLCMD; - int cr; + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); + struct i2c_client *client = ds1374->client; + int ret, cr; - ret = cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR); - if (ret < 0) - goto out; + wdt->timeout = timeout; + + cr = i2c_smbus_read_byte_data(client, DS1374_REG_CR); + if (cr < 0) + return cr; /* Disable any existing watchdog/alarm before setting the new one */ cr &= ~DS1374_REG_CR_WACE; - ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); + ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); if (ret < 0) - goto out; + return ret; /* Set new watchdog time */ - ret = ds1374_write_rtc(save_client, timeout, DS1374_REG_WDALM0, 3); - if (ret) { - pr_info("couldn't set new watchdog time\n"); - goto out; - } + timeout = timeout * 4096; + ret = ds1374_write_rtc(client, timeout, DS1374_REG_WDALM0, 3); + if (ret) + return ret; /* Enable watchdog timer */ cr |= DS1374_REG_CR_WACE | DS1374_REG_CR_WDALM; + cr &= ~DS1374_REG_CR_WDSTR;/* for RST PIN */ cr &= ~DS1374_REG_CR_AIE; - ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); + ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); if (ret < 0) - goto out; + return ret; return 0; -out: - return ret; } - /* * Reload the watchdog timer. (ie, pat the watchdog) */ -static void ds1374_wdt_ping(void) +static int ds1374_wdt_start(struct watchdog_device *wdt) { + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); u32 val; - int ret = 0; - ret = ds1374_read_rtc(save_client, &val, DS1374_REG_WDALM0, 3); - if (ret) - pr_info("WD TICK FAIL!!!!!!!!!! %i\n", ret); + return ds1374_read_rtc(ds1374->client, &val, DS1374_REG_WDALM0, 3); } -static void ds1374_wdt_disable(void) +static int ds1374_wdt_stop(struct watchdog_device *wdt) { + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); + struct i2c_client *client = ds1374->client; int cr; - cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR); + cr = i2c_smbus_read_byte_data(client, DS1374_REG_CR); + if (cr < 0) + return cr; + /* Disable watchdog timer */ cr &= ~DS1374_REG_CR_WACE; - i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); -} - -/* - * Watchdog device is opened, and watchdog starts running. - */ -static int ds1374_wdt_open(struct inode *inode, struct file *file) -{ - struct ds1374 *ds1374 = i2c_get_clientdata(save_client); - - if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) { - mutex_lock(&ds1374->mutex); - if (test_and_set_bit(0, &wdt_is_open)) { - mutex_unlock(&ds1374->mutex); - return -EBUSY; - } - /* - * Activate - */ - wdt_is_open = 1; - mutex_unlock(&ds1374->mutex); - return stream_open(inode, file); - } - return -ENODEV; -} - -/* - * Close the watchdog device. - */ -static int ds1374_wdt_release(struct inode *inode, struct file *file) -{ - if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) - clear_bit(0, &wdt_is_open); - - return 0; + return i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); } -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t ds1374_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (len) { - ds1374_wdt_ping(); - return 1; - } - return 0; -} - -static ssize_t ds1374_wdt_read(struct file *file, char __user *data, - size_t len, loff_t *ppos) -{ - return 0; -} - -/* - * Handle commands from user-space. - */ -static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int new_margin, options; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user((struct watchdog_info __user *)arg, - &ds1374_wdt_info, sizeof(ds1374_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, (int __user *)arg); - case WDIOC_KEEPALIVE: - ds1374_wdt_ping(); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int __user *)arg)) - return -EFAULT; - - /* the hardware's tick rate is 4096 Hz, so - * the counter value needs to be scaled accordingly - */ - new_margin <<= 12; - if (new_margin < 1 || new_margin > 16777216) - return -EINVAL; - - wdt_margin = new_margin; - ds1374_wdt_settimeout(new_margin); - ds1374_wdt_ping(); - /* fallthrough */ - case WDIOC_GETTIMEOUT: - /* when returning ... inverse is true */ - return put_user((wdt_margin >> 12), (int __user *)arg); - case WDIOC_SETOPTIONS: - if (copy_from_user(&options, (int __user *)arg, sizeof(int))) - return -EFAULT; - - if (options & WDIOS_DISABLECARD) { - pr_info("disable watchdog\n"); - ds1374_wdt_disable(); - return 0; - } - - if (options & WDIOS_ENABLECARD) { - pr_info("enable watchdog\n"); - ds1374_wdt_settimeout(wdt_margin); - ds1374_wdt_ping(); - return 0; - } - return -EINVAL; - } - return -ENOTTY; -} - -static long ds1374_wdt_unlocked_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret; - struct ds1374 *ds1374 = i2c_get_clientdata(save_client); - - mutex_lock(&ds1374->mutex); - ret = ds1374_wdt_ioctl(file, cmd, arg); - mutex_unlock(&ds1374->mutex); - - return ret; -} - -static int ds1374_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) -{ - if (code == SYS_DOWN || code == SYS_HALT) - /* Disable Watchdog */ - ds1374_wdt_disable(); - return NOTIFY_DONE; -} - -static const struct file_operations ds1374_wdt_fops = { - .owner = THIS_MODULE, - .read = ds1374_wdt_read, - .unlocked_ioctl = ds1374_wdt_unlocked_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .write = ds1374_wdt_write, - .open = ds1374_wdt_open, - .release = ds1374_wdt_release, - .llseek = no_llseek, -}; - -static struct miscdevice ds1374_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ds1374_wdt_fops, -}; - -static struct notifier_block ds1374_wdt_notifier = { - .notifier_call = ds1374_wdt_notify_sys, +static const struct watchdog_ops ds1374_wdt_ops = { + .owner = THIS_MODULE, + .start = ds1374_wdt_start, + .stop = ds1374_wdt_stop, + .set_timeout = ds1374_wdt_settimeout, }; - #endif /*CONFIG_RTC_DRV_DS1374_WDT*/ /* ***************************************************************************** @@ -652,16 +515,22 @@ static int ds1374_probe(struct i2c_client *client, return ret; #ifdef CONFIG_RTC_DRV_DS1374_WDT - save_client = client; - ret = misc_register(&ds1374_miscdev); + ds1374->wdt.info = &ds1374_wdt_info; + ds1374->wdt.ops = &ds1374_wdt_ops; + ds1374->wdt.timeout = TIMER_MARGIN_DEFAULT; + ds1374->wdt.min_timeout = TIMER_MARGIN_MIN; + ds1374->wdt.max_timeout = TIMER_MARGIN_MAX; + + watchdog_init_timeout(&ds1374->wdt, wdt_margin, &client->dev); + watchdog_set_nowayout(&ds1374->wdt, nowayout); + watchdog_stop_on_reboot(&ds1374->wdt); + watchdog_stop_on_unregister(&ds1374->wdt); + watchdog_set_drvdata(&ds1374->wdt, ds1374); + ds1374_wdt_settimeout(&ds1374->wdt, ds1374->wdt.timeout); + + ret = devm_watchdog_register_device(&client->dev, &ds1374->wdt); if (ret) return ret; - ret = register_reboot_notifier(&ds1374_wdt_notifier); - if (ret) { - misc_deregister(&ds1374_miscdev); - return ret; - } - ds1374_wdt_settimeout(131072); #endif return 0; @@ -670,11 +539,6 @@ static int ds1374_probe(struct i2c_client *client, static int ds1374_remove(struct i2c_client *client) { struct ds1374 *ds1374 = i2c_get_clientdata(client); -#ifdef CONFIG_RTC_DRV_DS1374_WDT - misc_deregister(&ds1374_miscdev); - ds1374_miscdev.parent = NULL; - unregister_reboot_notifier(&ds1374_wdt_notifier); -#endif if (client->irq > 0) { mutex_lock(&ds1374->mutex); -- cgit From 4df2ef85f0efe44505f511ca5e4455585f53a2da Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 14 Jul 2020 13:45:56 +0100 Subject: rtc: pl031: fix set_alarm by adding back call to alarm_irq_enable Commit c8ff5841a90b ("rtc: pl031: switch to rtc_time64_to_tm/rtc_tm_to_time64") seemed to have accidentally removed the call to pl031_alarm_irq_enable from pl031_set_alarm while switching to 64-bit apis. Let us add back the same to get the set alarm functionality back. Fixes: c8ff5841a90b ("rtc: pl031: switch to rtc_time64_to_tm/rtc_tm_to_time64") Signed-off-by: Sudeep Holla Signed-off-by: Alexandre Belloni Tested-by: Valentin Schneider Cc: Linus Walleij Cc: Alexandre Belloni Link: https://lore.kernel.org/r/20200714124556.20294-1-sudeep.holla@arm.com --- drivers/rtc/rtc-pl031.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 40d7450a1ce4..c6b89273feba 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -275,6 +275,7 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) struct pl031_local *ldata = dev_get_drvdata(dev); writel(rtc_tm_to_time64(&alarm->time), ldata->base + RTC_MR); + pl031_alarm_irq_enable(dev, alarm->enabled); return 0; } -- cgit From a5e6f964bb2c613933de58a35ddfa306128ba004 Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Fri, 10 Jul 2020 17:00:03 +0900 Subject: rtc: cleanup obsolete comment about struct rtc_class_ops Commit ea369ea6d828 ("rtc: remove .open() and .release()") removes open/release callback from struct rtc_class_ops. Also commit 80d4bb515b78 ("RTC: Cleanup rtc_class_ops->irq_set_state") and commit 696160fec162 ("RTC: Cleanup rtc_class_ops->irq_set_freq()") removes irq callbacks. So, just remove related comments so that readers will not be confused. Signed-off-by: Misono Tomohiro Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200710080003.7986-1-misono.tomohiro@jp.fujitsu.com --- include/linux/rtc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/rtc.h b/include/linux/rtc.h index bba3db3f7efa..22d1575e4991 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -55,10 +55,6 @@ extern struct class *rtc_class; * * The (current) exceptions are mostly filesystem hooks: * - the proc() hook for procfs - * - non-ioctl() chardev hooks: open(), release() - * - * REVISIT those periodic irq calls *do* have ops_lock when they're - * issued through ioctl() ... */ struct rtc_class_ops { int (*ioctl)(struct device *, unsigned int, unsigned long); -- cgit From 4a400f0d08308e3e446a07d3f4590f243e44b7db Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Fri, 26 Jun 2020 10:23:16 +0300 Subject: sunrpc: destroy rpc_inode_cachep after unregister_filesystem Better to unregister the file system before destroying the kmem_cache cache of the inodes, so that the inodes are freed before we are trying to destroy it. Otherwise, kmem_cache yells that some objects are live. Signed-off-by: Dan Aloni Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e9d0953522f0..eadc0ede928c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1510,6 +1510,6 @@ err_notifier: void unregister_rpc_pipefs(void) { rpc_clients_notifier_unregister(); - kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); + kmem_cache_destroy(rpc_inode_cachep); } -- cgit From ab91e7a6da7eeb8aa54843748652c186daee43eb Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 6 Jul 2020 17:52:24 +0800 Subject: freezer: Add unsafe versions of freezable_schedule_timeout_interruptible for NFS commit 0688e64bc600 ("NFS: Allow signal interruption of NFS4ERR_DELAYed operations") introduces nfs4_delay_interruptible which also needs an _unsafe version to avoid the following call trace for the same reason explained in commit 416ad3c9c006 ("freezer: add unsafe versions of freezable helpers for NFS") CPU: 4 PID: 3968 Comm: rm Tainted: G W 5.8.0-rc4 #1 Hardware name: Marvell OcteonTX CN96XX board (DT) Call trace: dump_backtrace+0x0/0x1dc show_stack+0x20/0x30 dump_stack+0xdc/0x150 debug_check_no_locks_held+0x98/0xa0 nfs4_delay_interruptible+0xd8/0x120 nfs4_handle_exception+0x130/0x170 nfs4_proc_rmdir+0x8c/0x220 nfs_rmdir+0xa4/0x360 vfs_rmdir.part.0+0x6c/0x1b0 do_rmdir+0x18c/0x210 __arm64_sys_unlinkat+0x64/0x7c el0_svc_common.constprop.0+0x7c/0x110 do_el0_svc+0x24/0xa0 el0_sync_handler+0x13c/0x1b8 el0_sync+0x158/0x180 Signed-off-by: He Zhe Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- include/linux/freezer.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f670ff64b31e..113e0d6dd3d3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -416,7 +416,7 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); - freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout)); + freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout)); if (!signal_pending(current)) return 0; return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 21f5aa0b217f..27828145ca09 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -207,6 +207,17 @@ static inline long freezable_schedule_timeout_interruptible(long timeout) return __retval; } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) +{ + long __retval; + + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count_unsafe(); + return __retval; +} + /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { @@ -285,6 +296,9 @@ static inline void set_freezable(void) {} #define freezable_schedule_timeout_interruptible(timeout) \ schedule_timeout_interruptible(timeout) +#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) -- cgit From dbc4fec6b6dd2d23e161b250d51cbd28bd9c8497 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Jun 2020 10:35:28 -0400 Subject: NFSv4.0 allow nconnect for v4.0 It looks like this "else" is just a typo. It turns off nconnect for NFSv4.0 even though it works for every other version. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c41cbd86612c..daacc78a3d48 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -880,7 +880,7 @@ static int nfs4_set_client(struct nfs_server *server, if (minorversion == 0) __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); - else if (proto == XPRT_TRANSPORT_TCP) + if (proto == XPRT_TRANSPORT_TCP) cl_init.nconnect = nconnect; if (server->flags & NFS_MOUNT_NORESVPORT) -- cgit From 66246641609b3cbfc87e805ded1f018b5c9cecf4 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Fri, 10 Jul 2020 18:15:48 +0800 Subject: 9p: retrieve fid from file when file instance exist. In the current setattr implementation in 9p, fid is always retrieved from dentry no matter file instance exists or not. If so, there may be some info related to opened file instance dropped. So it's better to retrieve fid from file instance when it is passed to setattr. for example: fd=open("tmp", O_RDWR); ftruncate(fd, 10); The file context related with the fd will be lost as fid is always retrieved from dentry, then the backend can't get the info of file context. It is against the original intention of user and may lead to bug. Link: http://lkml.kernel.org/r/20200710101548.10108-1-jianyong.wu@arm.com Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/vfs_inode.c | 9 +++++++-- fs/9p/vfs_inode_dotl.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c9255d399917..cd004dee2214 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1090,7 +1090,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { int retval; struct v9fs_session_info *v9ses; - struct p9_fid *fid; + struct p9_fid *fid = NULL; struct p9_wstat wstat; p9_debug(P9_DEBUG_VFS, "\n"); @@ -1100,7 +1100,12 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); - fid = v9fs_fid_lookup(dentry); + if (iattr->ia_valid & ATTR_FILE) { + fid = iattr->ia_file->private_data; + WARN_ON(!fid); + } + if (!fid) + fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 60328b21c5fb..0028eccb665a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -540,7 +540,7 @@ static int v9fs_mapped_iattr_valid(int iattr_valid) int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) { int retval; - struct p9_fid *fid; + struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr; struct inode *inode = d_inode(dentry); @@ -560,7 +560,12 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) p9attr.mtime_sec = iattr->ia_mtime.tv_sec; p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; - fid = v9fs_fid_lookup(dentry); + if (iattr->ia_valid & ATTR_FILE) { + fid = iattr->ia_file->private_data; + WARN_ON(!fid); + } + if (!fid) + fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); -- cgit From cb0aae0e31c632c407a2cab4307be85a001d4d98 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Mon, 15 Jun 2020 09:21:53 +0800 Subject: 9p: Fix memory leak in v9fs_mount v9fs_mount v9fs_session_init v9fs_cache_session_get_cookie v9fs_random_cachetag -->alloc cachetag v9ses->fscache = fscache_acquire_cookie -->maybe NULL sb = sget -->fail, goto clunk clunk_fid: v9fs_session_close if (v9ses->fscache) -->NULL kfree(v9ses->cachetag) Thus memleak happens. Link: http://lkml.kernel.org/r/20200615012153.89538-1-zhengbin13@huawei.com Fixes: 60e78d2c993e ("9p: Add fscache support to 9p") Cc: # v2.6.32+ Signed-off-by: Zheng Bin Signed-off-by: Dominique Martinet --- fs/9p/v9fs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 15a99f9c7253..39def020a074 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -500,10 +500,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) } #ifdef CONFIG_9P_FSCACHE - if (v9ses->fscache) { + if (v9ses->fscache) v9fs_cache_session_put_cookie(v9ses); - kfree(v9ses->cachetag); - } + kfree(v9ses->cachetag); #endif kfree(v9ses->uname); kfree(v9ses->aname); -- cgit From 6db6ea79c525f3d753e270c84d72b3a900ad33fb Mon Sep 17 00:00:00 2001 From: Alexander Kapshuk Date: Thu, 18 Jun 2020 21:34:17 +0300 Subject: net/9p: Fix sparse endian warning in trans_fd.c Address sparse endian warning: net/9p/trans_fd.c:932:28: warning: incorrect type in assignment (different base types) net/9p/trans_fd.c:932:28: expected restricted __be32 [addressable] [assigned] [usertype] s_addr net/9p/trans_fd.c:932:28: got unsigned long Link: http://lkml.kernel.org/r/20200618183417.5423-1-alexander.kapshuk@gmail.com Signed-off-by: Alexander Kapshuk Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 12ecacf0c55f..c0762a302162 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -950,7 +950,7 @@ static int p9_bind_privport(struct socket *sock) memset(&cl, 0, sizeof(cl)); cl.sin_family = AF_INET; - cl.sin_addr.s_addr = INADDR_ANY; + cl.sin_addr.s_addr = htonl(INADDR_ANY); for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { cl.sin_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); -- cgit From aab6c873cf9b7afa9ca1ad82e48734178080f534 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Sun, 28 Jun 2020 15:43:37 +0800 Subject: 9p: remove unused code in 9p These codes have been commented out since 2007 and lay in kernel since then. So, it's better to remove them. Link: http://lkml.kernel.org/r/20200628074337.45895-1-jianyong.wu@arm.com Signed-off-by: Jianyong Wu Signed-off-by: Dominique Martinet --- fs/9p/vfs_inode.c | 53 ----------------------------------------------------- 1 file changed, 53 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index cd004dee2214..0fd5bf29880e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -368,59 +368,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev) return inode; } -/* -static struct v9fs_fid* -v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) -{ - int err; - int nfid; - struct v9fs_fid *ret; - struct v9fs_fcall *fcall; - - nfid = v9fs_get_idpool(&v9ses->fidpool); - if (nfid < 0) { - eprintk(KERN_WARNING, "no free fids available\n"); - return ERR_PTR(-ENOSPC); - } - - err = v9fs_t_walk(v9ses, fid, nfid, (char *) dentry->d_name.name, - &fcall); - - if (err < 0) { - if (fcall && fcall->id == RWALK) - goto clunk_fid; - - PRINT_FCALL_ERROR("walk error", fcall); - v9fs_put_idpool(nfid, &v9ses->fidpool); - goto error; - } - - kfree(fcall); - fcall = NULL; - ret = v9fs_fid_create(v9ses, nfid); - if (!ret) { - err = -ENOMEM; - goto clunk_fid; - } - - err = v9fs_fid_insert(ret, dentry); - if (err < 0) { - v9fs_fid_destroy(ret); - goto clunk_fid; - } - - return ret; - -clunk_fid: - v9fs_t_clunk(v9ses, nfid); - -error: - kfree(fcall); - return ERR_PTR(err); -} -*/ - - /** * v9fs_clear_inode - release an inode * @inode: inode to release -- cgit From c6a8b84da4c28bda61b842a089651c3ec9d89a48 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:36:50 -0700 Subject: modules: linux/moduleparam.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Cc: Jessica Yu Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3ef917ff0964..1ad5aa3b86d9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -108,7 +108,7 @@ struct kparam_array * ".") the kernel commandline parameter. Note that - is changed to _, so * the user can use "foo-bar=1" even for variable "foo_bar". * - * @perm is 0 if the the variable is not to appear in sysfs, or 0444 + * @perm is 0 if the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). -- cgit From 4cb4ade19b4219af8f5cda9313dd99b0004c8b3c Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 2 Jun 2020 15:53:25 +1000 Subject: KVM: PPC: Book3SHV: Enable support for ISA v3.1 guests Adds support for emulating ISAv3.1 guests by adding the appropriate PCR and FSCR bits. Signed-off-by: Alistair Popple Reported-by: kbuild test robot Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88e6c78100d9..28311e52d7e8 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1354,6 +1354,7 @@ #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 #define PVR_ARCH_300 0x0f000005 +#define PVR_ARCH_31 0x0f000006 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6bf66649ab92..d64a2dc1ccca 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -342,13 +342,18 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_31 (PCR_ARCH_300 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + host_pcr_bit = PCR_ARCH_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) host_pcr_bit = PCR_ARCH_300; else if (cpu_has_feature(CPU_FTR_ARCH_207S)) host_pcr_bit = PCR_ARCH_207; @@ -374,6 +379,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; break; + case PVR_ARCH_31: + guest_pcr_bit = PCR_ARCH_31; + break; default: return -EINVAL; } @@ -2318,7 +2326,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * to trap and then we emulate them. */ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) -- cgit From e55f4d5898a9855350834535037f62732bbd320a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 8 Jun 2020 13:57:14 +0200 Subject: KVM: PPC: Book3S HV: Increase KVMPPC_NR_LPIDS on POWER8 and POWER9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to (4096 - 2) guests on these processors. POWER7 is kept the same with a limitation of (1024 - 2), but it might be time to drop KVM support for POWER7. Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G RAM and a bit of swap. Signed-off-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 28311e52d7e8..caef0483e9bb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -473,7 +473,8 @@ #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7c5a1812a1c3..38ea396a23d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = 0; if (cpu_has_feature(CPU_FTR_HVMODE)) host_lpid = mfspr(SPRN_LPID); - rsvd_lpid = LPID_RSVD; + + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + rsvd_lpid = LPID_RSVD; + else + rsvd_lpid = LPID_RSVD_POWER7; kvmppc_init_lpid(rsvd_lpid + 1); -- cgit From 1508c22f112ce11944cea84efb128a623e8ff03b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 9 Jun 2020 12:12:29 +1000 Subject: KVM: PPC: Protect kvm_vcpu_read_guest with srcu locks The kvm_vcpu_read_guest/kvm_vcpu_write_guest used for nested guests eventually call srcu_dereference_check to dereference a memslot and lockdep produces a warning as neither kvm->slots_lock nor kvm->srcu lock is held and kvm->users_count is above zero (>100 in fact). This wraps mentioned VCPU read/write helpers in srcu read lock/unlock as it is done in other places. This uses vcpu->srcu_idx when possible. These helpers are only used for nested KVM so this may explain why we did not see these before. Here is an example of a warning: ============================= WARNING: suspicious RCU usage 5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897 Not tainted ----------------------------- include/linux/kvm_host.h:633 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by qemu-system-ppc/2752: #0: c000200359016be0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x144/0xd80 [kvm] stack backtrace: CPU: 80 PID: 2752 Comm: qemu-system-ppc Not tainted 5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897 Call Trace: [c0002003591ab240] [c000000000b23ab4] dump_stack+0x190/0x25c (unreliable) [c0002003591ab2b0] [c00000000023f954] lockdep_rcu_suspicious+0x140/0x164 [c0002003591ab330] [c008000004a445f8] kvm_vcpu_gfn_to_memslot+0x4c0/0x510 [kvm] [c0002003591ab3a0] [c008000004a44c18] kvm_vcpu_read_guest+0xa0/0x180 [kvm] [c0002003591ab410] [c008000004ff9bd8] kvmhv_enter_nested_guest+0x90/0xb80 [kvm_hv] [c0002003591ab980] [c008000004fe07bc] kvmppc_pseries_do_hcall+0x7b4/0x1c30 [kvm_hv] [c0002003591aba10] [c008000004fe5d30] kvmppc_vcpu_run_hv+0x10a8/0x1a30 [kvm_hv] [c0002003591abae0] [c008000004a5d954] kvmppc_vcpu_run+0x4c/0x70 [kvm] [c0002003591abb10] [c008000004a56e54] kvm_arch_vcpu_ioctl_run+0x56c/0x7c0 [kvm] [c0002003591abba0] [c008000004a3ddc4] kvm_vcpu_ioctl+0x4ac/0xd80 [kvm] [c0002003591abd20] [c0000000006ebb58] ksys_ioctl+0x188/0x210 [c0002003591abd70] [c0000000006ebc28] sys_ioctl+0x48/0xb0 [c0002003591abdb0] [c000000000042764] system_call_exception+0x1d4/0x2e0 [c0002003591abe20] [c00000000000cce8] system_call_common+0xe8/0x214 Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++++ arch/powerpc/kvm/book3s_hv_nested.c | 30 +++++++++++++++++++----------- arch/powerpc/kvm/book3s_rtas.c | 2 ++ arch/powerpc/kvm/powerpc.c | 5 ++++- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..c6b3390e0de6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -160,7 +160,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read the entry from guest memory */ addr = base + (index * sizeof(rpte)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) { if (pte_ret_p) *pte_ret_p = addr; @@ -236,7 +238,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, /* Read the table to find the root of the radix tree */ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) return ret; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 2c849a65db77..6822d23a2da4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); + regs_ptr = kvmppc_get_gpr(vcpu, 5); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); + sizeof(struct hv_guest_state)) || + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; + if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); if (l2_hv.version != HV_GUEST_STATE_VERSION) return H_P2; - regs_ptr = kvmppc_get_gpr(vcpu, 5); - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); - if (err) - return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_pt_regs(&l2_regs); if (l2_hv.vcpu_token >= NR_CPUS) @@ -323,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_hv_regs(&l2_hv); byteswap_pt_regs(&l2_regs); } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); - if (err) - return H_AUTHORITY; - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct hv_guest_state)) || + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; @@ -508,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) goto not_found; /* Write what was loaded into our buffer back to the L1 guest */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; } else { /* Load the data to be stored from the L1 guest into our buf */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; @@ -548,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) ret = -EFAULT; ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { + int srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl_addr, &ptbl_entry, sizeof(ptbl_entry)); + srcu_read_unlock(&kvm->srcu, srcu_idx); + } if (ret) { gp->l1_gr_to_hr = 0; gp->process_table = 0; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c969..c5e677508d3b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) */ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto fail; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dd7d141e33e8..d7b69784bb7e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (rc) return EMULATE_DO_MMIO; return EMULATE_DONE; -- cgit From c7f3be4c94d3686ec67b7e7b4fb47e4c225a2789 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 20 Jul 2020 09:52:49 +0200 Subject: rtc: ds1374: fix RTC_DRV_DS1374_WDT dependencies It is not enough to select WATCHDOG_CORE, the watchdog part of the driver now also depends on WATCHDOG. This is currently the best we can do because alarm support and watchdog support are mutually exclusive. Reported-by: Randy Dunlap Signed-off-by: Alexandre Belloni Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20200720075250.1019172-1-alexandre.belloni@bootlin.com --- drivers/rtc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index c25d51f35f0c..2753e0f54cc3 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -281,8 +281,8 @@ config RTC_DRV_DS1374 config RTC_DRV_DS1374_WDT bool "Dallas/Maxim DS1374 watchdog timer" - depends on RTC_DRV_DS1374 - select WATCHDOG_CORE if WATCHDOG + depends on RTC_DRV_DS1374 && WATCHDOG + select WATCHDOG_CORE help If you say Y here you will get support for the watchdog timer in the Dallas Semiconductor DS1374 -- cgit From 3d6cfb36ed719e5efdc2a6a8e8ee07f185c88da1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 20 Jul 2020 09:52:50 +0200 Subject: rtc: ds1374: remove unused define Remove unused define and fix typo where it should have been used. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200720075250.1019172-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-ds1374.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index c71065d26cd2..177d870bda0d 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -377,8 +377,6 @@ static const struct rtc_class_ops ds1374_rtc_ops = { #define TIMER_MARGIN_MIN 1 #define TIMER_MARGIN_MAX 4095 /* 24-bit value */ -#define DRV_NAME "DS1374 Watchdog" - static int wdt_margin; module_param(wdt_margin, int, 0); MODULE_PARM_DESC(wdt_margin, "Watchdog timeout in seconds (default 32s)"); @@ -389,7 +387,7 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default =" __MODULE_STRING(WATCHDOG_NOWAYOUT)")"); static const struct watchdog_info ds1374_wdt_info = { - .identity = "DS1374 WTD", + .identity = "DS1374 Watchdog", .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, }; -- cgit From f47ee279d25fb0e010cae5d6e758e39b40eb6378 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:07 +0300 Subject: clk: actions: Fix h_clk for Actions S500 SoC The h_clk clock in the Actions Semi S500 SoC clock driver has an invalid parent. Replace with the correct one. Fixes: ed6b4795ece4 ("clk: actions: Add clock driver for S500 SoC") Signed-off-by: Cristian Ciocaltea Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/c57e7ebabfa970014f073b92fe95b47d3e5a70b1.1593788312.git.cristian.ciocaltea@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/actions/owl-s500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c index e2007ac4d235..0eb83a0b70bc 100644 --- a/drivers/clk/actions/owl-s500.c +++ b/drivers/clk/actions/owl-s500.c @@ -183,7 +183,7 @@ static OWL_GATE(timer_clk, "timer_clk", "hosc", CMU_DEVCLKEN1, 27, 0, 0); static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", CMU_DEVCLKEN1, 3, 0, 0); /* divider clocks */ -static OWL_DIVIDER(h_clk, "h_clk", "ahbprevdiv_clk", CMU_BUSCLK1, 12, 2, NULL, 0, 0); +static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 12, 2, NULL, 0, 0); static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0); /* factor clocks */ -- cgit From 1a4ae4138f386600fc539747bb978873299017f8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:08 +0300 Subject: dt-bindings: clock: Add APB, DMAC, GPIO bindings for Actions S500 SoC Add the missing APB, DMAC and GPIO clock bindings constants for Actions Semi S500 SoC. Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/67112af4f5bc0cc5e70ce8410feb369cc72972b8.1593788312.git.cristian.ciocaltea@gmail.com Reviewed-by: Manivannan Sadhasivam Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/actions,s500-cmu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/clock/actions,s500-cmu.h b/include/dt-bindings/clock/actions,s500-cmu.h index 030981cd2d56..a250a52a6192 100644 --- a/include/dt-bindings/clock/actions,s500-cmu.h +++ b/include/dt-bindings/clock/actions,s500-cmu.h @@ -72,7 +72,12 @@ #define CLK_NAND 52 #define CLK_ECC 53 #define CLK_RMII_REF 54 +#define CLK_GPIO 55 -#define CLK_NR_CLKS (CLK_RMII_REF + 1) +/* system clock (part 2) */ +#define CLK_APB 56 +#define CLK_DMAC 57 + +#define CLK_NR_CLKS (CLK_DMAC + 1) #endif /* __DT_BINDINGS_CLOCK_S500_CMU_H */ -- cgit From b81e88dead64b4d3725b02bf275d5594943c125a Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:09 +0300 Subject: clk: actions: Add APB, DMAC, GPIO clock support for Actions S500 SoC Add support for the missing APB, DMAC and GPIO clocks in the Actions Semi S500 SoC clock driver. Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/87964ae012e513597b8b4c7be2e7ac332a70087a.1593788312.git.cristian.ciocaltea@gmail.com Reviewed-by: Manivannan Sadhasivam Signed-off-by: Stephen Boyd --- drivers/clk/actions/owl-s500.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c index 0eb83a0b70bc..025a8f6d6482 100644 --- a/drivers/clk/actions/owl-s500.c +++ b/drivers/clk/actions/owl-s500.c @@ -175,6 +175,8 @@ static OWL_MUX(dev_clk, "dev_clk", dev_clk_mux_p, CMU_DEVPLL, 12, 1, CLK_SET_RAT static OWL_MUX(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, CMU_BUSCLK1, 8, 3, CLK_SET_RATE_PARENT); /* gate clocks */ +static OWL_GATE(gpio_clk, "gpio_clk", "apb_clk", CMU_DEVCLKEN0, 18, 0, 0); +static OWL_GATE(dmac_clk, "dmac_clk", "h_clk", CMU_DEVCLKEN0, 1, 0, 0); static OWL_GATE(spi0_clk, "spi0_clk", "ahb_clk", CMU_DEVCLKEN1, 10, 0, CLK_IGNORE_UNUSED); static OWL_GATE(spi1_clk, "spi1_clk", "ahb_clk", CMU_DEVCLKEN1, 11, 0, CLK_IGNORE_UNUSED); static OWL_GATE(spi2_clk, "spi2_clk", "ahb_clk", CMU_DEVCLKEN1, 12, 0, CLK_IGNORE_UNUSED); @@ -184,6 +186,7 @@ static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", CMU_DEVCLKEN1, 3, 0, 0); /* divider clocks */ static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 12, 2, NULL, 0, 0); +static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, 0, 0); static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0); /* factor clocks */ @@ -428,6 +431,9 @@ static struct owl_clk_common *s500_clks[] = { &spdif_clk.common, &nand_clk.common, &ecc_clk.common, + &apb_clk.common, + &dmac_clk.common, + &gpio_clk.common, }; static struct clk_hw_onecell_data s500_hw_clks = { @@ -484,6 +490,9 @@ static struct clk_hw_onecell_data s500_hw_clks = { [CLK_SPDIF] = &spdif_clk.common.hw, [CLK_NAND] = &nand_clk.common.hw, [CLK_ECC] = &ecc_clk.common.hw, + [CLK_APB] = &apb_clk.common.hw, + [CLK_DMAC] = &dmac_clk.common.hw, + [CLK_GPIO] = &gpio_clk.common.hw, }, .num = CLK_NR_CLKS, }; -- cgit From fac1d443a2b73dfb5b277d4e3c202609f0927eb5 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:10 +0300 Subject: dt-bindings: reset: Add binding constants for Actions S500 RMU Add device tree binding constants for Actions Semi S500 SoC Reset Management Unit (RMU). Signed-off-by: Cristian Ciocaltea Acked-by: Philipp Zabel Link: https://lore.kernel.org/r/daf615160b3be9f38dcf7926cc82128c9c2d73e3.1593788312.git.cristian.ciocaltea@gmail.com Reviewed-by: Manivannan Sadhasivam Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/reset/actions,s500-reset.h | 67 ++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 include/dt-bindings/reset/actions,s500-reset.h diff --git a/include/dt-bindings/reset/actions,s500-reset.h b/include/dt-bindings/reset/actions,s500-reset.h new file mode 100644 index 000000000000..f5d94176d10b --- /dev/null +++ b/include/dt-bindings/reset/actions,s500-reset.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Device Tree binding constants for Actions Semi S500 Reset Management Unit + * + * Copyright (c) 2014 Actions Semi Inc. + * Copyright (c) 2020 Cristian Ciocaltea + */ + +#ifndef __DT_BINDINGS_ACTIONS_S500_RESET_H +#define __DT_BINDINGS_ACTIONS_S500_RESET_H + +#define RESET_DMAC 0 +#define RESET_NORIF 1 +#define RESET_DDR 2 +#define RESET_NANDC 3 +#define RESET_SD0 4 +#define RESET_SD1 5 +#define RESET_PCM1 6 +#define RESET_DE 7 +#define RESET_LCD 8 +#define RESET_SD2 9 +#define RESET_DSI 10 +#define RESET_CSI 11 +#define RESET_BISP 12 +#define RESET_KEY 13 +#define RESET_GPIO 14 +#define RESET_AUDIO 15 +#define RESET_PCM0 16 +#define RESET_VDE 17 +#define RESET_VCE 18 +#define RESET_GPU3D 19 +#define RESET_NIC301 20 +#define RESET_LENS 21 +#define RESET_PERIPHRESET 22 +#define RESET_USB2_0 23 +#define RESET_TVOUT 24 +#define RESET_HDMI 25 +#define RESET_HDCP2TX 26 +#define RESET_UART6 27 +#define RESET_UART0 28 +#define RESET_UART1 29 +#define RESET_UART2 30 +#define RESET_SPI0 31 +#define RESET_SPI1 32 +#define RESET_SPI2 33 +#define RESET_SPI3 34 +#define RESET_I2C0 35 +#define RESET_I2C1 36 +#define RESET_USB3 37 +#define RESET_UART3 38 +#define RESET_UART4 39 +#define RESET_UART5 40 +#define RESET_I2C2 41 +#define RESET_I2C3 42 +#define RESET_ETHERNET 43 +#define RESET_CHIPID 44 +#define RESET_USB2_1 45 +#define RESET_WD0RESET 46 +#define RESET_WD1RESET 47 +#define RESET_WD2RESET 48 +#define RESET_WD3RESET 49 +#define RESET_DBG0RESET 50 +#define RESET_DBG1RESET 51 +#define RESET_DBG2RESET 52 +#define RESET_DBG3RESET 53 + +#endif /* __DT_BINDINGS_ACTIONS_S500_RESET_H */ -- cgit From 9fb961da91393e33b09a87582ee526e6328869a1 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:11 +0300 Subject: clk: actions: Add Actions S500 SoC Reset Management Unit support Add Reset Management Unit (RMU) support for Actions Semi S500 SoC. Signed-off-by: Cristian Ciocaltea Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/3fba2e6f0edfac97cb8c6ce60f95c24852578eac.1593788312.git.cristian.ciocaltea@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/actions/owl-s500.c | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c index 025a8f6d6482..61bb224f6330 100644 --- a/drivers/clk/actions/owl-s500.c +++ b/drivers/clk/actions/owl-s500.c @@ -23,8 +23,10 @@ #include "owl-gate.h" #include "owl-mux.h" #include "owl-pll.h" +#include "owl-reset.h" #include +#include #define CMU_COREPLL (0x0000) #define CMU_DEVPLL (0x0004) @@ -497,20 +499,96 @@ static struct clk_hw_onecell_data s500_hw_clks = { .num = CLK_NR_CLKS, }; +static const struct owl_reset_map s500_resets[] = { + [RESET_DMAC] = { CMU_DEVRST0, BIT(0) }, + [RESET_NORIF] = { CMU_DEVRST0, BIT(1) }, + [RESET_DDR] = { CMU_DEVRST0, BIT(2) }, + [RESET_NANDC] = { CMU_DEVRST0, BIT(3) }, + [RESET_SD0] = { CMU_DEVRST0, BIT(4) }, + [RESET_SD1] = { CMU_DEVRST0, BIT(5) }, + [RESET_PCM1] = { CMU_DEVRST0, BIT(6) }, + [RESET_DE] = { CMU_DEVRST0, BIT(7) }, + [RESET_LCD] = { CMU_DEVRST0, BIT(8) }, + [RESET_SD2] = { CMU_DEVRST0, BIT(9) }, + [RESET_DSI] = { CMU_DEVRST0, BIT(10) }, + [RESET_CSI] = { CMU_DEVRST0, BIT(11) }, + [RESET_BISP] = { CMU_DEVRST0, BIT(12) }, + [RESET_KEY] = { CMU_DEVRST0, BIT(14) }, + [RESET_GPIO] = { CMU_DEVRST0, BIT(15) }, + [RESET_AUDIO] = { CMU_DEVRST0, BIT(17) }, + [RESET_PCM0] = { CMU_DEVRST0, BIT(18) }, + [RESET_VDE] = { CMU_DEVRST0, BIT(19) }, + [RESET_VCE] = { CMU_DEVRST0, BIT(20) }, + [RESET_GPU3D] = { CMU_DEVRST0, BIT(22) }, + [RESET_NIC301] = { CMU_DEVRST0, BIT(23) }, + [RESET_LENS] = { CMU_DEVRST0, BIT(26) }, + [RESET_PERIPHRESET] = { CMU_DEVRST0, BIT(27) }, + [RESET_USB2_0] = { CMU_DEVRST1, BIT(0) }, + [RESET_TVOUT] = { CMU_DEVRST1, BIT(1) }, + [RESET_HDMI] = { CMU_DEVRST1, BIT(2) }, + [RESET_HDCP2TX] = { CMU_DEVRST1, BIT(3) }, + [RESET_UART6] = { CMU_DEVRST1, BIT(4) }, + [RESET_UART0] = { CMU_DEVRST1, BIT(5) }, + [RESET_UART1] = { CMU_DEVRST1, BIT(6) }, + [RESET_UART2] = { CMU_DEVRST1, BIT(7) }, + [RESET_SPI0] = { CMU_DEVRST1, BIT(8) }, + [RESET_SPI1] = { CMU_DEVRST1, BIT(9) }, + [RESET_SPI2] = { CMU_DEVRST1, BIT(10) }, + [RESET_SPI3] = { CMU_DEVRST1, BIT(11) }, + [RESET_I2C0] = { CMU_DEVRST1, BIT(12) }, + [RESET_I2C1] = { CMU_DEVRST1, BIT(13) }, + [RESET_USB3] = { CMU_DEVRST1, BIT(14) }, + [RESET_UART3] = { CMU_DEVRST1, BIT(15) }, + [RESET_UART4] = { CMU_DEVRST1, BIT(16) }, + [RESET_UART5] = { CMU_DEVRST1, BIT(17) }, + [RESET_I2C2] = { CMU_DEVRST1, BIT(18) }, + [RESET_I2C3] = { CMU_DEVRST1, BIT(19) }, + [RESET_ETHERNET] = { CMU_DEVRST1, BIT(20) }, + [RESET_CHIPID] = { CMU_DEVRST1, BIT(21) }, + [RESET_USB2_1] = { CMU_DEVRST1, BIT(22) }, + [RESET_WD0RESET] = { CMU_DEVRST1, BIT(24) }, + [RESET_WD1RESET] = { CMU_DEVRST1, BIT(25) }, + [RESET_WD2RESET] = { CMU_DEVRST1, BIT(26) }, + [RESET_WD3RESET] = { CMU_DEVRST1, BIT(27) }, + [RESET_DBG0RESET] = { CMU_DEVRST1, BIT(28) }, + [RESET_DBG1RESET] = { CMU_DEVRST1, BIT(29) }, + [RESET_DBG2RESET] = { CMU_DEVRST1, BIT(30) }, + [RESET_DBG3RESET] = { CMU_DEVRST1, BIT(31) }, +}; + static struct owl_clk_desc s500_clk_desc = { .clks = s500_clks, .num_clks = ARRAY_SIZE(s500_clks), .hw_clks = &s500_hw_clks, + + .resets = s500_resets, + .num_resets = ARRAY_SIZE(s500_resets), }; static int s500_clk_probe(struct platform_device *pdev) { struct owl_clk_desc *desc; + struct owl_reset *reset; + int ret; desc = &s500_clk_desc; owl_clk_regmap_init(pdev, desc); + reset = devm_kzalloc(&pdev->dev, sizeof(*reset), GFP_KERNEL); + if (!reset) + return -ENOMEM; + + reset->rcdev.of_node = pdev->dev.of_node; + reset->rcdev.ops = &owl_reset_ops; + reset->rcdev.nr_resets = desc->num_resets; + reset->reset_map = desc->resets; + reset->regmap = desc->regmap; + + ret = devm_reset_controller_register(&pdev->dev, &reset->rcdev); + if (ret) + dev_err(&pdev->dev, "Failed to register reset controller\n"); + return owl_clk_probe(&pdev->dev, desc->hw_clks); } -- cgit From cb7c6677bfb5c92526df73b20760d65bd9189344 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Jul 2020 20:05:12 +0300 Subject: MAINTAINERS: Add reset binding entry for Actions Semi Owl SoCs Add a reset binding entry to match all members of Actions Semi Owl SoCs. Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/78d63d97e3a8a8f7a9048b6eec74a9d158578833.1593788312.git.cristian.ciocaltea@gmail.com Reviewed-by: Manivannan Sadhasivam Signed-off-by: Stephen Boyd --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68f21d46614c..931c78a5e035 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1533,6 +1533,7 @@ F: drivers/mmc/host/owl-mmc.c F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* +F: include/dt-bindings/reset/actions,* F: include/linux/soc/actions/ N: owl -- cgit From 3f11011573f9bab446b635763c5b6eb92ffdc1e2 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 14 Jul 2020 15:18:39 +0800 Subject: dt-bindings: i2c: mv64xxx: Add compatible for the A100 i2c node. Allwinner A100 have a mv64xxx i2c interface available to be used. Signed-off-by: Yangtao Li Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml index 2ceb05ba2df5..5b5ae402f97a 100644 --- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml @@ -26,6 +26,9 @@ properties: - items: - const: allwinner,sun50i-a64-i2c - const: allwinner,sun6i-a31-i2c + - items: + - const: allwinner,sun50i-a100-i2c + - const: allwinner,sun6i-a31-i2c - items: - const: allwinner,sun50i-h6-i2c - const: allwinner,sun6i-a31-i2c -- cgit From ef990bcad58cf1d13c5a49191a2c2342eb8d6709 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Wed, 22 Jul 2020 18:18:20 +0200 Subject: clk: rockchip: add sclk_mac_lbtest to rk3188_critical_clocks Since the loopbacktest clock is not exported and is not touched in the driver, it has to be added to rk3188_critical_clocks to be protected from being disabled and in order to get the emac working. Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20200722161820.5316-1-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3188.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 77aebfb1d6d5..730020fcc7fe 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -751,6 +751,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "pclk_peri", "hclk_cpubus", "hclk_vio_bus", + "sclk_mac_lbtest", }; static struct rockchip_clk_provider *__init rk3188_common_clk_init(struct device_node *np) -- cgit From ba937f51090d04bde3c2b9bf1213dabac706461d Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 21 Jul 2020 12:18:34 +0200 Subject: dt-bindings: clock: Fix qcom,msm8996-apcc yaml syntax Fix errors reported by dt_binding_check. - Fix literal block scalar for dts example - Fix schema identifier URI Signed-off-by: Loic Poulain Link: https://lore.kernel.org/r/1595326714-20485-1-git-send-email-loic.poulain@linaro.org Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml index d673edeed98d..a20cb10636dd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only %YAML 1.2 --- -$id: http://devicetree.org/schemas/clock/qcom,kryocc.yaml# +$id: http://devicetree.org/schemas/clock/qcom,msm8996-apcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm clock controller for MSM8996 CPUs @@ -46,11 +46,9 @@ required: additionalProperties: false examples: - # Example for msm8996 - | kryocc: clock-controller@6400000 { compatible = "qcom,msm8996-apcc"; reg = <0x6400000 0x90000>; #clock-cells = <1>; - }; -... + }; -- cgit From 8200597fb16651e5b2280b694dd86352b738657b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Jun 2020 14:27:36 +0100 Subject: clk: vc5: fix use of memory after it has been kfree'd There are a several places where printing an error message of init.name occurs after init.name has been kfree'd. Also the failure message is duplicated each time in the code. Fix this by adding a registration error failure path for these cases, moving the duplicated error messages to one common point and kfree'ing init.name only after it has been used. Changes also shrink the object code size by 171 bytes (x86-64, gcc 9.3): Before: text data bss dec hex filename 21057 3960 64 25081 61f9 drivers/clk/clk-versaclock5.o After: text data bss dec hex filename 20886 3960 64 24910 614e drivers/clk/clk-versaclock5.o Addresses-Coverity: ("Use after free") Fixes: f491276a5168 ("clk: vc5: Allow Versaclock driver to support multiple instances") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200625132736.88832-1-colin.king@canonical.com Reviewed-by: Luca Ceresoli [sboyd@kernel.org: Drop stray newline] Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock5.c | 50 ++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index 9a5fb3834b9a..d6fa15b6f37f 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -882,11 +882,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.parent_names = parent_names; vc5->clk_mux.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_mux); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } if (vc5->chip_info->flags & VC5_HAS_PFD_FREQ_DBL) { /* Register frequency doubler */ @@ -900,12 +898,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.num_parents = 1; vc5->clk_mul.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_mul); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } } /* Register PFD */ @@ -921,11 +916,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.num_parents = 1; vc5->clk_pfd.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_pfd); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } /* Register PLL */ memset(&init, 0, sizeof(init)); @@ -939,11 +932,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_pll.vc5 = vc5; vc5->clk_pll.hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_pll.hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } /* Register FODs */ for (n = 0; n < vc5->chip_info->clk_fod_cnt; n++) { @@ -960,12 +951,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_fod[n].vc5 = vc5; vc5->clk_fod[n].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_fod[n].hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } } /* Register MUX-connected OUT0_I2C_SELB output */ @@ -981,11 +969,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_out[0].vc5 = vc5; vc5->clk_out[0].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_out[0].hw); - kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } + if (ret) + goto err_clk_register; + kfree(init.name); /* clock framework made a copy of the name */ /* Register FOD-connected OUTx outputs */ for (n = 1; n < vc5->chip_info->clk_out_cnt; n++) { @@ -1008,12 +994,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_out[n].vc5 = vc5; vc5->clk_out[n].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_out[n].hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } /* Fetch Clock Output configuration from DT (if specified) */ ret = vc5_get_output_config(client, &vc5->clk_out[n]); @@ -1029,6 +1012,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) return 0; +err_clk_register: + dev_err(&client->dev, "unable to register %s\n", init.name); + kfree(init.name); /* clock framework made a copy of the name */ err_clk: if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL) clk_unregister_fixed_rate(vc5->pin_xin); -- cgit From faf29338f3cb6ebcbff7297471b0fa56639a98bc Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 16 Jul 2020 07:26:20 -0500 Subject: clk: vc5: Add memory check to prevent oops When getting the names of the child nodes, kasprintf is used to allocate memory which is used to create the string for the node name. Unfortunately, there is no memory check to determine if this allocation fails, it may cause an error when trying to get child node name. This patch will check if the memory allocation fails, and returns and -ENOMEM error instead of blindly moving on. Fixes: 260249f929e8 ("clk: vc5: Enable addition output configurations of the Versaclock") Suggested-by: Dan Carpenter Signed-off-by: Adam Ford Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20200716122620.4538-1-aford173@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock5.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index d6fa15b6f37f..32f0aa64f062 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -789,10 +789,13 @@ static int vc5_get_output_config(struct i2c_client *client, int ret = 0; child_name = kasprintf(GFP_KERNEL, "OUT%d", clk_out->num + 1); + if (!child_name) + return -ENOMEM; + np_output = of_get_child_by_name(client->dev.of_node, child_name); kfree(child_name); if (!np_output) - goto output_done; + return 0; ret = vc5_update_mode(np_output, clk_out); if (ret) @@ -813,7 +816,6 @@ output_error: of_node_put(np_output); -output_done: return ret; } @@ -828,7 +830,7 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) int ret; vc5 = devm_kzalloc(&client->dev, sizeof(*vc5), GFP_KERNEL); - if (vc5 == NULL) + if (!vc5) return -ENOMEM; i2c_set_clientdata(client, vc5); -- cgit From 7ec21d9da57e1ea770fae48e8fabb06c579487a7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 23 Jun 2020 21:14:16 +0800 Subject: KVM: PPC: Clean up redundant kvm_run parameters in assembly In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu' structure. For historical reasons, many kvm-related function parameters retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This patch does a unified cleanup of these remaining redundant parameters. [paulus@ozlabs.org - Fixed places that were missed in book3s_interrupts.S] Signed-off-by: Tianjia Zhang Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_interrupts.S | 56 +++++++++++++++++------------------ arch/powerpc/kvm/book3s_pr.c | 9 +++--- arch/powerpc/kvm/booke.c | 9 +++--- arch/powerpc/kvm/booke_interrupts.S | 9 +++--- arch/powerpc/kvm/bookehv_interrupts.S | 10 +++---- 6 files changed, 45 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ccf66b3a4c1d..0a056c64c317 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -59,7 +59,7 @@ enum xlate_readwrite { }; extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); -extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f7ad99d972ce..17371e197994 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -55,8 +55,7 @@ ****************************************************************************/ /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) @@ -68,8 +67,8 @@ kvm_start_entry: /* Save host state to the stack */ PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - /* Save r3 (kvm_run) and r4 (vcpu) */ - SAVE_2GPRS(3, r1) + /* Save r3 (vcpu) */ + SAVE_GPR(3, r1) /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) @@ -82,47 +81,46 @@ kvm_start_entry: PPC_STL r0, _LINK(r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) kvm_start_lightweight: /* Copy registers into shadow vcpu so we can access them in real mode */ - mr r3, r4 bl FUNC(kvmppc_copy_to_svcpu) nop - REST_GPR(4, r1) + REST_GPR(3, r1) #ifdef CONFIG_PPC_BOOK3S_64 /* Get the dcbz32 flag */ - PPC_LL r3, VCPU_HFLAGS(r4) - rldicl r3, r3, 0, 63 /* r3 &= 1 */ - stb r3, HSTATE_RESTORE_HID5(r13) + PPC_LL r0, VCPU_HFLAGS(r3) + rldicl r0, r0, 0, 63 /* r3 &= 1 */ + stb r0, HSTATE_RESTORE_HID5(r13) /* Load up guest SPRG3 value, since it's user readable */ - lwz r3, VCPU_SHAREDBE(r4) - cmpwi r3, 0 - ld r5, VCPU_SHARED(r4) + lbz r4, VCPU_SHAREDBE(r3) + cmpwi r4, 0 + ld r5, VCPU_SHARED(r3) beq sprg3_little_endian sprg3_big_endian: #ifdef __BIG_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif b after_sprg3_load sprg3_little_endian: #ifdef __LITTLE_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif after_sprg3_load: - mtspr SPRN_SPRG3, r3 + mtspr SPRN_SPRG3, r4 #endif /* CONFIG_PPC_BOOK3S_64 */ - PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ + PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */ /* Jump to segment patching handler and into our guest */ bl FUNC(kvmppc_entry_trampoline) @@ -146,7 +144,7 @@ after_sprg3_load: * */ - PPC_LL r3, GPR4(r1) /* vcpu pointer */ + PPC_LL r3, GPR3(r1) /* vcpu pointer */ /* * kvmppc_copy_from_svcpu can clobber volatile registers, save @@ -169,7 +167,7 @@ after_sprg3_load: #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ - PPC_LL r7, GPR4(r1) + PPC_LL r7, GPR3(r1) PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) @@ -190,11 +188,11 @@ after_sprg3_load: PPC_STL r30, VCPU_GPR(R30)(r7) PPC_STL r31, VCPU_GPR(R31)(r7) - /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ - lwz r5, VCPU_TRAP(r7) + /* Pass the exit number as 2nd argument to kvmppc_handle_exit */ + lwz r4, VCPU_TRAP(r7) - /* Restore r3 (kvm_run) and r4 (vcpu) */ - REST_2GPRS(3, r1) + /* Restore r3 (vcpu) */ + REST_GPR(3, r1) bl FUNC(kvmppc_handle_exit_pr) /* If RESUME_GUEST, get back in the loop */ @@ -223,11 +221,11 @@ kvm_loop_heavyweight: PPC_LL r4, _LINK(r1) PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) - /* Load vcpu and cpu_run */ - REST_2GPRS(3, r1) + /* Load vcpu */ + REST_GPR(3, r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) /* Jump back into the beginning of this function */ b kvm_start_lightweight @@ -235,7 +233,7 @@ kvm_loop_heavyweight: kvm_loop_lightweight: /* We'll need the vcpu pointer */ - REST_GPR(4, r1) + REST_GPR(3, r1) /* Jump back into the beginning of this function */ b kvm_start_lightweight diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ef54f917bdaf..01c8fe5abe0d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr) return r; } -int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; @@ -1826,7 +1826,6 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int ret; #ifdef CONFIG_ALTIVEC unsigned long uninitialized_var(vrsave); @@ -1834,7 +1833,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = -EINVAL; goto out; } @@ -1861,7 +1860,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); kvmppc_clear_debug(vcpu); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c0d62a917e20..3e1c9f08e302 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int ret, s; struct debug_reg debug; if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } @@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ @@ -982,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu, * * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; int idx; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2e56ab5a5f55..6fa82efe833b 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host) /* Switch to kernel stack and jump to handler. */ LOAD_REG_ADDR(r3, kvmppc_handle_exit) mtctr r3 - lwz r3, HOST_RUN(r1) + mr r3, r4 lwz r2, HOST_R2(r1) mr r14, r4 /* Save vcpu pointer. */ @@ -337,15 +337,14 @@ heavyweight_exit: /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. */ - stw r3, HOST_RUN(r1) + mr r4, r3 mflr r3 stw r3, HOST_STACK_LR(r1) mfcr r5 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index c577ba4b3169..8262c14fc9e6 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host) #endif /* Switch to kernel stack and jump to handler. */ - PPC_LL r3, HOST_RUN(r1) + mr r3, r4 mr r5, r14 /* intno */ mr r14, r4 /* Save vcpu pointer. */ + mr r4, r5 bl kvmppc_handle_exit /* Restore vcpu pointer and the nonvolatiles we used. */ @@ -525,15 +526,14 @@ heavyweight_exit: blr /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. */ - PPC_STL r3, HOST_RUN(r1) + mr r4, r3 mflr r3 mfcr r5 PPC_STL r3, HOST_STACK_LR(r1) -- cgit From 4db7e1786db505eee86e6301cd42967f4da43be8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Jul 2020 18:05:10 -0500 Subject: i2c: busses: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Acked-by: Baruch Siach Reviewed-by: Jean Delvare Reviewed-by: Brendan Higgins Reviewed-by: Gregory CLEMENT Reviewed-by: Andy Shevchenko Signed-off-by: Gustavo A. R. Silva Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd8111.c | 2 +- drivers/i2c/busses/i2c-aspeed.c | 4 ++-- drivers/i2c/busses/i2c-designware-pcidrv.c | 2 +- drivers/i2c/busses/i2c-digicolor.c | 2 +- drivers/i2c/busses/i2c-i801.c | 8 ++++---- drivers/i2c/busses/i2c-mv64xxx.c | 9 ++++----- drivers/i2c/busses/i2c-synquacer.c | 3 +-- drivers/i2c/busses/i2c-viapro.c | 2 +- drivers/i2c/busses/scx200_acb.c | 2 +- drivers/i2c/i2c-slave-eeprom.c | 2 +- 10 files changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 2b14fef5bf26..34862ad3423e 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -381,7 +381,7 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr, if (status) return status; len = min_t(u8, len, I2C_SMBUS_BLOCK_MAX); - /* fall through */ + fallthrough; case I2C_SMBUS_I2C_BLOCK_DATA: for (i = 0; i < len; i++) { status = amd_ec_read(smbus, AMD_SMB_DATA + i, diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index f51702d86a90..31268074c422 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -504,7 +504,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) goto error_and_stop; } irq_handled |= ASPEED_I2CD_INTR_TX_ACK; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_TX_FIRST: if (bus->buf_index < msg->len) { bus->master_state = ASPEED_I2C_MASTER_TX; @@ -520,7 +520,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) /* RX may not have completed yet (only address cycle) */ if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE)) goto out_no_complete; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_RX: if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) { dev_err(bus->dev, "master failed to RX\n"); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8522134f9ea9..55c83a7a24f3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -90,7 +90,7 @@ static int mfld_setup(struct pci_dev *pdev, struct dw_pci_controller *c) switch (pdev->device) { case 0x0817: dev->timings.bus_freq_hz = I2C_MAX_STANDARD_MODE_FREQ; - /* fall through */ + fallthrough; case 0x0818: case 0x0819: c->bus_num = pdev->device - 0x817 + 3; diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 332f00437479..f67639dc74b7 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -187,7 +187,7 @@ static irqreturn_t dc_i2c_irq(int irq, void *dev_id) break; } i2c->state = STATE_WRITE; - /* fall through */ + fallthrough; case STATE_WRITE: if (i2c->msgbuf_ptr < i2c->msg->len) dc_i2c_write_buf(i2c); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 1fc7ae77753d..638e7f7c66cc 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1765,19 +1765,19 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1: case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2: priv->features |= FEATURE_IDF; - /* fall through */ + fallthrough; default: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801DB_3: priv->features |= FEATURE_SMBUS_PEC; priv->features |= FEATURE_BLOCK_BUFFER; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801CA_3: priv->features |= FEATURE_HOST_NOTIFY; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801BA_2: case PCI_DEVICE_ID_INTEL_82801AB_3: case PCI_DEVICE_ID_INTEL_82801AA_3: diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 829b8c98ae51..8d9d4ffdcd24 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -251,7 +251,7 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_WR_ADDR_2_ACK: /* 0xd0 */ case MV64XXX_I2C_STATUS_MAST_WR_ACK: /* 0x28 */ if ((drv_data->bytes_left == 0) @@ -282,14 +282,14 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_ADDR_2_ACK: /* 0xe0 */ if (drv_data->bytes_left == 0) { drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; drv_data->state = MV64XXX_I2C_STATE_IDLE; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK: /* 0x50 */ if (status != MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK) drv_data->action = MV64XXX_I2C_ACTION_CONTINUE; @@ -417,8 +417,7 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data) "mv64xxx_i2c_do_action: Invalid action: %d\n", drv_data->action); drv_data->rc = -EIO; - - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_ACTION_SEND_STOP: drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP, diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index c9a3dba6a75d..31be1811d5e6 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -398,8 +398,7 @@ static irqreturn_t synquacer_i2c_isr(int irq, void *dev_id) if (i2c->state == STATE_READ) goto prepare_read; - - /* fall through */ + fallthrough; case STATE_WRITE: if (bsr & SYNQUACER_I2C_BSR_LRB) { diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 05aa92a3fbe0..970ccdcbb889 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -228,7 +228,7 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr, goto exit_unsupported; if (read_write == I2C_SMBUS_READ) outb_p(data->block[0], SMBHSTDAT0); - /* Fall through */ + fallthrough; case I2C_SMBUS_BLOCK_DATA: outb_p(command, SMBHSTCMD); if (read_write == I2C_SMBUS_WRITE) { diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index bd9afa383d12..7b42a18bd05c 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -151,7 +151,7 @@ static void scx200_acb_machine(struct scx200_acb_iface *iface, u8 status) case state_repeat_start: outb(inb(ACBCTL1) | ACBCTL1_START, ACBCTL1); - /* fallthrough */ + fallthrough; case state_quick: if (iface->address_byte & 1) { diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 593f2fd39d17..5c7ae421cacf 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -66,7 +66,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, case I2C_SLAVE_READ_PROCESSED: /* The previous byte made it to the bus, get next one */ eeprom->buffer_idx++; - /* fallthrough */ + fallthrough; case I2C_SLAVE_READ_REQUESTED: spin_lock(&eeprom->buffer_lock); *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask]; -- cgit From 2a08a9232ba2e6103843870e68edae8e381eb02a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 26 Jun 2020 13:54:33 +0200 Subject: dt-bindings: arm: bcm: Add a select to the RPI Firmware binding The RaspberryPi firmware binding uses two compatible, include simple-bus. The select statement generated by default will thus select any node that has simple-bus, not all of them being the raspberrypi firmware node. This results in warnings being wrongfully reported. Let's add a custom select statement to fix that. Fixes: d4c708c032df ("dt-bindings: arm: bcm: Convert BCM2835 firmware binding to YAML") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20200626115433.125735-1-maxime@cerno.tech Acked-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- .../bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml index b48ed875eb8e..17e4f20c8d39 100644 --- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml @@ -10,6 +10,15 @@ maintainers: - Eric Anholt - Stefan Wahren +select: + properties: + compatible: + contains: + const: raspberrypi,bcm2835-firmware + + required: + - compatible + properties: compatible: items: -- cgit From 3ba72c35cf1c79cb95eebd7fa20f416c4fa952eb Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Jul 2020 09:41:10 +0200 Subject: dt-bindings: clk: versaclock5: fix 'idt' prefix typos 'idt' is misspelled 'itd' in a few places, fix it. Fixes: 34662f6e3084 ("dt: Add additional option bindings for IDT VersaClock") Signed-off-by: Luca Ceresoli Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200723074112.3159-2-luca@lucaceresoli.net Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/idt,versaclock5.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt index 6165b6ddb1a9..9656d4cf221c 100644 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt @@ -35,7 +35,7 @@ For all output ports, a corresponding, optional child node named OUT1, OUT2, etc. can represent a each output, and the node can be used to specify the following: -- itd,mode: can be one of the following: +- idt,mode: can be one of the following: - VC5_LVPECL - VC5_CMOS - VC5_HCSL33 @@ -106,7 +106,7 @@ i2c-master-node { clock-names = "xin"; OUT1 { - itd,mode = ; + idt,mode = ; idt,voltage-microvolts = <1800000>; idt,slew-percent = <80>; }; -- cgit From db136ac978ffbcc9a07b69906d7f6d2c49e033c0 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Jul 2020 09:41:11 +0200 Subject: MAINTAINERS: take over IDT VersaClock 5 clock driver Marek has been the primary developer of this driver (thanks!). Now as he is not working on it anymore he suggested I take over maintainership. Cc: Marek Vasut Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20200723074112.3159-3-luca@lucaceresoli.net Signed-off-by: Stephen Boyd --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 68f21d46614c..5aa16c245c63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8324,7 +8324,7 @@ W: https://github.com/o2genum/ideapad-slidebar F: drivers/input/misc/ideapad_slidebar.c IDT VersaClock 5 CLOCK DRIVER -M: Marek Vasut +M: Luca Ceresoli S: Maintained F: drivers/clk/clk-versaclock5.c -- cgit From 45c940184b501fc65592432a269b7a34cf2237b6 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Jul 2020 09:41:12 +0200 Subject: dt-bindings: clk: versaclock5: convert to yaml Convert to yaml the VersaClock bindings document. The mapping between clock specifier and physical pins cannot be described formally in yaml schema, then keep it verbatim in the description field. Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20200723074112.3159-4-luca@lucaceresoli.net Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/idt,versaclock5.txt | 125 ----------------- .../devicetree/bindings/clock/idt,versaclock5.yaml | 154 +++++++++++++++++++++ MAINTAINERS | 1 + 3 files changed, 155 insertions(+), 125 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/idt,versaclock5.txt create mode 100644 Documentation/devicetree/bindings/clock/idt,versaclock5.yaml diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt deleted file mode 100644 index 9656d4cf221c..000000000000 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt +++ /dev/null @@ -1,125 +0,0 @@ -Binding for IDT VersaClock 5,6 programmable i2c clock generators. - -The IDT VersaClock 5 and VersaClock 6 are programmable i2c clock -generators providing from 3 to 12 output clocks. - -==I2C device node== - -Required properties: -- compatible: shall be one of - "idt,5p49v5923" - "idt,5p49v5925" - "idt,5p49v5933" - "idt,5p49v5935" - "idt,5p49v6901" - "idt,5p49v6965" -- reg: i2c device address, shall be 0x68 or 0x6a. -- #clock-cells: from common clock binding; shall be set to 1. -- clocks: from common clock binding; list of parent clock handles, - - 5p49v5923 and - 5p49v5925 and - 5p49v6901: (required) either or both of XTAL or CLKIN - reference clock. - - 5p49v5933 and - - 5p49v5935: (optional) property not present (internal - Xtal used) or CLKIN reference - clock. -- clock-names: from common clock binding; clock input names, can be - - 5p49v5923 and - 5p49v5925 and - 5p49v6901: (required) either or both of "xin", "clkin". - - 5p49v5933 and - - 5p49v5935: (optional) property not present or "clkin". - -For all output ports, a corresponding, optional child node named OUT1, -OUT2, etc. can represent a each output, and the node can be used to -specify the following: - -- idt,mode: can be one of the following: - - VC5_LVPECL - - VC5_CMOS - - VC5_HCSL33 - - VC5_LVDS - - VC5_CMOS2 - - VC5_CMOSD - - VC5_HCSL25 - -- idt,voltage-microvolts: can be one of the following - - 1800000 - - 2500000 - - 3300000 -- idt,slew-percent: Percent of normal, can be one of - - 80 - - 85 - - 90 - - 100 - -==Mapping between clock specifier and physical pins== - -When referencing the provided clock in the DT using phandle and -clock specifier, the following mapping applies: - -5P49V5923: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - -5P49V5933: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT4 - -5P49V5925 and -5P49V5935: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - 3 -- OUT3 - 4 -- OUT4 - -5P49V6901: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - 3 -- OUT3 - 4 -- OUT4 - -==Example== - -/* 25MHz reference crystal */ -ref25: ref25m { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <25000000>; -}; - -i2c-master-node { - - /* IDT 5P49V5923 i2c clock generator */ - vc5: clock-generator@6a { - compatible = "idt,5p49v5923"; - reg = <0x6a>; - #clock-cells = <1>; - - /* Connect XIN input to 25MHz reference */ - clocks = <&ref25m>; - clock-names = "xin"; - - OUT1 { - idt,mode = ; - idt,voltage-microvolts = <1800000>; - idt,slew-percent = <80>; - }; - OUT2 { - ... - }; - ... - }; -}; - -/* Consumer referencing the 5P49V5923 pin OUT1 */ -consumer { - ... - clocks = <&vc5 1>; - ... -} diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml new file mode 100644 index 000000000000..3d4e1685cc55 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/idt,versaclock5.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Binding for IDT VersaClock 5 and 6 programmable I2C clock generators + +description: | + The IDT VersaClock 5 and VersaClock 6 are programmable I2C + clock generators providing from 3 to 12 output clocks. + + When referencing the provided clock in the DT using phandle and clock + specifier, the following mapping applies: + + - 5P49V5923: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT2 + + - 5P49V5933: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT4 + + - other parts: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT2 + 3 -- OUT3 + 4 -- OUT4 + +maintainers: + - Luca Ceresoli + +properties: + compatible: + enum: + - idt,5p49v5923 + - idt,5p49v5925 + - idt,5p49v5933 + - idt,5p49v5935 + - idt,5p49v6901 + - idt,5p49v6965 + + reg: + description: I2C device address + enum: [ 0x68, 0x6a ] + + '#clock-cells': + const: 1 + +patternProperties: + "^OUT[1-4]$": + type: object + description: + Description of one of the outputs (OUT1..OUT4). See "Clock1 Output + Configuration" in the Versaclock 5/6/6E Family Register Description + and Programming Guide. + properties: + idt,mode: + description: + The output drive mode. Values defined in dt-bindings/clk/versaclock.h + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 6 + idt,voltage-microvolt: + description: The output drive voltage. + enum: [ 1800000, 2500000, 3300000 ] + idt,slew-percent: + description: The Slew rate control for CMOS single-ended. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 80, 85, 90, 100 ] + +required: + - compatible + - reg + - '#clock-cells' + +allOf: + - if: + properties: + compatible: + enum: + - idt,5p49v5933 + - idt,5p49v5935 + then: + # Devices with builtin crystal + optional external input + properties: + clock-names: + const: clkin + clocks: + maxItems: 1 + else: + # Devices without builtin crystal + properties: + clock-names: + minItems: 1 + maxItems: 2 + items: + enum: [ xin, clkin ] + clocks: + minItems: 1 + maxItems: 2 + required: + - clock-names + - clocks + +examples: + - | + #include + + /* 25MHz reference crystal */ + ref25: ref25m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000000>; + }; + + i2c@0 { + reg = <0x0 0x100>; + #address-cells = <1>; + #size-cells = <0>; + + /* IDT 5P49V5923 I2C clock generator */ + vc5: clock-generator@6a { + compatible = "idt,5p49v5923"; + reg = <0x6a>; + #clock-cells = <1>; + + /* Connect XIN input to 25MHz reference */ + clocks = <&ref25m>; + clock-names = "xin"; + + OUT1 { + idt,drive-mode = ; + idt,voltage-microvolts = <1800000>; + idt,slew-percent = <80>; + }; + + OUT4 { + idt,drive-mode = ; + }; + }; + }; + + /* Consumer referencing the 5P49V5923 pin OUT1 */ + consumer { + /* ... */ + clocks = <&vc5 1>; + /* ... */ + }; + +... diff --git a/MAINTAINERS b/MAINTAINERS index 5aa16c245c63..09d6efd1d0d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8326,6 +8326,7 @@ F: drivers/input/misc/ideapad_slidebar.c IDT VersaClock 5 CLOCK DRIVER M: Luca Ceresoli S: Maintained +F: Documentation/devicetree/bindings/clock/idt,versaclock5.yaml F: drivers/clk/clk-versaclock5.c IEEE 802.15.4 SUBSYSTEM -- cgit From 3bca66b08ec833c4506e97371f2c74e7de00792a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Jul 2020 09:26:03 +0200 Subject: clk: vc5: use a dedicated struct to describe the output drivers Reusing the generic struct vc5_hw_data for all blocks is handy. However it implies we allocate space the div_int and div_frc fields even for the output drivers where they are unused, and the clk_output_cfg0 and clk_output_cfg0_mask fields for all components even though they are used only for the output drivers. Use a dedicated struct for the output drivers so that each block uses exactly the fields it needs, not more. Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20200723072603.1795-1-luca@lucaceresoli.net Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock5.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index 32f0aa64f062..c90460e7ef21 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -167,6 +167,12 @@ struct vc5_hw_data { u32 div_int; u32 div_frc; unsigned int num; +}; + +struct vc5_out_data { + struct clk_hw hw; + struct vc5_driver_data *vc5; + unsigned int num; unsigned int clk_output_cfg0; unsigned int clk_output_cfg0_mask; }; @@ -184,7 +190,7 @@ struct vc5_driver_data { struct clk_hw clk_pfd; struct vc5_hw_data clk_pll; struct vc5_hw_data clk_fod[VC5_MAX_FOD_NUM]; - struct vc5_hw_data clk_out[VC5_MAX_CLK_OUT_NUM]; + struct vc5_out_data clk_out[VC5_MAX_CLK_OUT_NUM]; }; /* @@ -567,7 +573,7 @@ static const struct clk_ops vc5_fod_ops = { static int vc5_clk_out_prepare(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_SELB_NORM | VC5_OUT_DIV_CONTROL_SEL_EXT | @@ -609,7 +615,7 @@ static int vc5_clk_out_prepare(struct clk_hw *hw) static void vc5_clk_out_unprepare(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; /* Disable the clock buffer */ @@ -619,7 +625,7 @@ static void vc5_clk_out_unprepare(struct clk_hw *hw) static unsigned char vc5_clk_out_get_parent(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_SELB_NORM | VC5_OUT_DIV_CONTROL_SEL_EXT | @@ -649,7 +655,7 @@ static unsigned char vc5_clk_out_get_parent(struct clk_hw *hw) static int vc5_clk_out_set_parent(struct clk_hw *hw, u8 index) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_RESET | VC5_OUT_DIV_CONTROL_SELB_NORM | @@ -704,7 +710,7 @@ static int vc5_map_index_to_output(const enum vc5_model model, } static int vc5_update_mode(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -729,7 +735,7 @@ static int vc5_update_mode(struct device_node *np_output, } static int vc5_update_power(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -754,7 +760,7 @@ static int vc5_update_power(struct device_node *np_output, } static int vc5_update_slew(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -782,7 +788,7 @@ static int vc5_update_slew(struct device_node *np_output, } static int vc5_get_output_config(struct i2c_client *client, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { struct device_node *np_output; char *child_name; -- cgit From 667f39b59b494d96ae70f4217637db2ebbee3df0 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:32 -0400 Subject: clk: qcom: gcc: fix sm8150 GPU and NPU clocks Fix the parents and set BRANCH_HALT_SKIP. From the downstream driver it should be a 500us delay and not skip, however this matches what was done for other clocks that had 500us delay in downstream. Fixes: f73a4230d5bb ("clk: qcom: gcc: Add GPU and NPU clocks for SM8150") Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-2-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sm8150.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c index 72524cf11048..55e9d6d75a0c 100644 --- a/drivers/clk/qcom/gcc-sm8150.c +++ b/drivers/clk/qcom/gcc-sm8150.c @@ -1617,6 +1617,7 @@ static struct clk_branch gcc_gpu_cfg_ahb_clk = { }; static struct clk_branch gcc_gpu_gpll0_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(15), @@ -1632,13 +1633,14 @@ static struct clk_branch gcc_gpu_gpll0_clk_src = { }; static struct clk_branch gcc_gpu_gpll0_div_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(16), .hw.init = &(struct clk_init_data){ .name = "gcc_gpu_gpll0_div_clk_src", .parent_hws = (const struct clk_hw *[]){ - &gcc_gpu_gpll0_clk_src.clkr.hw }, + &gpll0_out_even.clkr.hw }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, @@ -1729,6 +1731,7 @@ static struct clk_branch gcc_npu_cfg_ahb_clk = { }; static struct clk_branch gcc_npu_gpll0_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(18), @@ -1744,13 +1747,14 @@ static struct clk_branch gcc_npu_gpll0_clk_src = { }; static struct clk_branch gcc_npu_gpll0_div_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(19), .hw.init = &(struct clk_init_data){ .name = "gcc_npu_gpll0_div_clk_src", .parent_hws = (const struct clk_hw *[]){ - &gcc_npu_gpll0_clk_src.clkr.hw }, + &gpll0_out_even.clkr.hw }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, -- cgit From c8b9002f44e4a1d2771b2f59f6de900864b1f9d7 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:33 -0400 Subject: clk: qcom: clk-alpha-pll: remove unused/incorrect PLL_CAL_VAL 0x44 isn't a register offset, it is the value that goes into CAL_L_VAL. Fixes: 548a909597d5 ("clk: qcom: clk-alpha-pll: Add support for Trion PLLs") Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-3-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 9b2dfa08acb2..1325139173c9 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -56,7 +56,6 @@ #define PLL_STATUS(p) ((p)->offset + (p)->regs[PLL_OFF_STATUS]) #define PLL_OPMODE(p) ((p)->offset + (p)->regs[PLL_OFF_OPMODE]) #define PLL_FRAC(p) ((p)->offset + (p)->regs[PLL_OFF_FRAC]) -#define PLL_CAL_VAL(p) ((p)->offset + (p)->regs[PLL_OFF_CAL_VAL]) const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [CLK_ALPHA_PLL_TYPE_DEFAULT] = { @@ -115,7 +114,6 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_STATUS] = 0x30, [PLL_OFF_OPMODE] = 0x38, [PLL_OFF_ALPHA_VAL] = 0x40, - [PLL_OFF_CAL_VAL] = 0x44, }, [CLK_ALPHA_PLL_TYPE_LUCID] = { [PLL_OFF_L_VAL] = 0x04, -- cgit From 0b01489475c655f8ccce8fa13cc4088954ac5503 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:34 -0400 Subject: clk: qcom: clk-alpha-pll: same regs and ops for trion and lucid Fixed ops were already identical, this adds support for non-fixed ops by sharing between trion and lucid. This also changes the names for trion ops to be consistent with the rest. Note LUCID_PCAL_DONE is renamed to TRION_PCAL_DONE because it is wrong for lucid, LUCID_PCAL_DONE should be BIT(27). Next patch will address this. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-4-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 58 ++++++++++++---------------------------- drivers/clk/qcom/clk-alpha-pll.h | 17 +++++++----- drivers/clk/qcom/gcc-sm8150.c | 8 +++--- 3 files changed, 32 insertions(+), 51 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 1325139173c9..be7ffeae21b1 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -101,21 +101,6 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_FRAC] = 0x38, }, [CLK_ALPHA_PLL_TYPE_TRION] = { - [PLL_OFF_L_VAL] = 0x04, - [PLL_OFF_CAL_L_VAL] = 0x08, - [PLL_OFF_USER_CTL] = 0x0c, - [PLL_OFF_USER_CTL_U] = 0x10, - [PLL_OFF_USER_CTL_U1] = 0x14, - [PLL_OFF_CONFIG_CTL] = 0x18, - [PLL_OFF_CONFIG_CTL_U] = 0x1c, - [PLL_OFF_CONFIG_CTL_U1] = 0x20, - [PLL_OFF_TEST_CTL] = 0x24, - [PLL_OFF_TEST_CTL_U] = 0x28, - [PLL_OFF_STATUS] = 0x30, - [PLL_OFF_OPMODE] = 0x38, - [PLL_OFF_ALPHA_VAL] = 0x40, - }, - [CLK_ALPHA_PLL_TYPE_LUCID] = { [PLL_OFF_L_VAL] = 0x04, [PLL_OFF_CAL_L_VAL] = 0x08, [PLL_OFF_USER_CTL] = 0x0c, @@ -154,9 +139,9 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); #define PLL_OUT_MASK 0x7 #define PLL_RATE_MARGIN 500 -/* LUCID PLL specific settings and offsets */ -#define LUCID_PLL_CAL_VAL 0x44 -#define LUCID_PCAL_DONE BIT(26) +/* TRION PLL specific settings and offsets */ +#define TRION_PLL_CAL_VAL 0x44 +#define TRION_PCAL_DONE BIT(26) #define pll_alpha_width(p) \ ((PLL_ALPHA_VAL_U(p) - PLL_ALPHA_VAL(p) == 4) ? \ @@ -910,14 +895,14 @@ const struct clk_ops clk_alpha_pll_hwfsm_ops = { }; EXPORT_SYMBOL_GPL(clk_alpha_pll_hwfsm_ops); -const struct clk_ops clk_trion_fixed_pll_ops = { +const struct clk_ops clk_alpha_pll_fixed_trion_ops = { .enable = clk_trion_pll_enable, .disable = clk_trion_pll_disable, .is_enabled = clk_trion_pll_is_enabled, .recalc_rate = clk_trion_pll_recalc_rate, .round_rate = clk_alpha_pll_round_rate, }; -EXPORT_SYMBOL_GPL(clk_trion_fixed_pll_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_fixed_trion_ops); static unsigned long clk_alpha_pll_postdiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) @@ -1337,12 +1322,12 @@ clk_trion_pll_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, val << PLL_POST_DIV_SHIFT); } -const struct clk_ops clk_trion_pll_postdiv_ops = { +const struct clk_ops clk_alpha_pll_postdiv_trion_ops = { .recalc_rate = clk_trion_pll_postdiv_recalc_rate, .round_rate = clk_trion_pll_postdiv_round_rate, .set_rate = clk_trion_pll_postdiv_set_rate, }; -EXPORT_SYMBOL_GPL(clk_trion_pll_postdiv_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_trion_ops); static long clk_alpha_pll_postdiv_fabia_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) @@ -1397,13 +1382,13 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops); * @regmap: register map * @config: configuration to apply for pll */ -void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, +void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config) { if (config->l) regmap_write(regmap, PLL_L_VAL(pll), config->l); - regmap_write(regmap, PLL_CAL_L_VAL(pll), LUCID_PLL_CAL_VAL); + regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); if (config->alpha) regmap_write(regmap, PLL_ALPHA_VAL(pll), config->alpha); @@ -1456,13 +1441,13 @@ void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, /* Place the PLL in STANDBY mode */ regmap_update_bits(regmap, PLL_MODE(pll), PLL_RESET_N, PLL_RESET_N); } -EXPORT_SYMBOL_GPL(clk_lucid_pll_configure); +EXPORT_SYMBOL_GPL(clk_trion_pll_configure); /* - * The Lucid PLL requires a power-on self-calibration which happens when the + * The TRION PLL requires a power-on self-calibration which happens when the * PLL comes out of reset. Calibrate in case it is not completed. */ -static int alpha_pll_lucid_prepare(struct clk_hw *hw) +static int alpha_pll_trion_prepare(struct clk_hw *hw) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); u32 regval; @@ -1470,7 +1455,7 @@ static int alpha_pll_lucid_prepare(struct clk_hw *hw) /* Return early if calibration is not needed. */ regmap_read(pll->clkr.regmap, PLL_STATUS(pll), ®val); - if (regval & LUCID_PCAL_DONE) + if (regval & TRION_PCAL_DONE) return 0; /* On/off to calibrate */ @@ -1481,7 +1466,7 @@ static int alpha_pll_lucid_prepare(struct clk_hw *hw) return ret; } -static int alpha_pll_lucid_set_rate(struct clk_hw *hw, unsigned long rate, +static int alpha_pll_trion_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long prate) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); @@ -1535,26 +1520,17 @@ static int alpha_pll_lucid_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } -const struct clk_ops clk_alpha_pll_lucid_ops = { - .prepare = alpha_pll_lucid_prepare, +const struct clk_ops clk_alpha_pll_trion_ops = { + .prepare = alpha_pll_trion_prepare, .enable = clk_trion_pll_enable, .disable = clk_trion_pll_disable, .is_enabled = clk_trion_pll_is_enabled, .recalc_rate = clk_trion_pll_recalc_rate, .round_rate = clk_alpha_pll_round_rate, - .set_rate = alpha_pll_lucid_set_rate, + .set_rate = alpha_pll_trion_set_rate, }; EXPORT_SYMBOL_GPL(clk_alpha_pll_lucid_ops); -const struct clk_ops clk_alpha_pll_fixed_lucid_ops = { - .enable = clk_trion_pll_enable, - .disable = clk_trion_pll_disable, - .is_enabled = clk_trion_pll_is_enabled, - .recalc_rate = clk_trion_pll_recalc_rate, - .round_rate = clk_alpha_pll_round_rate, -}; -EXPORT_SYMBOL_GPL(clk_alpha_pll_fixed_lucid_ops); - const struct clk_ops clk_alpha_pll_postdiv_lucid_ops = { .recalc_rate = clk_alpha_pll_postdiv_fabia_recalc_rate, .round_rate = clk_alpha_pll_postdiv_fabia_round_rate, diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index 1ba82be93dd5..d283ba739057 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -14,7 +14,7 @@ enum { CLK_ALPHA_PLL_TYPE_BRAMMO, CLK_ALPHA_PLL_TYPE_FABIA, CLK_ALPHA_PLL_TYPE_TRION, - CLK_ALPHA_PLL_TYPE_LUCID, + CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION, CLK_ALPHA_PLL_TYPE_MAX, }; @@ -134,18 +134,23 @@ extern const struct clk_ops clk_alpha_pll_fabia_ops; extern const struct clk_ops clk_alpha_pll_fixed_fabia_ops; extern const struct clk_ops clk_alpha_pll_postdiv_fabia_ops; -extern const struct clk_ops clk_alpha_pll_lucid_ops; -extern const struct clk_ops clk_alpha_pll_fixed_lucid_ops; +extern const struct clk_ops clk_alpha_pll_trion_ops; +extern const struct clk_ops clk_alpha_pll_fixed_trion_ops; +extern const struct clk_ops clk_alpha_pll_postdiv_trion_ops; + +#define clk_alpha_pll_lucid_ops clk_alpha_pll_trion_ops +#define clk_alpha_pll_fixed_lucid_ops clk_alpha_pll_fixed_trion_ops extern const struct clk_ops clk_alpha_pll_postdiv_lucid_ops; void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); -void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, +void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); +#define clk_lucid_pll_configure(pll, regmap, config) \ + clk_trion_pll_configure(pll, regmap, config) + -extern const struct clk_ops clk_trion_fixed_pll_ops; -extern const struct clk_ops clk_trion_pll_postdiv_ops; #endif diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c index 55e9d6d75a0c..d7778def37da 100644 --- a/drivers/clk/qcom/gcc-sm8150.c +++ b/drivers/clk/qcom/gcc-sm8150.c @@ -53,7 +53,7 @@ static struct clk_alpha_pll gpll0 = { .name = "bi_tcxo", }, .num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; @@ -79,7 +79,7 @@ static struct clk_alpha_pll_postdiv gpll0_out_even = { .hw = &gpll0.clkr.hw, }, .num_parents = 1, - .ops = &clk_trion_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_trion_ops, }, }; @@ -98,7 +98,7 @@ static struct clk_alpha_pll gpll7 = { .name = "bi_tcxo", }, .num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; @@ -118,7 +118,7 @@ static struct clk_alpha_pll gpll9 = { .name = "bi_tcxo", }, .num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; -- cgit From d28b503c248df8a3b4c73b504a043bdf7e2d5207 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:35 -0400 Subject: clk: qcom: clk-alpha-pll: use the right PCAL_DONE value for lucid pll Lucid PCAL_DONE is different from trion. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-5-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 28 ++++++++++++++++++++++++++-- drivers/clk/qcom/clk-alpha-pll.h | 2 +- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index be7ffeae21b1..26139ef005e4 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -143,6 +143,9 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); #define TRION_PLL_CAL_VAL 0x44 #define TRION_PCAL_DONE BIT(26) +/* LUCID PLL specific settings and offsets */ +#define LUCID_PCAL_DONE BIT(27) + #define pll_alpha_width(p) \ ((PLL_ALPHA_VAL_U(p) - PLL_ALPHA_VAL(p) == 4) ? \ ALPHA_REG_BITWIDTH : ALPHA_REG_16BIT_WIDTH) @@ -1447,7 +1450,7 @@ EXPORT_SYMBOL_GPL(clk_trion_pll_configure); * The TRION PLL requires a power-on self-calibration which happens when the * PLL comes out of reset. Calibrate in case it is not completed. */ -static int alpha_pll_trion_prepare(struct clk_hw *hw) +static int __alpha_pll_trion_prepare(struct clk_hw *hw, u32 pcal_done) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); u32 regval; @@ -1455,7 +1458,7 @@ static int alpha_pll_trion_prepare(struct clk_hw *hw) /* Return early if calibration is not needed. */ regmap_read(pll->clkr.regmap, PLL_STATUS(pll), ®val); - if (regval & TRION_PCAL_DONE) + if (regval & pcal_done) return 0; /* On/off to calibrate */ @@ -1466,6 +1469,16 @@ static int alpha_pll_trion_prepare(struct clk_hw *hw) return ret; } +static int alpha_pll_trion_prepare(struct clk_hw *hw) +{ + return __alpha_pll_trion_prepare(hw, TRION_PCAL_DONE); +} + +static int alpha_pll_lucid_prepare(struct clk_hw *hw) +{ + return __alpha_pll_trion_prepare(hw, LUCID_PCAL_DONE); +} + static int alpha_pll_trion_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long prate) { @@ -1529,6 +1542,17 @@ const struct clk_ops clk_alpha_pll_trion_ops = { .round_rate = clk_alpha_pll_round_rate, .set_rate = alpha_pll_trion_set_rate, }; +EXPORT_SYMBOL_GPL(clk_alpha_pll_trion_ops); + +const struct clk_ops clk_alpha_pll_lucid_ops = { + .prepare = alpha_pll_lucid_prepare, + .enable = clk_trion_pll_enable, + .disable = clk_trion_pll_disable, + .is_enabled = clk_trion_pll_is_enabled, + .recalc_rate = clk_trion_pll_recalc_rate, + .round_rate = clk_alpha_pll_round_rate, + .set_rate = alpha_pll_trion_set_rate, +}; EXPORT_SYMBOL_GPL(clk_alpha_pll_lucid_ops); const struct clk_ops clk_alpha_pll_postdiv_lucid_ops = { diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index d283ba739057..d3201b87c0cd 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -138,7 +138,7 @@ extern const struct clk_ops clk_alpha_pll_trion_ops; extern const struct clk_ops clk_alpha_pll_fixed_trion_ops; extern const struct clk_ops clk_alpha_pll_postdiv_trion_ops; -#define clk_alpha_pll_lucid_ops clk_alpha_pll_trion_ops +extern const struct clk_ops clk_alpha_pll_lucid_ops; #define clk_alpha_pll_fixed_lucid_ops clk_alpha_pll_fixed_trion_ops extern const struct clk_ops clk_alpha_pll_postdiv_lucid_ops; -- cgit From 3f6b25062587cd18ef01bf67ca67e601e6abde94 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:36 -0400 Subject: clk: qcom: gcc: remove unnecessary vco_table from SM8150 The fixed alpha pll ops only use it for clamping in round_rate, which is unnecessary. This is consistent with SM8250 GCC not using vco_table. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-6-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sm8150.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c index d7778def37da..8e9b5b3cceaf 100644 --- a/drivers/clk/qcom/gcc-sm8150.c +++ b/drivers/clk/qcom/gcc-sm8150.c @@ -34,14 +34,8 @@ enum { P_SLEEP_CLK, }; -static const struct pll_vco trion_vco[] = { - { 249600000, 2000000000, 0 }, -}; - static struct clk_alpha_pll gpll0 = { .offset = 0x0, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, @@ -85,8 +79,6 @@ static struct clk_alpha_pll_postdiv gpll0_out_even = { static struct clk_alpha_pll gpll7 = { .offset = 0x1a000, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, @@ -105,8 +97,6 @@ static struct clk_alpha_pll gpll7 = { static struct clk_alpha_pll gpll9 = { .offset = 0x1c000, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, -- cgit From 23e2653ee649125d1fddd1b16c2d2ca95c684631 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:37 -0400 Subject: dt-bindings: clock: combine qcom,sdm845-gpucc and qcom,sc7180-gpucc These two bindings are almost identical, so combine them into one. This will make it easier to add the sm8150 and sm8250 gpucc bindings. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-7-jonathan@marek.ca Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/qcom,gpucc.yaml | 78 ++++++++++++++++++++++ .../bindings/clock/qcom,sc7180-gpucc.yaml | 74 -------------------- .../bindings/clock/qcom,sdm845-gpucc.yaml | 74 -------------------- 3 files changed, 78 insertions(+), 148 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/qcom,gpucc.yaml delete mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml delete mode 100644 Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml new file mode 100644 index 000000000000..aab6bef79771 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,gpucc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Graphics Clock & Reset Controller Binding + +maintainers: + - Taniya Das + +description: | + Qualcomm graphics clock control module which supports the clocks, resets and + power domains on SDM845/SC7180. + + See also: + dt-bindings/clock/qcom,gpucc-sdm845.h + dt-bindings/clock/qcom,gpucc-sc7180.h + +properties: + compatible: + enum: + - qcom,sdm845-gpucc + - qcom,sc7180-gpucc + + clocks: + items: + - description: Board XO source + - description: GPLL0 main branch source + - description: GPLL0 div branch source + + clock-names: + items: + - const: bi_tcxo + - const: gcc_gpu_gpll0_clk_src + - const: gcc_gpu_gpll0_div_clk_src + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#reset-cells' + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + #include + #include + clock-controller@5090000 { + compatible = "qcom,sdm845-gpucc"; + reg = <0x05090000 0x9000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_GPU_GPLL0_CLK_SRC>, + <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; + clock-names = "bi_tcxo", + "gcc_gpu_gpll0_clk_src", + "gcc_gpu_gpll0_div_clk_src"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml deleted file mode 100644 index fe08461fce05..000000000000 --- a/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/clock/qcom,sc7180-gpucc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm Graphics Clock & Reset Controller Binding for SC7180 - -maintainers: - - Taniya Das - -description: | - Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SC7180. - - See also dt-bindings/clock/qcom,gpucc-sc7180.h. - -properties: - compatible: - const: qcom,sc7180-gpucc - - clocks: - items: - - description: Board XO source - - description: GPLL0 main branch source - - description: GPLL0 div branch source - - clock-names: - items: - - const: bi_tcxo - - const: gcc_gpu_gpll0_clk_src - - const: gcc_gpu_gpll0_div_clk_src - - '#clock-cells': - const: 1 - - '#reset-cells': - const: 1 - - '#power-domain-cells': - const: 1 - - reg: - maxItems: 1 - -required: - - compatible - - reg - - clocks - - clock-names - - '#clock-cells' - - '#reset-cells' - - '#power-domain-cells' - -additionalProperties: false - -examples: - - | - #include - #include - clock-controller@5090000 { - compatible = "qcom,sc7180-gpucc"; - reg = <0x05090000 0x9000>; - clocks = <&rpmhcc RPMH_CXO_CLK>, - <&gcc GCC_GPU_GPLL0_CLK_SRC>, - <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; - clock-names = "bi_tcxo", - "gcc_gpu_gpll0_clk_src", - "gcc_gpu_gpll0_div_clk_src"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; -... diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml deleted file mode 100644 index 8a0c576ba8b3..000000000000 --- a/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/clock/qcom,sdm845-gpucc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm Graphics Clock & Reset Controller Binding for SDM845 - -maintainers: - - Taniya Das - -description: | - Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SDM845. - - See also dt-bindings/clock/qcom,gpucc-sdm845.h. - -properties: - compatible: - const: qcom,sdm845-gpucc - - clocks: - items: - - description: Board XO source - - description: GPLL0 main branch source - - description: GPLL0 div branch source - - clock-names: - items: - - const: bi_tcxo - - const: gcc_gpu_gpll0_clk_src - - const: gcc_gpu_gpll0_div_clk_src - - '#clock-cells': - const: 1 - - '#reset-cells': - const: 1 - - '#power-domain-cells': - const: 1 - - reg: - maxItems: 1 - -required: - - compatible - - reg - - clocks - - clock-names - - '#clock-cells' - - '#reset-cells' - - '#power-domain-cells' - -additionalProperties: false - -examples: - - | - #include - #include - clock-controller@5090000 { - compatible = "qcom,sdm845-gpucc"; - reg = <0x05090000 0x9000>; - clocks = <&rpmhcc RPMH_CXO_CLK>, - <&gcc GCC_GPU_GPLL0_CLK_SRC>, - <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; - clock-names = "bi_tcxo", - "gcc_gpu_gpll0_clk_src", - "gcc_gpu_gpll0_div_clk_src"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; -... -- cgit From f793e45494586f742410f17539f1ea4156ea7bf9 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:38 -0400 Subject: dt-bindings: clock: add SM8150 QCOM Graphics clock bindings Add device tree bindings for graphics clock controller for Qualcomm Technology Inc's SM8150 SoCs. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-8-jonathan@marek.ca Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/qcom,gpucc.yaml | 4 ++- include/dt-bindings/clock/qcom,gpucc-sm8150.h | 33 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 include/dt-bindings/clock/qcom,gpucc-sm8150.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml index aab6bef79771..3e064ed0e0ea 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -11,17 +11,19 @@ maintainers: description: | Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SDM845/SC7180. + power domains on SDM845/SC7180/SM8150. See also: dt-bindings/clock/qcom,gpucc-sdm845.h dt-bindings/clock/qcom,gpucc-sc7180.h + dt-bindings/clock/qcom,gpucc-sm8150.h properties: compatible: enum: - qcom,sdm845-gpucc - qcom,sc7180-gpucc + - qcom,sm8150-gpucc clocks: items: diff --git a/include/dt-bindings/clock/qcom,gpucc-sm8150.h b/include/dt-bindings/clock/qcom,gpucc-sm8150.h new file mode 100644 index 000000000000..c5b70aad7770 --- /dev/null +++ b/include/dt-bindings/clock/qcom,gpucc-sm8150.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8150_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8150_H + +/* GPU_CC clock registers */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_APB_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GMU_CLK 8 +#define GPU_CC_PLL1 9 + +/* GPU_CC Resets */ +#define GPUCC_GPU_CC_CX_BCR 0 +#define GPUCC_GPU_CC_GFX3D_AON_BCR 1 +#define GPUCC_GPU_CC_GMU_BCR 2 +#define GPUCC_GPU_CC_GX_BCR 3 +#define GPUCC_GPU_CC_SPDM_BCR 4 +#define GPUCC_GPU_CC_XO_BCR 5 + +/* GPU_CC GDSCRs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif -- cgit From 324e0bfcfb005f161bbb31ea21ddad0f1bc8f400 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:39 -0400 Subject: dt-bindings: clock: add SM8250 QCOM Graphics clock bindings Add device tree bindings for graphics clock controller for Qualcomm Technology Inc's SM8250 SoCs. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-9-jonathan@marek.ca Reviewed-by: Rob Herring Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/qcom,gpucc.yaml | 4 ++- include/dt-bindings/clock/qcom,gpucc-sm8250.h | 34 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 include/dt-bindings/clock/qcom,gpucc-sm8250.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml index 3e064ed0e0ea..df943c4c3234 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -11,12 +11,13 @@ maintainers: description: | Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SDM845/SC7180/SM8150. + power domains on SDM845/SC7180/SM8150/SM8250. See also: dt-bindings/clock/qcom,gpucc-sdm845.h dt-bindings/clock/qcom,gpucc-sc7180.h dt-bindings/clock/qcom,gpucc-sm8150.h + dt-bindings/clock/qcom,gpucc-sm8250.h properties: compatible: @@ -24,6 +25,7 @@ properties: - qcom,sdm845-gpucc - qcom,sc7180-gpucc - qcom,sm8150-gpucc + - qcom,sm8250-gpucc clocks: items: diff --git a/include/dt-bindings/clock/qcom,gpucc-sm8250.h b/include/dt-bindings/clock/qcom,gpucc-sm8250.h new file mode 100644 index 000000000000..dc8e387c48ad --- /dev/null +++ b/include/dt-bindings/clock/qcom,gpucc-sm8250.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8250_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8250_H + +/* GPU_CC clock registers */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_APB_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GMU_CLK 8 +#define GPU_CC_PLL1 9 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 10 + +/* GPU_CC Resets */ +#define GPUCC_GPU_CC_ACD_BCR 0 +#define GPUCC_GPU_CC_CX_BCR 1 +#define GPUCC_GPU_CC_GFX3D_AON_BCR 2 +#define GPUCC_GPU_CC_GMU_BCR 3 +#define GPUCC_GPU_CC_GX_BCR 4 +#define GPUCC_GPU_CC_XO_BCR 5 + +/* GPU_CC GDSCRs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif -- cgit From 0638226dd0953c0c34f8df203b6c32d6066ceb65 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:40 -0400 Subject: clk: qcom: add common gdsc_gx_do_nothing_enable for gpucc drivers All gpucc drivers need this, so move it to common code instead of duplicating it in every gpucc driver. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-10-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gdsc.c | 25 +++++++++++++++++++++++++ drivers/clk/qcom/gdsc.h | 1 + drivers/clk/qcom/gpucc-sc7180.c | 27 +-------------------------- drivers/clk/qcom/gpucc-sdm845.c | 27 +-------------------------- 4 files changed, 28 insertions(+), 52 deletions(-) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 04944f11659b..628397703717 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -433,3 +433,28 @@ void gdsc_unregister(struct gdsc_desc *desc) } of_genpd_del_provider(dev->of_node); } + +/* + * On SDM845+ the GPU GX domain is *almost* entirely controlled by the GMU + * running in the CX domain so the CPU doesn't need to know anything about the + * GX domain EXCEPT.... + * + * Hardware constraints dictate that the GX be powered down before the CX. If + * the GMU crashes it could leave the GX on. In order to successfully bring back + * the device the CPU needs to disable the GX headswitch. There being no sane + * way to reach in and touch that register from deep inside the GPU driver we + * need to set up the infrastructure to be able to ensure that the GPU can + * ensure that the GX is off during this super special case. We do this by + * defining a GX gdsc with a dummy enable function and a "default" disable + * function. + * + * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU + * driver. During power up, nothing will happen from the CPU (and the GMU will + * power up normally but during power down this will ensure that the GX domain + * is *really* off - this gives us a semi standard way of doing what we need. + */ +int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain) +{ + /* Do nothing but give genpd the impression that we were successful */ + return 0; +} diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index c36fc26dcdff..1896bfb2bbd1 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -68,6 +68,7 @@ struct gdsc_desc { int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *, struct regmap *); void gdsc_unregister(struct gdsc_desc *desc); +int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain); #else static inline int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *rcdev, diff --git a/drivers/clk/qcom/gpucc-sc7180.c b/drivers/clk/qcom/gpucc-sc7180.c index 7b656b6aeced..88a739b6fec3 100644 --- a/drivers/clk/qcom/gpucc-sc7180.c +++ b/drivers/clk/qcom/gpucc-sc7180.c @@ -170,37 +170,12 @@ static struct gdsc cx_gdsc = { .flags = VOTABLE, }; -/* - * On SC7180 the GPU GX domain is *almost* entirely controlled by the GMU - * running in the CX domain so the CPU doesn't need to know anything about the - * GX domain EXCEPT.... - * - * Hardware constraints dictate that the GX be powered down before the CX. If - * the GMU crashes it could leave the GX on. In order to successfully bring back - * the device the CPU needs to disable the GX headswitch. There being no sane - * way to reach in and touch that register from deep inside the GPU driver we - * need to set up the infrastructure to be able to ensure that the GPU can - * ensure that the GX is off during this super special case. We do this by - * defining a GX gdsc with a dummy enable function and a "default" disable - * function. - * - * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU - * driver. During power up, nothing will happen from the CPU (and the GMU will - * power up normally but during power down this will ensure that the GX domain - * is *really* off - this gives us a semi standard way of doing what we need. - */ -static int gx_gdsc_enable(struct generic_pm_domain *domain) -{ - /* Do nothing but give genpd the impression that we were successful */ - return 0; -} - static struct gdsc gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, .pd = { .name = "gx_gdsc", - .power_on = gx_gdsc_enable, + .power_on = gdsc_gx_do_nothing_enable, }, .pwrsts = PWRSTS_OFF_ON, .flags = CLAMP_IO, diff --git a/drivers/clk/qcom/gpucc-sdm845.c b/drivers/clk/qcom/gpucc-sdm845.c index e40efba1bf7d..5663698b306b 100644 --- a/drivers/clk/qcom/gpucc-sdm845.c +++ b/drivers/clk/qcom/gpucc-sdm845.c @@ -131,37 +131,12 @@ static struct gdsc gpu_cx_gdsc = { .flags = VOTABLE, }; -/* - * On SDM845 the GPU GX domain is *almost* entirely controlled by the GMU - * running in the CX domain so the CPU doesn't need to know anything about the - * GX domain EXCEPT.... - * - * Hardware constraints dictate that the GX be powered down before the CX. If - * the GMU crashes it could leave the GX on. In order to successfully bring back - * the device the CPU needs to disable the GX headswitch. There being no sane - * way to reach in and touch that register from deep inside the GPU driver we - * need to set up the infrastructure to be able to ensure that the GPU can - * ensure that the GX is off during this super special case. We do this by - * defining a GX gdsc with a dummy enable function and a "default" disable - * function. - * - * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU - * driver. During power up, nothing will happen from the CPU (and the GMU will - * power up normally but during power down this will ensure that the GX domain - * is *really* off - this gives us a semi standard way of doing what we need. - */ -static int gx_gdsc_enable(struct generic_pm_domain *domain) -{ - /* Do nothing but give genpd the impression that we were successful */ - return 0; -} - static struct gdsc gpu_gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, .pd = { .name = "gpu_gx_gdsc", - .power_on = gx_gdsc_enable, + .power_on = gdsc_gx_do_nothing_enable, }, .pwrsts = PWRSTS_OFF_ON, .flags = CLAMP_IO | AON_RESET | POLL_CFG_GDSCR, -- cgit From 0cef71f2ccc84dd85a60b312343f1973f149e2d3 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:41 -0400 Subject: clk: qcom: Add graphics clock controller driver for SM8150 Add support for the graphics clock controller found on SM8150 based devices. This is initially copied from the downstream kernel, but has been modified to more closely match the upstream sc7180 driver. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-11-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 8 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gpucc-sm8150.c | 320 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 329 insertions(+) create mode 100644 drivers/clk/qcom/gpucc-sm8150.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 318c0adfaae1..9643de4465dc 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -419,6 +419,14 @@ config SM_GCC_8250 Say Y if you want to use peripheral devices such as UART, SPI, I2C, USB, SD/UFS, PCIe etc. +config SM_GPUCC_8150 + tristate "SM8150 Graphics Clock Controller" + select SM_GCC_8150 + help + Support for the graphics clock controller on SM8150 devices. + Say Y if you want to support graphics controller devices and + functionality such as 3D graphics. + config SPMI_PMIC_CLKDIV tristate "SPMI PMIC clkdiv Support" depends on SPMI || COMPILE_TEST diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index ae0979bebe18..02c87943f9fd 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o +obj-$(CONFIG_SM_GPUCC_8150) += gpucc-sm8150.o obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o obj-$(CONFIG_QCOM_HFPLL) += hfpll.o diff --git a/drivers/clk/qcom/gpucc-sm8150.c b/drivers/clk/qcom/gpucc-sm8150.c new file mode 100644 index 000000000000..27c40754b2c7 --- /dev/null +++ b/drivers/clk/qcom/gpucc-sm8150.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include + +#include "common.h" +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-pll.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "reset.h" +#include "gdsc.h" + +enum { + P_BI_TCXO, + P_CORE_BI_PLL_TEST_SE, + P_GPLL0_OUT_MAIN, + P_GPLL0_OUT_MAIN_DIV, + P_GPU_CC_PLL1_OUT_MAIN, +}; + +static const struct pll_vco trion_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static struct alpha_pll_config gpu_cc_pll1_config = { + .l = 0x1a, + .alpha = 0xaaa, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002267, + .config_ctl_hi1_val = 0x00000024, + .test_ctl_val = 0x00000000, + .test_ctl_hi_val = 0x00000002, + .test_ctl_hi1_val = 0x00000000, + .user_ctl_val = 0x00000000, + .user_ctl_hi_val = 0x00000805, + .user_ctl_hi1_val = 0x000000d0, +}; + +static struct clk_alpha_pll gpu_cc_pll1 = { + .offset = 0x100, + .vco_table = trion_vco, + .num_vco = ARRAY_SIZE(trion_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_pll1", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_trion_ops, + }, + }, +}; + +static const struct parent_map gpu_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_GPU_CC_PLL1_OUT_MAIN, 3 }, + { P_GPLL0_OUT_MAIN, 5 }, + { P_GPLL0_OUT_MAIN_DIV, 6 }, +}; + +static const struct clk_parent_data gpu_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpu_cc_pll1.clkr.hw }, + { .fw_name = "gcc_gpu_gpll0_clk_src" }, + { .fw_name = "gcc_gpu_gpll0_div_clk_src" }, +}; + +static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(200000000, P_GPLL0_OUT_MAIN_DIV, 1.5, 0, 0), + F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gpu_cc_gmu_clk_src = { + .cmd_rcgr = 0x1120, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gpu_cc_parent_map_0, + .freq_tbl = ftbl_gpu_cc_gmu_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gmu_clk_src", + .parent_data = gpu_cc_parent_data_0, + .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0), + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch gpu_cc_ahb_clk = { + .halt_reg = 0x1078, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x1078, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_crc_ahb_clk = { + .halt_reg = 0x107c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x107c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_crc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_apb_clk = { + .halt_reg = 0x1088, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1088, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_apb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_gmu_clk = { + .halt_reg = 0x1098, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1098, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_snoc_dvm_clk = { + .halt_reg = 0x108c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x108c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_snoc_dvm_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_aon_clk = { + .halt_reg = 0x1004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_aon_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_clk = { + .halt_reg = 0x109c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x109c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_gx_gmu_clk = { + .halt_reg = 0x1064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct gdsc gpu_cx_gdsc = { + .gdscr = 0x106c, + .gds_hw_ctrl = 0x1540, + .pd = { + .name = "gpu_cx_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc gpu_gx_gdsc = { + .gdscr = 0x100c, + .clamp_io_ctrl = 0x1508, + .pd = { + .name = "gpu_gx_gdsc", + .power_on = gdsc_gx_do_nothing_enable, + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = CLAMP_IO | AON_RESET | POLL_CFG_GDSCR, +}; + +static struct clk_regmap *gpu_cc_sm8150_clocks[] = { + [GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr, + [GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr, + [GPU_CC_CX_APB_CLK] = &gpu_cc_cx_apb_clk.clkr, + [GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr, + [GPU_CC_CX_SNOC_DVM_CLK] = &gpu_cc_cx_snoc_dvm_clk.clkr, + [GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr, + [GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr, + [GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr, + [GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr, + [GPU_CC_PLL1] = &gpu_cc_pll1.clkr, +}; + +static const struct qcom_reset_map gpu_cc_sm8150_resets[] = { + [GPUCC_GPU_CC_CX_BCR] = { 0x1068 }, + [GPUCC_GPU_CC_GMU_BCR] = { 0x111c }, + [GPUCC_GPU_CC_GX_BCR] = { 0x1008 }, + [GPUCC_GPU_CC_SPDM_BCR] = { 0x1110 }, + [GPUCC_GPU_CC_XO_BCR] = { 0x1000 }, +}; + +static struct gdsc *gpu_cc_sm8150_gdscs[] = { + [GPU_CX_GDSC] = &gpu_cx_gdsc, + [GPU_GX_GDSC] = &gpu_gx_gdsc, +}; + +static const struct regmap_config gpu_cc_sm8150_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x8008, + .fast_io = true, +}; + +static const struct qcom_cc_desc gpu_cc_sm8150_desc = { + .config = &gpu_cc_sm8150_regmap_config, + .clks = gpu_cc_sm8150_clocks, + .num_clks = ARRAY_SIZE(gpu_cc_sm8150_clocks), + .resets = gpu_cc_sm8150_resets, + .num_resets = ARRAY_SIZE(gpu_cc_sm8150_resets), + .gdscs = gpu_cc_sm8150_gdscs, + .num_gdscs = ARRAY_SIZE(gpu_cc_sm8150_gdscs), +}; + +static const struct of_device_id gpu_cc_sm8150_match_table[] = { + { .compatible = "qcom,sm8150-gpucc" }, + { } +}; +MODULE_DEVICE_TABLE(of, gpu_cc_sm8150_match_table); + +static int gpu_cc_sm8150_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + + regmap = qcom_cc_map(pdev, &gpu_cc_sm8150_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + clk_trion_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config); + + return qcom_cc_really_probe(pdev, &gpu_cc_sm8150_desc, regmap); +} + +static struct platform_driver gpu_cc_sm8150_driver = { + .probe = gpu_cc_sm8150_probe, + .driver = { + .name = "sm8150-gpucc", + .of_match_table = gpu_cc_sm8150_match_table, + }, +}; + +static int __init gpu_cc_sm8150_init(void) +{ + return platform_driver_register(&gpu_cc_sm8150_driver); +} +subsys_initcall(gpu_cc_sm8150_init); + +static void __exit gpu_cc_sm8150_exit(void) +{ + platform_driver_unregister(&gpu_cc_sm8150_driver); +} +module_exit(gpu_cc_sm8150_exit); + +MODULE_DESCRIPTION("QTI GPUCC SM8150 Driver"); +MODULE_LICENSE("GPL v2"); -- cgit From 28f0769c772bb0c431e2833978474d4dfe3754a7 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Jul 2020 09:52:42 -0400 Subject: clk: qcom: Add graphics clock controller driver for SM8250 Add support for the graphics clock controller found on SM8250 based devices. This is initially copied from the downstream kernel, but has been modified to more closely match the upstream sc7180 driver. Signed-off-by: Jonathan Marek Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20200709135251.643-12-jonathan@marek.ca Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 8 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gpucc-sm8250.c | 348 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 drivers/clk/qcom/gpucc-sm8250.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 9643de4465dc..cd0f01136039 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -427,6 +427,14 @@ config SM_GPUCC_8150 Say Y if you want to support graphics controller devices and functionality such as 3D graphics. +config SM_GPUCC_8250 + tristate "SM8250 Graphics Clock Controller" + select SM_GCC_8250 + help + Support for the graphics clock controller on SM8250 devices. + Say Y if you want to support graphics controller devices and + functionality such as 3D graphics. + config SPMI_PMIC_CLKDIV tristate "SPMI PMIC clkdiv Support" depends on SPMI || COMPILE_TEST diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 02c87943f9fd..1e9a21882ed5 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o obj-$(CONFIG_SM_GPUCC_8150) += gpucc-sm8150.o +obj-$(CONFIG_SM_GPUCC_8250) += gpucc-sm8250.o obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o obj-$(CONFIG_QCOM_HFPLL) += hfpll.o diff --git a/drivers/clk/qcom/gpucc-sm8250.c b/drivers/clk/qcom/gpucc-sm8250.c new file mode 100644 index 000000000000..3fa7d1f9ff98 --- /dev/null +++ b/drivers/clk/qcom/gpucc-sm8250.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include + +#include "common.h" +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-pll.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "reset.h" +#include "gdsc.h" + +#define CX_GMU_CBCR_SLEEP_MASK 0xf +#define CX_GMU_CBCR_SLEEP_SHIFT 4 +#define CX_GMU_CBCR_WAKE_MASK 0xf +#define CX_GMU_CBCR_WAKE_SHIFT 8 + +enum { + P_BI_TCXO, + P_CORE_BI_PLL_TEST_SE, + P_GPLL0_OUT_MAIN, + P_GPLL0_OUT_MAIN_DIV, + P_GPU_CC_PLL0_OUT_MAIN, + P_GPU_CC_PLL1_OUT_MAIN, +}; + +static struct pll_vco lucid_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static const struct alpha_pll_config gpu_cc_pll1_config = { + .l = 0x1a, + .alpha = 0xaaa, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002261, + .config_ctl_hi1_val = 0x029a699c, + .user_ctl_val = 0x00000000, + .user_ctl_hi_val = 0x00000805, + .user_ctl_hi1_val = 0x00000000, +}; + +static struct clk_alpha_pll gpu_cc_pll1 = { + .offset = 0x100, + .vco_table = lucid_vco, + .num_vco = ARRAY_SIZE(lucid_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_pll1", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_lucid_ops, + }, + }, +}; + +static const struct parent_map gpu_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_GPU_CC_PLL1_OUT_MAIN, 3 }, + { P_GPLL0_OUT_MAIN, 5 }, + { P_GPLL0_OUT_MAIN_DIV, 6 }, +}; + +static const struct clk_parent_data gpu_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpu_cc_pll1.clkr.hw }, + { .fw_name = "gcc_gpu_gpll0_clk_src" }, + { .fw_name = "gcc_gpu_gpll0_div_clk_src" }, +}; + +static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(200000000, P_GPLL0_OUT_MAIN_DIV, 1.5, 0, 0), + F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gpu_cc_gmu_clk_src = { + .cmd_rcgr = 0x1120, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gpu_cc_parent_map_0, + .freq_tbl = ftbl_gpu_cc_gmu_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gmu_clk_src", + .parent_data = gpu_cc_parent_data_0, + .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0), + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch gpu_cc_ahb_clk = { + .halt_reg = 0x1078, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x1078, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_crc_ahb_clk = { + .halt_reg = 0x107c, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x107c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_crc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_apb_clk = { + .halt_reg = 0x1088, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x1088, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_apb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_gmu_clk = { + .halt_reg = 0x1098, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1098, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_snoc_dvm_clk = { + .halt_reg = 0x108c, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x108c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_snoc_dvm_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_aon_clk = { + .halt_reg = 0x1004, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x1004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_aon_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_clk = { + .halt_reg = 0x109c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x109c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_gx_gmu_clk = { + .halt_reg = 0x1064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_hlos1_vote_gpu_smmu_clk = { + .halt_reg = 0x5000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x5000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_hlos1_vote_gpu_smmu_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct gdsc gpu_cx_gdsc = { + .gdscr = 0x106c, + .gds_hw_ctrl = 0x1540, + .pd = { + .name = "gpu_cx_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc gpu_gx_gdsc = { + .gdscr = 0x100c, + .clamp_io_ctrl = 0x1508, + .pd = { + .name = "gpu_gx_gdsc", + .power_on = gdsc_gx_do_nothing_enable, + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = CLAMP_IO | AON_RESET | POLL_CFG_GDSCR, +}; + +static struct clk_regmap *gpu_cc_sm8250_clocks[] = { + [GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr, + [GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr, + [GPU_CC_CX_APB_CLK] = &gpu_cc_cx_apb_clk.clkr, + [GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr, + [GPU_CC_CX_SNOC_DVM_CLK] = &gpu_cc_cx_snoc_dvm_clk.clkr, + [GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr, + [GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr, + [GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr, + [GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr, + [GPU_CC_PLL1] = &gpu_cc_pll1.clkr, + [GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK] = &gpu_cc_hlos1_vote_gpu_smmu_clk.clkr, +}; + +static const struct qcom_reset_map gpu_cc_sm8250_resets[] = { + [GPUCC_GPU_CC_ACD_BCR] = { 0x1160 }, + [GPUCC_GPU_CC_CX_BCR] = { 0x1068 }, + [GPUCC_GPU_CC_GFX3D_AON_BCR] = { 0x10a0 }, + [GPUCC_GPU_CC_GMU_BCR] = { 0x111c }, + [GPUCC_GPU_CC_GX_BCR] = { 0x1008 }, + [GPUCC_GPU_CC_XO_BCR] = { 0x1000 }, +}; + +static struct gdsc *gpu_cc_sm8250_gdscs[] = { + [GPU_CX_GDSC] = &gpu_cx_gdsc, + [GPU_GX_GDSC] = &gpu_gx_gdsc, +}; + +static const struct regmap_config gpu_cc_sm8250_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x8008, + .fast_io = true, +}; + +static const struct qcom_cc_desc gpu_cc_sm8250_desc = { + .config = &gpu_cc_sm8250_regmap_config, + .clks = gpu_cc_sm8250_clocks, + .num_clks = ARRAY_SIZE(gpu_cc_sm8250_clocks), + .resets = gpu_cc_sm8250_resets, + .num_resets = ARRAY_SIZE(gpu_cc_sm8250_resets), + .gdscs = gpu_cc_sm8250_gdscs, + .num_gdscs = ARRAY_SIZE(gpu_cc_sm8250_gdscs), +}; + +static const struct of_device_id gpu_cc_sm8250_match_table[] = { + { .compatible = "qcom,sm8250-gpucc" }, + { } +}; +MODULE_DEVICE_TABLE(of, gpu_cc_sm8250_match_table); + +static int gpu_cc_sm8250_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + unsigned int value, mask; + + regmap = qcom_cc_map(pdev, &gpu_cc_sm8250_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + clk_lucid_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config); + + /* + * Configure gpu_cc_cx_gmu_clk with recommended + * wakeup/sleep settings + */ + mask = CX_GMU_CBCR_WAKE_MASK << CX_GMU_CBCR_WAKE_SHIFT; + mask |= CX_GMU_CBCR_SLEEP_MASK << CX_GMU_CBCR_SLEEP_SHIFT; + value = 0xf << CX_GMU_CBCR_WAKE_SHIFT | 0xf << CX_GMU_CBCR_SLEEP_SHIFT; + regmap_update_bits(regmap, 0x1098, mask, value); + + return qcom_cc_really_probe(pdev, &gpu_cc_sm8250_desc, regmap); +} + +static struct platform_driver gpu_cc_sm8250_driver = { + .probe = gpu_cc_sm8250_probe, + .driver = { + .name = "sm8250-gpucc", + .of_match_table = gpu_cc_sm8250_match_table, + }, +}; + +static int __init gpu_cc_sm8250_init(void) +{ + return platform_driver_register(&gpu_cc_sm8250_driver); +} +subsys_initcall(gpu_cc_sm8250_init); + +static void __exit gpu_cc_sm8250_exit(void) +{ + platform_driver_unregister(&gpu_cc_sm8250_driver); +} +module_exit(gpu_cc_sm8250_exit); + +MODULE_DESCRIPTION("QTI GPU_CC SM8250 Driver"); +MODULE_LICENSE("GPL v2"); -- cgit From 9f7d5042a64160e20c73d734f9ed5dd8c6bba205 Mon Sep 17 00:00:00 2001 From: Lori Hikichi Date: Fri, 12 Jun 2020 15:52:12 -0700 Subject: clk: iproc: round clock rate to the closest Change from 'DIV_ROUND_UP' to 'DIV_ROUND_CLOSEST' when calculating the clock divisor in the iProc ASIU clock driver to allow to get to the closest clock rate. Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Lori Hikichi Signed-off-by: Ray Jui Link: https://lore.kernel.org/r/20200612225212.124301-1-ray.jui@broadcom.com Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-asiu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index 6fb8af506777..e062dd4992ea 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -119,7 +119,7 @@ static long iproc_asiu_clk_round_rate(struct clk_hw *hw, unsigned long rate, if (rate == *parent_rate) return *parent_rate; - div = DIV_ROUND_UP(*parent_rate, rate); + div = DIV_ROUND_CLOSEST(*parent_rate, rate); if (div < 2) return *parent_rate; @@ -145,7 +145,7 @@ static int iproc_asiu_clk_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } - div = DIV_ROUND_UP(parent_rate, rate); + div = DIV_ROUND_CLOSEST(parent_rate, rate); if (div < 2) return -EINVAL; -- cgit From 8529fc0aaa704c6618e69e8d35834aa977b6ece9 Mon Sep 17 00:00:00 2001 From: Rahul Tanwar Date: Thu, 16 Jul 2020 14:30:30 +0800 Subject: clk: intel: Use devm_clk_hw_register() instead of clk_hw_register() To ensure that clks are unregistered in case of any failure, use devm_clk_hw_register() instead of clk_hw_register(). Signed-off-by: Rahul Tanwar Link: https://lore.kernel.org/r/7ef7009b4e9f986fd6dfbf487c0e85de68a4ba9b.1594880946.git.rahul.tanwar@linux.intel.com Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-cgu-pll.c | 2 +- drivers/clk/x86/clk-cgu.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/x86/clk-cgu-pll.c b/drivers/clk/x86/clk-cgu-pll.c index c03cc6b85b9f..3179557b5f78 100644 --- a/drivers/clk/x86/clk-cgu-pll.c +++ b/drivers/clk/x86/clk-cgu-pll.c @@ -128,7 +128,7 @@ lgm_clk_register_pll(struct lgm_clk_provider *ctx, pll->hw.init = &init; hw = &pll->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c index 56af0e04ec1e..88ebeb53b109 100644 --- a/drivers/clk/x86/clk-cgu.c +++ b/drivers/clk/x86/clk-cgu.c @@ -119,7 +119,7 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx, mux->hw.init = &init; hw = &mux->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -247,7 +247,7 @@ lgm_clk_register_divider(struct lgm_clk_provider *ctx, div->hw.init = &init; hw = &div->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -361,7 +361,7 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx, gate->hw.init = &init; hw = &gate->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -624,7 +624,7 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx, ddiv->hw.init = &init; hw = &ddiv->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) { dev_err(dev, "register clk: %s failed!\n", list->name); return ret; -- cgit From c9e28fe649f76ee13a9bcc09f89ae0189f85801b Mon Sep 17 00:00:00 2001 From: Rahul Tanwar Date: Thu, 16 Jul 2020 14:30:31 +0800 Subject: clk: intel: Improve locking in the driver Remove/reduce unnecessary spin locking of the code. Signed-off-by: Rahul Tanwar Link: https://lore.kernel.org/r/79c0f5f9f5bc512a7e2b5f3c91f6341f28b5854c.1594880946.git.rahul.tanwar@linux.intel.com Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-cgu.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c index 88ebeb53b109..c379fedfb9f2 100644 --- a/drivers/clk/x86/clk-cgu.c +++ b/drivers/clk/x86/clk-cgu.c @@ -420,18 +420,14 @@ lgm_clk_ddiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); unsigned int div0, div1, exdiv; - unsigned long flags; u64 prate; - spin_lock_irqsave(&ddiv->lock, flags); div0 = lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift0, ddiv->width0) + 1; div1 = lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift1, ddiv->width1) + 1; exdiv = lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, ddiv->width2); - spin_unlock_irqrestore(&ddiv->lock, flags); - prate = (u64)parent_rate; do_div(prate, div0); do_div(prate, div1); @@ -548,24 +544,21 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate, div = div * 2; div = DIV_ROUND_CLOSEST_ULL((u64)div, 5); } + spin_unlock_irqrestore(&ddiv->lock, flags); - if (div <= 0) { - spin_unlock_irqrestore(&ddiv->lock, flags); + if (div <= 0) return *prate; - } - if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2) != 0) { - if (lgm_clk_get_ddiv_val(div + 1, &ddiv1, &ddiv2) != 0) { - spin_unlock_irqrestore(&ddiv->lock, flags); + if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2) != 0) + if (lgm_clk_get_ddiv_val(div + 1, &ddiv1, &ddiv2) != 0) return -EINVAL; - } - } rate64 = *prate; do_div(rate64, ddiv1); do_div(rate64, ddiv2); /* if predivide bit is enabled, modify rounded rate by factor of 2.5 */ + spin_lock_irqsave(&ddiv->lock, flags); if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) { rate64 = rate64 * 2; rate64 = DIV_ROUND_CLOSEST_ULL(rate64, 5); -- cgit From d310124cd99df5e5011d79f36a6d16d3452de348 Mon Sep 17 00:00:00 2001 From: Rahul Tanwar Date: Thu, 16 Jul 2020 14:30:32 +0800 Subject: clk: intel: Avoid unnecessary memset by improving code memset can be avoided in a loop if the variables used are declared inside the loop. Move such variables declaration inside the loop to avoid memset. Signed-off-by: Rahul Tanwar Link: https://lore.kernel.org/r/26624b65d0e6b958c4765a406b9929d1a9ce1c2c.1594880946.git.rahul.tanwar@linux.intel.com [sboyd@kernel.org: Drop NULL assignment that is overwritten] Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-cgu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c index c379fedfb9f2..33de600e0c38 100644 --- a/drivers/clk/x86/clk-cgu.c +++ b/drivers/clk/x86/clk-cgu.c @@ -581,19 +581,18 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx, unsigned int nr_clk) { struct device *dev = ctx->dev; - struct clk_init_data init = {}; - struct lgm_clk_ddiv *ddiv; struct clk_hw *hw; unsigned int idx; int ret; for (idx = 0; idx < nr_clk; idx++, list++) { - ddiv = NULL; + struct clk_init_data init = {}; + struct lgm_clk_ddiv *ddiv; + ddiv = devm_kzalloc(dev, sizeof(*ddiv), GFP_KERNEL); if (!ddiv) return -ENOMEM; - memset(&init, 0, sizeof(init)); init.name = list->name; init.ops = &lgm_clk_ddiv_ops; init.flags = list->flags; -- cgit From 03111b1088f18f93d38e888c41e8a1e6aba9f8bb Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Mon, 29 Jun 2020 17:30:24 -0700 Subject: clk: Add support for enabling/disabling clocks from debugfs For test and debug purposes, it's simple enough to enable or disable clocks from shell. Add a new debugfs file 'clk_prepare_enable' that calls clk_prepare_enable() when writing "1" and clk_disable_unprepare() when writing "0". This can have security implications, so only support it when the code has been modified to #define CLOCK_ALLOW_WRITE_DEBUGFS. Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20200630003024.6282-1-mdtipton@codeaurora.org [sboyd@kernel.org: Reword commit text and remove comment update] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 3f588ed06ce3..4d455a657b01 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3039,6 +3039,31 @@ static int clk_rate_set(void *data, u64 val) } #define clk_rate_mode 0644 + +static int clk_prepare_enable_set(void *data, u64 val) +{ + struct clk_core *core = data; + int ret = 0; + + if (val) + ret = clk_prepare_enable(core->hw->clk); + else + clk_disable_unprepare(core->hw->clk); + + return ret; +} + +static int clk_prepare_enable_get(void *data, u64 *val) +{ + struct clk_core *core = data; + + *val = core->enable_count && core->prepare_count; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(clk_prepare_enable_fops, clk_prepare_enable_get, + clk_prepare_enable_set, "%llu\n"); + #else #define clk_rate_set NULL #define clk_rate_mode 0444 @@ -3216,6 +3241,10 @@ static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count); debugfs_create_file("clk_duty_cycle", 0444, root, core, &clk_duty_cycle_fops); +#ifdef CLOCK_ALLOW_WRITE_DEBUGFS + debugfs_create_file("clk_prepare_enable", 0644, root, core, + &clk_prepare_enable_fops); +#endif if (core->num_parents > 0) debugfs_create_file("clk_parent", 0444, root, core, -- cgit From 153bc1c66a8814e621ca0483098ac722be860aaf Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Fri, 3 Jul 2020 09:32:35 +0200 Subject: clk: at91: fix possible dead lock in new drivers syscon_node_to_regmap() will make the created regmap get and enable the first clock it can parse from the device tree. This clock is not needed to access the registers and should not be enabled at that time. Use device_node_to_regmap to resolve this as it looks up the regmap in the same list but doesn't care about the clocks. This issue is detected by lockdep when booting the sama5d3 with a device tree containing the new clk bindings. This fix already happened in 6956eb33abb5 ("clk: at91: fix possible deadlock") for the drivers that had been migrated to the new clk binding back then. This does the same for the new drivers as well. Fixes: 01e2113de9a5 ("clk: at91: add sam9x60 pmc driver") Signed-off-by: Ahmad Fatoum Link: https://lore.kernel.org/r/20200703073236.23923-1-a.fatoum@pengutronix.de Acked-by: Alexandre Belloni Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91sam9g45.c | 2 +- drivers/clk/at91/at91sam9n12.c | 2 +- drivers/clk/at91/sam9x60.c | 2 +- drivers/clk/at91/sama5d3.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/at91/at91sam9g45.c b/drivers/clk/at91/at91sam9g45.c index 9873b583c260..fe9d391adeba 100644 --- a/drivers/clk/at91/at91sam9g45.c +++ b/drivers/clk/at91/at91sam9g45.c @@ -111,7 +111,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; diff --git a/drivers/clk/at91/at91sam9n12.c b/drivers/clk/at91/at91sam9n12.c index 630dc5d87171..4aa97e672bd6 100644 --- a/drivers/clk/at91/at91sam9n12.c +++ b/drivers/clk/at91/at91sam9n12.c @@ -124,7 +124,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 3e20aa68259f..2b4c67485eee 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -178,7 +178,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; diff --git a/drivers/clk/at91/sama5d3.c b/drivers/clk/at91/sama5d3.c index 5e4e44dd4c37..5609b04e6565 100644 --- a/drivers/clk/at91/sama5d3.c +++ b/drivers/clk/at91/sama5d3.c @@ -121,7 +121,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; -- cgit From 3a5c42b18a6ddc0de77dde017baf45262fbf1ced Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:09 +0300 Subject: clk: at91: clk-generated: continue if __clk_determine_rate() returns error __clk_determine_rate() may return error. Skip the current step in case of error. Fixes: 1a1a36d72e3d3 ("clk: at91: clk-generated: make gclk determine audio_pll rate") Signed-off-by: Claudiu Beznea Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/1595403506-8209-2-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 44a46dcc0518..995a13133cfb 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -170,7 +170,8 @@ static int clk_generated_determine_rate(struct clk_hw *hw, for (div = 1; div < GENERATED_MAX_DIV + 2; div++) { req_parent.rate = req->rate * div; - __clk_determine_rate(parent, &req_parent); + if (__clk_determine_rate(parent, &req_parent)) + continue; clk_generated_best_diff(req, parent, req_parent.rate, div, &best_diff, &best_rate); -- cgit From 83331bfcc021a24da79f38a2452e86182c7ae94b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:10 +0300 Subject: clk: at91: clk-generated: check best_rate against ranges Check best_rate against available clock ranges. Fixes: df70aeef6083 ("clk: at91: add generated clock driver") Signed-off-by: Claudiu Beznea Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/1595403506-8209-3-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 995a13133cfb..f8e557e0e1b8 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -185,8 +185,8 @@ end: __clk_get_name((req->best_parent_hw)->clk), req->best_parent_rate); - if (best_rate < 0) - return best_rate; + if (best_rate < 0 || (gck->range.max && best_rate > gck->range.max)) + return -EINVAL; req->rate = best_rate; return 0; -- cgit From 3bf639a611a8deca7e4daa57a4df9ba4c6025249 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:11 +0300 Subject: clk: at91: clk-sam9x60-pll: fix mul mask According to datasheet mul mask is on bits 31..24. Fixes: a436c2a447e59 ("clk: at91: add sam9x60 PLL driver") Signed-off-by: Claudiu Beznea Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/1595403506-8209-4-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index e699803986e5..3522eae2edd6 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -15,7 +15,7 @@ #include "pmc.h" #define PMC_PLL_CTRL0_DIV_MSK GENMASK(7, 0) -#define PMC_PLL_CTRL1_MUL_MSK GENMASK(30, 24) +#define PMC_PLL_CTRL1_MUL_MSK GENMASK(31, 24) #define PLL_DIV_MAX (FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, UINT_MAX) + 1) #define UPLL_DIV 2 -- cgit From 1bef0986b125942d65d39c10cbfa75b20c3b94b3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:12 +0300 Subject: clk: at91: sam9x60-pll: use logical or for range check Use logical or for range check. In case bestrate is not in characteristics->output[0].min..characteristics->output[0].max range we should return -ERANGE. Fixes: a436c2a447e59 ("clk: at91: add sam9x60 PLL driver") Signed-off-by: Claudiu Beznea Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/1595403506-8209-5-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index 3522eae2edd6..4b7b6c435d4e 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -231,7 +231,7 @@ static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, } /* Check if bestrate is a valid output rate */ - if (bestrate < characteristics->output[0].min && + if (bestrate < characteristics->output[0].min || bestrate > characteristics->output[0].max) return -ERANGE; -- cgit From 390227dca870cd0b8b0961da9e293551015c0007 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:13 +0300 Subject: clk: at91: sam9x60-pll: check fcore against ranges According to datasheet the range of 600-1200MHz is for the frequency generated by the fractional part of the PLL (namely Fcorepllck according to datasheet). With this in mind the output range of the PLL itself (fractional + div), taking into account that the divider is 8 bits wide, is 600/256-1200Hz=2.3-1200MHz. Fixes: a436c2a447e59 ("clk: at91: add sam9x60 PLL driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-6-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 12 +++++++++++- drivers/clk/at91/sam9x60.c | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index 4b7b6c435d4e..a933abebfbaf 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -21,6 +21,9 @@ #define UPLL_DIV 2 #define PLL_MUL_MAX (FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, UINT_MAX) + 1) +#define FCORE_MIN (600000000) +#define FCORE_MAX (1200000000) + #define PLL_MAX_ID 1 struct sam9x60_pll { @@ -168,6 +171,7 @@ static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, unsigned long bestdiv = 0; unsigned long bestmul = 0; unsigned long bestfrac = 0; + u64 fcore = 0; if (rate < characteristics->output[0].min || rate > characteristics->output[0].max) @@ -212,6 +216,11 @@ static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, remainder = rate - tmprate; } + fcore = parent_rate * (tmpmul + 1) + + ((u64)parent_rate * tmpfrac >> 22); + if (fcore < FCORE_MIN || fcore > FCORE_MAX) + continue; + /* * Compare the remainder with the best remainder found until * now and elect a new best multiplier/divider pair if the @@ -231,7 +240,8 @@ static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, } /* Check if bestrate is a valid output rate */ - if (bestrate < characteristics->output[0].min || + if (fcore < FCORE_MIN || fcore > FCORE_MAX || + bestrate < characteristics->output[0].min || bestrate > characteristics->output[0].max) return -ERANGE; diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 2b4c67485eee..19945138123b 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -22,7 +22,7 @@ static const struct clk_master_layout sam9x60_master_layout = { }; static const struct clk_range plla_outputs[] = { - { .min = 300000000, .max = 600000000 }, + { .min = 2343750, .max = 1200000000 }, }; static const struct clk_pll_characteristics plla_characteristics = { -- cgit From 35d06f74785cc8d108dcb1e3324b2404fb39df0b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:14 +0300 Subject: clk: at91: sam9x60-pll: use frac when setting frequency In commit a436c2a447e59 ("clk: at91: add sam9x60 PLL driver") the fractional part of PLL wasn't set on registers but it was calculated and taken into account for determining div and mul (see sam9x60_pll_get_best_div_mul()). Fixes: a436c2a447e59 ("clk: at91: add sam9x60 PLL driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-7-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index a933abebfbaf..1f0bcc6b4fea 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -16,6 +16,7 @@ #define PMC_PLL_CTRL0_DIV_MSK GENMASK(7, 0) #define PMC_PLL_CTRL1_MUL_MSK GENMASK(31, 24) +#define PMC_PLL_CTRL1_FRACR_MSK GENMASK(21, 0) #define PLL_DIV_MAX (FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, UINT_MAX) + 1) #define UPLL_DIV 2 @@ -55,7 +56,7 @@ static int sam9x60_pll_prepare(struct clk_hw *hw) unsigned long flags; u8 div; u16 mul; - u32 val; + u32 val, frac; spin_lock_irqsave(pll->lock, flags); regmap_write(regmap, AT91_PMC_PLL_UPDT, pll->id); @@ -65,9 +66,10 @@ static int sam9x60_pll_prepare(struct clk_hw *hw) regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, val); + frac = FIELD_GET(PMC_PLL_CTRL1_FRACR_MSK, val); if (sam9x60_pll_ready(regmap, pll->id) && - (div == pll->div && mul == pll->mul)) { + (div == pll->div && mul == pll->mul && frac == pll->frac)) { spin_unlock_irqrestore(pll->lock, flags); return 0; } @@ -80,7 +82,8 @@ static int sam9x60_pll_prepare(struct clk_hw *hw) regmap_write(regmap, AT91_PMC_PLL_ACR, val); regmap_write(regmap, AT91_PMC_PLL_CTRL1, - FIELD_PREP(PMC_PLL_CTRL1_MUL_MSK, pll->mul)); + FIELD_PREP(PMC_PLL_CTRL1_MUL_MSK, pll->mul) | + FIELD_PREP(PMC_PLL_CTRL1_FRACR_MSK, pll->frac)); if (pll->characteristics->upll) { /* Enable the UTMI internal bandgap */ @@ -155,7 +158,8 @@ static unsigned long sam9x60_pll_recalc_rate(struct clk_hw *hw, { struct sam9x60_pll *pll = to_sam9x60_pll(hw); - return (parent_rate * (pll->mul + 1)) / (pll->div + 1); + return DIV_ROUND_CLOSEST_ULL((parent_rate * (pll->mul + 1) + + ((u64)parent_rate * pll->frac >> 22)), (pll->div + 1)); } static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, -- cgit From 172e7ddeeafd95caa809b7b1a140bfd7f20597d9 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:15 +0300 Subject: clk: at91: sam9x60: fix main rc oscillator frequency Main RC oscillator frequency is 12MHz according to datasheet (chapter 27.2). Fixes: 01e2113de9a52 ("clk: at91: add sam9x60 pmc driver") Signed-off-by: Claudiu Beznea Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/1595403506-8209-8-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sam9x60.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 19945138123b..d9f6d9ba1eca 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -189,7 +189,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) if (!sam9x60_pmc) return; - hw = at91_clk_register_main_rc_osc(regmap, "main_rc_osc", 24000000, + hw = at91_clk_register_main_rc_osc(regmap, "main_rc_osc", 12000000, 50000000); if (IS_ERR(hw)) goto err_free; -- cgit From e1e3e7008a90f9ffe0161191bd64a4c06f0d417a Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:16 +0300 Subject: clk: at91: sckc: register slow_rc with accuracy option Chapter 57.7.5 of SAM9X60 datasheet specifies that the maximum drift of this oscillator is +- 3KHz. Use that value and the formula at [1] or the calculator at [2] to compute the PPB value. [1] https://www.everythingrf.com/rf-calculators/ppm-to-hz-calculator [2] https://www.changpuak.ch/electronics/ppm_to_Hz_converter.php Fixes: 04bcc4275e601 ("clk: at91: sckc: add support for SAM9X60") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-9-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/sckc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c index 15dc4cd86d76..2d65770d8665 100644 --- a/drivers/clk/at91/sckc.c +++ b/drivers/clk/at91/sckc.c @@ -471,8 +471,9 @@ static void __init of_sam9x60_sckc_setup(struct device_node *np) if (!regbase) return; - slow_rc = clk_hw_register_fixed_rate(NULL, parent_names[0], NULL, 0, - 32768); + slow_rc = clk_hw_register_fixed_rate_with_accuracy(NULL, parent_names[0], + NULL, 0, 32768, + 93750000); if (IS_ERR(slow_rc)) return; -- cgit From 42324d953b38e74cf5cb05a02c81d4922a2ddcd5 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:17 +0300 Subject: clk: at91: replace conditional operator with double logical not Replace conditional operator with double logical not as code may be simpler to read. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-10-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 2 +- drivers/clk/at91/clk-main.c | 6 +++--- drivers/clk/at91/clk-master.c | 2 +- drivers/clk/at91/clk-peripheral.c | 2 +- drivers/clk/at91/clk-system.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index f8e557e0e1b8..2448bdc63425 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -83,7 +83,7 @@ static int clk_generated_is_enabled(struct clk_hw *hw) regmap_read(gck->regmap, gck->layout->offset, &status); spin_unlock_irqrestore(gck->lock, flags); - return status & AT91_PMC_PCR_GCKEN ? 1 : 0; + return !!(status & AT91_PMC_PCR_GCKEN); } static unsigned long diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index 37c22667e831..5c83e899084f 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -175,7 +175,7 @@ static bool clk_main_rc_osc_ready(struct regmap *regmap) regmap_read(regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MOSCRCS; + return !!(status & AT91_PMC_MOSCRCS); } static int clk_main_rc_osc_prepare(struct clk_hw *hw) @@ -336,7 +336,7 @@ static int clk_rm9200_main_is_prepared(struct clk_hw *hw) regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status); - return status & AT91_PMC_MAINRDY ? 1 : 0; + return !!(status & AT91_PMC_MAINRDY); } static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw, @@ -398,7 +398,7 @@ static inline bool clk_sam9x5_main_ready(struct regmap *regmap) regmap_read(regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MOSCSELS ? 1 : 0; + return !!(status & AT91_PMC_MOSCSELS); } static int clk_sam9x5_main_prepare(struct clk_hw *hw) diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index e7e0ba652de1..88d545b1698c 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -33,7 +33,7 @@ static inline bool clk_master_ready(struct regmap *regmap) regmap_read(regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MCKRDY ? 1 : 0; + return !!(status & AT91_PMC_MCKRDY); } static int clk_master_prepare(struct clk_hw *hw) diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c index c2ab4860a2bf..4c9a4147dfe5 100644 --- a/drivers/clk/at91/clk-peripheral.c +++ b/drivers/clk/at91/clk-peripheral.c @@ -208,7 +208,7 @@ static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw) regmap_read(periph->regmap, periph->layout->offset, &status); spin_unlock_irqrestore(periph->lock, flags); - return status & AT91_PMC_PCR_EN ? 1 : 0; + return !!(status & AT91_PMC_PCR_EN); } static unsigned long diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index c4b3877aa445..f83ec0de86c3 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -34,7 +34,7 @@ static inline bool clk_system_ready(struct regmap *regmap, int id) regmap_read(regmap, AT91_PMC_SR, &status); - return status & (1 << id) ? 1 : 0; + return !!(status & (1 << id)); } static int clk_system_prepare(struct clk_hw *hw) @@ -74,7 +74,7 @@ static int clk_system_is_prepared(struct clk_hw *hw) regmap_read(sys->regmap, AT91_PMC_SR, &status); - return status & (1 << sys->id) ? 1 : 0; + return !!(status & (1 << sys->id)); } static const struct clk_ops system_ops = { -- cgit From 64c9247b9e87e96e41cea545eb64727cee10c55c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:18 +0300 Subject: clk: at91: clk-generated: pass the id of changeable parent at registration Pass the ID of changeable parent at registration. This will allow the scalability of this clock driver with regards to the changeable parent ID for versions of this IP where changeable parent is not the last one in the parents list (e.g. SAMA7G5). With this the clock flags are set to zero in case we have no changeable parent. Also in clk_generated_best_diff() the *best_diff variable is check against tmp_diff variable using ">=" operator instead of ">" so that in case the requested frequency could be obtained using fix parents + gck dividers but the clock also supports changeable parent to be able to force the usage of the changeable parent. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-11-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 26 ++++++++++++++------------ drivers/clk/at91/dt-compat.c | 8 +++++--- drivers/clk/at91/pmc.h | 4 ++-- drivers/clk/at91/sam9x60.c | 3 +-- drivers/clk/at91/sama5d2.c | 31 +++++++++++++++---------------- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 2448bdc63425..f9ca04c97128 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -18,8 +18,6 @@ #define GENERATED_MAX_DIV 255 -#define GCK_INDEX_DT_AUDIO_PLL 5 - struct clk_generated { struct clk_hw hw; struct regmap *regmap; @@ -29,7 +27,7 @@ struct clk_generated { u32 gckdiv; const struct clk_pcr_layout *layout; u8 parent_id; - bool audio_pll_allowed; + int chg_pid; }; #define to_clk_generated(hw) \ @@ -109,7 +107,7 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate / div; tmp_diff = abs(req->rate - tmp_rate); - if (*best_diff < 0 || *best_diff > tmp_diff) { + if (*best_diff < 0 || *best_diff >= tmp_diff) { *best_rate = tmp_rate; *best_diff = tmp_diff; req->best_parent_rate = parent_rate; @@ -129,7 +127,10 @@ static int clk_generated_determine_rate(struct clk_hw *hw, int i; u32 div; - for (i = 0; i < clk_hw_get_num_parents(hw) - 1; i++) { + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + if (gck->chg_pid == i) + continue; + parent = clk_hw_get_parent_by_index(hw, i); if (!parent) continue; @@ -161,10 +162,10 @@ static int clk_generated_determine_rate(struct clk_hw *hw, * that the only clks able to modify gck rate are those of audio IPs. */ - if (!gck->audio_pll_allowed) + if (gck->chg_pid < 0) goto end; - parent = clk_hw_get_parent_by_index(hw, GCK_INDEX_DT_AUDIO_PLL); + parent = clk_hw_get_parent_by_index(hw, gck->chg_pid); if (!parent) goto end; @@ -272,8 +273,8 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range) + u8 num_parents, u8 id, + const struct clk_range *range, int chg_pid) { struct clk_generated *gck; struct clk_init_data init; @@ -288,15 +289,16 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, init.ops = &generated_ops; init.parent_names = parent_names; init.num_parents = num_parents; - init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | - CLK_SET_RATE_PARENT; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + if (chg_pid >= 0) + init.flags |= CLK_SET_RATE_PARENT; gck->id = id; gck->hw.init = &init; gck->regmap = regmap; gck->lock = lock; gck->range = *range; - gck->audio_pll_allowed = pll_audio; + gck->chg_pid = chg_pid; gck->layout = layout; clk_generated_startup(gck); diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index aa1754eac59f..8a652c44c25a 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -22,6 +22,8 @@ #define SYSTEM_MAX_ID 31 +#define GCK_INDEX_DT_AUDIO_PLL 5 + #ifdef CONFIG_HAVE_AT91_AUDIO_PLL static void __init of_sama5d2_clk_audio_pll_frac_setup(struct device_node *np) { @@ -135,7 +137,7 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) return; for_each_child_of_node(np, gcknp) { - bool pll_audio = false; + int chg_pid = INT_MIN; if (of_property_read_u32(gcknp, "reg", &id)) continue; @@ -152,12 +154,12 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) if (of_device_is_compatible(np, "atmel,sama5d2-clk-generated") && (id == GCK_ID_I2S0 || id == GCK_ID_I2S1 || id == GCK_ID_CLASSD)) - pll_audio = true; + chg_pid = GCK_INDEX_DT_AUDIO_PLL; hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &dt_pcr_layout, name, parent_names, num_parents, - id, pll_audio, &range); + id, &range, chg_pid); if (IS_ERR(hw)) continue; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index df616f2937e7..949b1a7b91e5 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -122,8 +122,8 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range); + u8 num_parents, u8 id, + const struct clk_range *range, int chg_pid); struct clk_hw * __init at91_clk_register_h32mx(struct regmap *regmap, const char *name, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index d9f6d9ba1eca..58661b6f6eaa 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -290,8 +290,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_gck[i].n, parent_names, 6, sam9x60_gck[i].id, - false, - &sam9x60_gck[i].r); + &sam9x60_gck[i].r, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index d69421d71daf..42637d9531d3 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -116,21 +116,20 @@ static const struct { char *n; u8 id; struct clk_range r; - bool pll; + int chg_pid; } sama5d2_gck[] = { - { .n = "sdmmc0_gclk", .id = 31, }, - { .n = "sdmmc1_gclk", .id = 32, }, - { .n = "tcb0_gclk", .id = 35, .r = { .min = 0, .max = 83000000 }, }, - { .n = "tcb1_gclk", .id = 36, .r = { .min = 0, .max = 83000000 }, }, - { .n = "pwm_gclk", .id = 38, .r = { .min = 0, .max = 83000000 }, }, - { .n = "isc_gclk", .id = 46, }, - { .n = "pdmic_gclk", .id = 48, }, - { .n = "i2s0_gclk", .id = 54, .pll = true }, - { .n = "i2s1_gclk", .id = 55, .pll = true }, - { .n = "can0_gclk", .id = 56, .r = { .min = 0, .max = 80000000 }, }, - { .n = "can1_gclk", .id = 57, .r = { .min = 0, .max = 80000000 }, }, - { .n = "classd_gclk", .id = 59, .r = { .min = 0, .max = 100000000 }, - .pll = true }, + { .n = "sdmmc0_gclk", .id = 31, .chg_pid = INT_MIN, }, + { .n = "sdmmc1_gclk", .id = 32, .chg_pid = INT_MIN, }, + { .n = "tcb0_gclk", .id = 35, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "tcb1_gclk", .id = 36, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "pwm_gclk", .id = 38, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "isc_gclk", .id = 46, .chg_pid = INT_MIN, }, + { .n = "pdmic_gclk", .id = 48, .chg_pid = INT_MIN, }, + { .n = "i2s0_gclk", .id = 54, .chg_pid = 5, }, + { .n = "i2s1_gclk", .id = 55, .chg_pid = 5, }, + { .n = "can0_gclk", .id = 56, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "can1_gclk", .id = 57, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "classd_gclk", .id = 59, .chg_pid = 5, .r = { .min = 0, .max = 100000000 }, }, }; static const struct clk_programmable_layout sama5d2_programmable_layout = { @@ -324,8 +323,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_gck[i].n, parent_names, 6, sama5d2_gck[i].id, - sama5d2_gck[i].pll, - &sama5d2_gck[i].r); + &sama5d2_gck[i].r, + sama5d2_gck[i].chg_pid); if (IS_ERR(hw)) goto err_free; -- cgit From 22a1dfe93bf496d03cb1d76b1fbd23a7ff4a062c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:19 +0300 Subject: clk: at91: clk-generated: add mux_table option Add mux table option. This is necessary for IP versions that has gaps in the range of available clock sources (e.g. SAMA7G5). Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-12-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-generated.c | 13 ++++++++++--- drivers/clk/at91/dt-compat.c | 5 +++-- drivers/clk/at91/pmc.h | 2 +- drivers/clk/at91/sam9x60.c | 2 +- drivers/clk/at91/sama5d2.c | 2 +- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index f9ca04c97128..b4fc8d71daf2 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -23,6 +23,7 @@ struct clk_generated { struct regmap *regmap; struct clk_range range; spinlock_t *lock; + u32 *mux_table; u32 id; u32 gckdiv; const struct clk_pcr_layout *layout; @@ -201,7 +202,11 @@ static int clk_generated_set_parent(struct clk_hw *hw, u8 index) if (index >= clk_hw_get_num_parents(hw)) return -EINVAL; - gck->parent_id = index; + if (gck->mux_table) + gck->parent_id = clk_mux_index_to_val(gck->mux_table, 0, index); + else + gck->parent_id = index; + return 0; } @@ -273,8 +278,9 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, - const struct clk_range *range, int chg_pid) + u32 *mux_table, u8 num_parents, u8 id, + const struct clk_range *range, + int chg_pid) { struct clk_generated *gck; struct clk_init_data init; @@ -300,6 +306,7 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, gck->range = *range; gck->chg_pid = chg_pid; gck->layout = layout; + gck->mux_table = mux_table; clk_generated_startup(gck); hw = &gck->hw; diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index 8a652c44c25a..cc95d42f4d53 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -158,8 +158,9 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &dt_pcr_layout, name, - parent_names, num_parents, - id, &range, chg_pid); + parent_names, NULL, + num_parents, id, &range, + chg_pid); if (IS_ERR(hw)) continue; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 949b1a7b91e5..2bfe1405f9f8 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -122,7 +122,7 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, + u32 *mux_table, u8 num_parents, u8 id, const struct clk_range *range, int chg_pid); struct clk_hw * __init diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 58661b6f6eaa..197ffc70305a 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -288,7 +288,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &sam9x60_pcr_layout, sam9x60_gck[i].n, - parent_names, 6, + parent_names, NULL, 6, sam9x60_gck[i].id, &sam9x60_gck[i].r, INT_MIN); if (IS_ERR(hw)) diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index 42637d9531d3..6a685d00f16d 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -321,7 +321,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &sama5d2_pcr_layout, sama5d2_gck[i].n, - parent_names, 6, + parent_names, NULL, 6, sama5d2_gck[i].id, &sama5d2_gck[i].r, sama5d2_gck[i].chg_pid); -- cgit From 75c88143f3b879664cc5bf68b91854c1a98f5e5b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:20 +0300 Subject: clk: at91: clk-master: add master clock support for SAMA7G5 Add master clock support (MCK1..4) for SAMA7G5. SAMA7G5's PMC has multiple master clocks feeding different subsystems. One of them feeds image subsystem and is changeable based on image subsystem needs. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-13-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-master.c | 310 +++++++++++++++++++++++++++++++++++++++++- drivers/clk/at91/pmc.h | 7 + include/linux/clk/at91_pmc.h | 1 + 3 files changed, 313 insertions(+), 5 deletions(-) diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index 88d545b1698c..bd0d8a69a2cf 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -17,30 +17,49 @@ #define MASTER_DIV_SHIFT 8 #define MASTER_DIV_MASK 0x3 +#define PMC_MCR 0x30 +#define PMC_MCR_ID_MSK GENMASK(3, 0) +#define PMC_MCR_CMD BIT(7) +#define PMC_MCR_DIV GENMASK(10, 8) +#define PMC_MCR_CSS GENMASK(20, 16) +#define PMC_MCR_CSS_SHIFT (16) +#define PMC_MCR_EN BIT(28) + +#define PMC_MCR_ID(x) ((x) & PMC_MCR_ID_MSK) + +#define MASTER_MAX_ID 4 + #define to_clk_master(hw) container_of(hw, struct clk_master, hw) struct clk_master { struct clk_hw hw; struct regmap *regmap; + spinlock_t *lock; const struct clk_master_layout *layout; const struct clk_master_characteristics *characteristics; + u32 *mux_table; u32 mckr; + int chg_pid; + u8 id; + u8 parent; + u8 div; }; -static inline bool clk_master_ready(struct regmap *regmap) +static inline bool clk_master_ready(struct clk_master *master) { + unsigned int bit = master->id ? AT91_PMC_MCKXRDY : AT91_PMC_MCKRDY; unsigned int status; - regmap_read(regmap, AT91_PMC_SR, &status); + regmap_read(master->regmap, AT91_PMC_SR, &status); - return !!(status & AT91_PMC_MCKRDY); + return !!(status & bit); } static int clk_master_prepare(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); - while (!clk_master_ready(master->regmap)) + while (!clk_master_ready(master)) cpu_relax(); return 0; @@ -50,7 +69,7 @@ static int clk_master_is_prepared(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); - return clk_master_ready(master->regmap); + return clk_master_ready(master); } static unsigned long clk_master_recalc_rate(struct clk_hw *hw, @@ -143,6 +162,287 @@ at91_clk_register_master(struct regmap *regmap, return hw; } +static unsigned long +clk_sama7g5_master_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div)); +} + +static void clk_sama7g5_master_best_diff(struct clk_rate_request *req, + struct clk_hw *parent, + unsigned long parent_rate, + long *best_rate, + long *best_diff, + u32 div) +{ + unsigned long tmp_rate, tmp_diff; + + if (div == MASTER_PRES_MAX) + tmp_rate = parent_rate / 3; + else + tmp_rate = parent_rate >> div; + + tmp_diff = abs(req->rate - tmp_rate); + + if (*best_diff < 0 || *best_diff >= tmp_diff) { + *best_rate = tmp_rate; + *best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } +} + +static int clk_sama7g5_master_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_master *master = to_clk_master(hw); + struct clk_rate_request req_parent = *req; + struct clk_hw *parent; + long best_rate = LONG_MIN, best_diff = LONG_MIN; + unsigned long parent_rate; + unsigned int div, i; + + /* First: check the dividers of MCR. */ + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + parent = clk_hw_get_parent_by_index(hw, i); + if (!parent) + continue; + + parent_rate = clk_hw_get_rate(parent); + if (!parent_rate) + continue; + + for (div = 0; div < MASTER_PRES_MAX + 1; div++) { + clk_sama7g5_master_best_diff(req, parent, parent_rate, + &best_rate, &best_diff, + div); + if (!best_diff) + break; + } + + if (!best_diff) + break; + } + + /* Second: try to request rate form changeable parent. */ + if (master->chg_pid < 0) + goto end; + + parent = clk_hw_get_parent_by_index(hw, master->chg_pid); + if (!parent) + goto end; + + for (div = 0; div < MASTER_PRES_MAX + 1; div++) { + if (div == MASTER_PRES_MAX) + req_parent.rate = req->rate * 3; + else + req_parent.rate = req->rate << div; + + if (__clk_determine_rate(parent, &req_parent)) + continue; + + clk_sama7g5_master_best_diff(req, parent, req_parent.rate, + &best_rate, &best_diff, div); + + if (!best_diff) + break; + } + +end: + pr_debug("MCK: %s, best_rate = %ld, parent clk: %s @ %ld\n", + __func__, best_rate, + __clk_get_name((req->best_parent_hw)->clk), + req->best_parent_rate); + + if (best_rate < 0) + return -EINVAL; + + req->rate = best_rate; + + return 0; +} + +static u8 clk_sama7g5_master_get_parent(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + u8 index; + + spin_lock_irqsave(master->lock, flags); + index = clk_mux_val_to_index(&master->hw, master->mux_table, 0, + master->parent); + spin_unlock_irqrestore(master->lock, flags); + + return index; +} + +static int clk_sama7g5_master_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + + if (index >= clk_hw_get_num_parents(hw)) + return -EINVAL; + + spin_lock_irqsave(master->lock, flags); + master->parent = clk_mux_index_to_val(master->mux_table, 0, index); + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static int clk_sama7g5_master_enable(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + unsigned int val, cparent; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, PMC_MCR_ID(master->id)); + regmap_read(master->regmap, PMC_MCR, &val); + regmap_update_bits(master->regmap, PMC_MCR, + PMC_MCR_EN | PMC_MCR_CSS | PMC_MCR_DIV | + PMC_MCR_CMD | PMC_MCR_ID_MSK, + PMC_MCR_EN | (master->parent << PMC_MCR_CSS_SHIFT) | + (master->div << MASTER_DIV_SHIFT) | + PMC_MCR_CMD | PMC_MCR_ID(master->id)); + + cparent = (val & PMC_MCR_CSS) >> PMC_MCR_CSS_SHIFT; + + /* Wait here only if parent is being changed. */ + while ((cparent != master->parent) && !clk_master_ready(master)) + cpu_relax(); + + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static void clk_sama7g5_master_disable(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_update_bits(master->regmap, PMC_MCR, + PMC_MCR_EN | PMC_MCR_CMD | PMC_MCR_ID_MSK, + PMC_MCR_CMD | PMC_MCR_ID(master->id)); + + spin_unlock_irqrestore(master->lock, flags); +} + +static int clk_sama7g5_master_is_enabled(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + unsigned int val; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_read(master->regmap, PMC_MCR, &val); + + spin_unlock_irqrestore(master->lock, flags); + + return !!(val & PMC_MCR_EN); +} + +static int clk_sama7g5_master_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long div, flags; + + div = DIV_ROUND_CLOSEST(parent_rate, rate); + if ((div > (1 << (MASTER_PRES_MAX - 1))) || (div & (div - 1))) + return -EINVAL; + + if (div == 3) + div = MASTER_PRES_MAX; + else + div = ffs(div) - 1; + + spin_lock_irqsave(master->lock, flags); + master->div = div; + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static const struct clk_ops sama7g5_master_ops = { + .enable = clk_sama7g5_master_enable, + .disable = clk_sama7g5_master_disable, + .is_enabled = clk_sama7g5_master_is_enabled, + .recalc_rate = clk_sama7g5_master_recalc_rate, + .determine_rate = clk_sama7g5_master_determine_rate, + .set_rate = clk_sama7g5_master_set_rate, + .get_parent = clk_sama7g5_master_get_parent, + .set_parent = clk_sama7g5_master_set_parent, +}; + +struct clk_hw * __init +at91_clk_sama7g5_register_master(struct regmap *regmap, + const char *name, int num_parents, + const char **parent_names, + u32 *mux_table, + spinlock_t *lock, u8 id, + bool critical, int chg_pid) +{ + struct clk_master *master; + struct clk_hw *hw; + struct clk_init_data init; + unsigned long flags; + unsigned int val; + int ret; + + if (!name || !num_parents || !parent_names || !mux_table || + !lock || id > MASTER_MAX_ID) + return ERR_PTR(-EINVAL); + + master = kzalloc(sizeof(*master), GFP_KERNEL); + if (!master) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &sama7g5_master_ops; + init.parent_names = parent_names; + init.num_parents = num_parents; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + if (chg_pid >= 0) + init.flags |= CLK_SET_RATE_PARENT; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + master->hw.init = &init; + master->regmap = regmap; + master->id = id; + master->chg_pid = chg_pid; + master->lock = lock; + master->mux_table = mux_table; + + spin_lock_irqsave(master->lock, flags); + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_read(master->regmap, PMC_MCR, &val); + master->parent = (val & PMC_MCR_CSS) >> PMC_MCR_CSS_SHIFT; + master->div = (val & PMC_MCR_DIV) >> MASTER_DIV_SHIFT; + spin_unlock_irqrestore(master->lock, flags); + + hw = &master->hw; + ret = clk_hw_register(NULL, &master->hw); + if (ret) { + kfree(master); + hw = ERR_PTR(ret); + } + + return hw; +} + const struct clk_master_layout at91rm9200_master_layout = { .mask = 0x31F, .pres_shift = 2, diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 2bfe1405f9f8..29d150feaa46 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -154,6 +154,13 @@ at91_clk_register_master(struct regmap *regmap, const char *name, const struct clk_master_layout *layout, const struct clk_master_characteristics *characteristics); +struct clk_hw * __init +at91_clk_sama7g5_register_master(struct regmap *regmap, + const char *name, int num_parents, + const char **parent_names, u32 *mux_table, + spinlock_t *lock, u8 id, bool critical, + int chg_pid); + struct clk_hw * __init at91_clk_register_peripheral(struct regmap *regmap, const char *name, const char *parent_name, u32 id); diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 49a53a137610..77d6dabc4c3c 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -174,6 +174,7 @@ #define AT91_PMC_MOSCRCS (1 << 17) /* Main On-Chip RC [some SAM9] */ #define AT91_PMC_CFDEV (1 << 18) /* Clock Failure Detector Event [some SAM9] */ #define AT91_PMC_GCKRDY (1 << 24) /* Generated Clocks */ +#define AT91_PMC_MCKXRDY (1 << 26) /* Master Clock x [x=1..4] Ready Status */ #define AT91_PMC_IMR 0x6c /* Interrupt Mask Register */ #define AT91_PMC_FSMR 0x70 /* Fast Startup Mode Register */ -- cgit From b4c115c76184f2c56a295579161652fd5eb2dcc1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:21 +0300 Subject: clk: at91: clk-peripheral: add support for changeable parent rate Some peripheral clocks on SAMA7G5 supports requesting parent to change its rate (image related clocks: csi, csi2dc, isc). Add support so that if registered with this option the clock rate to be requested from parent. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-14-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91sam9n12.c | 2 +- drivers/clk/at91/at91sam9x5.c | 4 +- drivers/clk/at91/clk-peripheral.c | 109 ++++++++++++++++++++++++++++++++++++-- drivers/clk/at91/dt-compat.c | 3 +- drivers/clk/at91/pmc.h | 3 +- drivers/clk/at91/sam9x60.c | 2 +- drivers/clk/at91/sama5d2.c | 5 +- drivers/clk/at91/sama5d3.c | 3 +- drivers/clk/at91/sama5d4.c | 4 +- 9 files changed, 119 insertions(+), 16 deletions(-) diff --git a/drivers/clk/at91/at91sam9n12.c b/drivers/clk/at91/at91sam9n12.c index 4aa97e672bd6..aaf4da30584e 100644 --- a/drivers/clk/at91/at91sam9n12.c +++ b/drivers/clk/at91/at91sam9n12.c @@ -222,7 +222,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) at91sam9n12_periphck[i].n, "masterck", at91sam9n12_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index 0ce3da080287..52a9d2f7ec83 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -257,7 +257,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, at91sam9x5_periphck[i].n, "masterck", at91sam9x5_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -270,7 +270,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, extra_pcks[i].n, "masterck", extra_pcks[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c index 4c9a4147dfe5..7867eaf0447f 100644 --- a/drivers/clk/at91/clk-peripheral.c +++ b/drivers/clk/at91/clk-peripheral.c @@ -38,6 +38,7 @@ struct clk_sam9x5_peripheral { u32 div; const struct clk_pcr_layout *layout; bool auto_div; + int chg_pid; }; #define to_clk_sam9x5_peripheral(hw) \ @@ -238,6 +239,87 @@ clk_sam9x5_peripheral_recalc_rate(struct clk_hw *hw, return parent_rate >> periph->div; } +static void clk_sam9x5_peripheral_best_diff(struct clk_rate_request *req, + struct clk_hw *parent, + unsigned long parent_rate, + u32 shift, long *best_diff, + long *best_rate) +{ + unsigned long tmp_rate = parent_rate >> shift; + unsigned long tmp_diff = abs(req->rate - tmp_rate); + + if (*best_diff < 0 || *best_diff >= tmp_diff) { + *best_rate = tmp_rate; + *best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } +} + +static int clk_sam9x5_peripheral_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); + struct clk_hw *parent = clk_hw_get_parent(hw); + struct clk_rate_request req_parent = *req; + unsigned long parent_rate = clk_hw_get_rate(parent); + unsigned long tmp_rate; + long best_rate = LONG_MIN; + long best_diff = LONG_MIN; + u32 shift; + + if (periph->id < PERIPHERAL_ID_MIN || !periph->range.max) + return parent_rate; + + /* Fist step: check the available dividers. */ + for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) { + tmp_rate = parent_rate >> shift; + + if (periph->range.max && tmp_rate > periph->range.max) + continue; + + clk_sam9x5_peripheral_best_diff(req, parent, parent_rate, + shift, &best_diff, &best_rate); + + if (!best_diff || best_rate <= req->rate) + break; + } + + if (periph->chg_pid < 0) + goto end; + + /* Step two: try to request rate from parent. */ + parent = clk_hw_get_parent_by_index(hw, periph->chg_pid); + if (!parent) + goto end; + + for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) { + req_parent.rate = req->rate << shift; + + if (__clk_determine_rate(parent, &req_parent)) + continue; + + clk_sam9x5_peripheral_best_diff(req, parent, req_parent.rate, + shift, &best_diff, &best_rate); + + if (!best_diff) + break; + } +end: + if (best_rate < 0 || + (periph->range.max && best_rate > periph->range.max)) + return -EINVAL; + + pr_debug("PCK: %s, best_rate = %ld, parent clk: %s @ %ld\n", + __func__, best_rate, + __clk_get_name((req->best_parent_hw)->clk), + req->best_parent_rate); + + req->rate = best_rate; + + return 0; +} + static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) @@ -320,11 +402,21 @@ static const struct clk_ops sam9x5_peripheral_ops = { .set_rate = clk_sam9x5_peripheral_set_rate, }; +static const struct clk_ops sam9x5_peripheral_chg_ops = { + .enable = clk_sam9x5_peripheral_enable, + .disable = clk_sam9x5_peripheral_disable, + .is_enabled = clk_sam9x5_peripheral_is_enabled, + .recalc_rate = clk_sam9x5_peripheral_recalc_rate, + .determine_rate = clk_sam9x5_peripheral_determine_rate, + .set_rate = clk_sam9x5_peripheral_set_rate, +}; + struct clk_hw * __init at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char *parent_name, - u32 id, const struct clk_range *range) + u32 id, const struct clk_range *range, + int chg_pid) { struct clk_sam9x5_peripheral *periph; struct clk_init_data init; @@ -339,10 +431,16 @@ at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &sam9x5_peripheral_ops; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = (parent_name ? 1 : 0); - init.flags = 0; + init.parent_names = &parent_name; + init.num_parents = 1; + if (chg_pid < 0) { + init.flags = 0; + init.ops = &sam9x5_peripheral_ops; + } else { + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT; + init.ops = &sam9x5_peripheral_chg_ops; + } periph->id = id; periph->hw.init = &init; @@ -353,6 +451,7 @@ at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, periph->auto_div = true; periph->layout = layout; periph->range = *range; + periph->chg_pid = chg_pid; hw = &periph->hw; ret = clk_hw_register(NULL, &periph->hw); diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index cc95d42f4d53..1b90c4f7b1d1 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -463,7 +463,8 @@ of_at91_clk_periph_setup(struct device_node *np, u8 type) &dt_pcr_layout, name, parent_name, - id, &range); + id, &range, + INT_MIN); } if (IS_ERR(hw)) diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 29d150feaa46..34c9506e6275 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -168,7 +168,8 @@ struct clk_hw * __init at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char *parent_name, - u32 id, const struct clk_range *range); + u32 id, const struct clk_range *range, + int chg_pid); struct clk_hw * __init at91_clk_register_pll(struct regmap *regmap, const char *name, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 197ffc70305a..f090518d24a7 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -277,7 +277,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_periphck[i].n, "masterck", sam9x60_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index 6a685d00f16d..c7765b664940 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -291,7 +291,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_periphck[i].n, "masterck", sama5d2_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -304,7 +304,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_periph32ck[i].n, "h32mxck", sama5d2_periph32ck[i].id, - &sama5d2_periph32ck[i].r); + &sama5d2_periph32ck[i].r, + INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d3.c b/drivers/clk/at91/sama5d3.c index 5609b04e6565..49ec5964bcb3 100644 --- a/drivers/clk/at91/sama5d3.c +++ b/drivers/clk/at91/sama5d3.c @@ -223,7 +223,8 @@ static void __init sama5d3_pmc_setup(struct device_node *np) sama5d3_periphck[i].n, "masterck", sama5d3_periphck[i].id, - &sama5d3_periphck[i].r); + &sama5d3_periphck[i].r, + INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c index 662ff5fa6e98..fa121897d95d 100644 --- a/drivers/clk/at91/sama5d4.c +++ b/drivers/clk/at91/sama5d4.c @@ -246,7 +246,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) sama5d4_periphck[i].n, "masterck", sama5d4_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -259,7 +259,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) sama5d4_periph32ck[i].n, "h32mxck", sama5d4_periph32ck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; -- cgit From c57aaaa28cf1a123c0029a36361a809eae2b1960 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:22 +0300 Subject: clk: at91: clk-programmable: add mux_table option Add mux table option. This is necessary for IP versions that has gaps in the range of available clock sources (e.g. SAMA7G5). Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-15-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/at91rm9200.c | 3 ++- drivers/clk/at91/at91sam9260.c | 3 ++- drivers/clk/at91/at91sam9g45.c | 3 ++- drivers/clk/at91/at91sam9n12.c | 3 ++- drivers/clk/at91/at91sam9rl.c | 3 ++- drivers/clk/at91/at91sam9x5.c | 3 ++- drivers/clk/at91/clk-programmable.c | 11 ++++++++++- drivers/clk/at91/dt-compat.c | 11 ++++++----- drivers/clk/at91/pmc.h | 3 ++- drivers/clk/at91/sam9x60.c | 3 ++- drivers/clk/at91/sama5d2.c | 3 ++- drivers/clk/at91/sama5d3.c | 3 ++- drivers/clk/at91/sama5d4.c | 3 ++- 13 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index 38bdb4981315..2c3d8e6ca63c 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -160,7 +160,8 @@ static void __init at91rm9200_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 4, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9260.c b/drivers/clk/at91/at91sam9260.c index 6d0723aa8b13..bb81ff731ad8 100644 --- a/drivers/clk/at91/at91sam9260.c +++ b/drivers/clk/at91/at91sam9260.c @@ -436,7 +436,8 @@ static void __init at91sam926x_pmc_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, 4, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9g45.c b/drivers/clk/at91/at91sam9g45.c index fe9d391adeba..c88ee20bee31 100644 --- a/drivers/clk/at91/at91sam9g45.c +++ b/drivers/clk/at91/at91sam9g45.c @@ -181,7 +181,8 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9g45_programmable_layout); + &at91sam9g45_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9n12.c b/drivers/clk/at91/at91sam9n12.c index aaf4da30584e..93f7eb216122 100644 --- a/drivers/clk/at91/at91sam9n12.c +++ b/drivers/clk/at91/at91sam9n12.c @@ -199,7 +199,8 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9rl.c b/drivers/clk/at91/at91sam9rl.c index 0d1cc44b056f..a343eb69bb35 100644 --- a/drivers/clk/at91/at91sam9rl.c +++ b/drivers/clk/at91/at91sam9rl.c @@ -137,7 +137,8 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index 52a9d2f7ec83..22b9aad9efb8 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -226,7 +226,8 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 8ee66fbee3d9..fcf8f6a1c2c6 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -21,6 +21,7 @@ struct clk_programmable { struct clk_hw hw; struct regmap *regmap; + u32 *mux_table; u8 id; const struct clk_programmable_layout *layout; }; @@ -108,6 +109,9 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index) if (layout->have_slck_mck) mask |= AT91_PMC_CSSMCK_MCK; + if (prog->mux_table) + pckr = clk_mux_index_to_val(prog->mux_table, 0, index); + if (index > layout->css_mask) { if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck) return -EINVAL; @@ -134,6 +138,9 @@ static u8 clk_programmable_get_parent(struct clk_hw *hw) if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret) ret = PROG_MAX_RM9200_CSS + 1; + if (prog->mux_table) + ret = clk_mux_val_to_index(&prog->hw, prog->mux_table, 0, ret); + return ret; } @@ -182,7 +189,8 @@ struct clk_hw * __init at91_clk_register_programmable(struct regmap *regmap, const char *name, const char **parent_names, u8 num_parents, u8 id, - const struct clk_programmable_layout *layout) + const struct clk_programmable_layout *layout, + u32 *mux_table) { struct clk_programmable *prog; struct clk_hw *hw; @@ -206,6 +214,7 @@ at91_clk_register_programmable(struct regmap *regmap, prog->layout = layout; prog->hw.init = &init; prog->regmap = regmap; + prog->mux_table = mux_table; hw = &prog->hw; ret = clk_hw_register(NULL, &prog->hw); diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index 1b90c4f7b1d1..a50084de97d4 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -677,7 +677,8 @@ CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv", static void __init of_at91_clk_prog_setup(struct device_node *np, - const struct clk_programmable_layout *layout) + const struct clk_programmable_layout *layout, + u32 *mux_table) { int num; u32 id; @@ -711,7 +712,7 @@ of_at91_clk_prog_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, num_parents, - id, layout); + id, layout, mux_table); if (IS_ERR(hw)) continue; @@ -721,21 +722,21 @@ of_at91_clk_prog_setup(struct device_node *np, static void __init of_at91rm9200_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout); + of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout, NULL); } CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable", of_at91rm9200_clk_prog_setup); static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout); + of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout, NULL); } CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable", of_at91sam9g45_clk_prog_setup); static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout); + of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout, NULL); } CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable", of_at91sam9x5_clk_prog_setup); diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 34c9506e6275..8d3e22f566cf 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -188,7 +188,8 @@ sam9x60_clk_register_pll(struct regmap *regmap, spinlock_t *lock, struct clk_hw * __init at91_clk_register_programmable(struct regmap *regmap, const char *name, const char **parent_names, u8 num_parents, u8 id, - const struct clk_programmable_layout *layout); + const struct clk_programmable_layout *layout, + u32 *mux_table); struct clk_hw * __init at91_clk_register_sam9260_slow(struct regmap *regmap, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index f090518d24a7..d8d4b28ec4cb 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -254,7 +254,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &sam9x60_programmable_layout); + &sam9x60_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index c7765b664940..8b220762941a 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -268,7 +268,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &sama5d2_programmable_layout); + &sama5d2_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d3.c b/drivers/clk/at91/sama5d3.c index 49ec5964bcb3..7c6e0a5b9dc8 100644 --- a/drivers/clk/at91/sama5d3.c +++ b/drivers/clk/at91/sama5d3.c @@ -200,7 +200,8 @@ static void __init sama5d3_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c index fa121897d95d..92d8d4141b43 100644 --- a/drivers/clk/at91/sama5d4.c +++ b/drivers/clk/at91/sama5d4.c @@ -223,7 +223,8 @@ static void __init sama5d4_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; -- cgit From 0416824edca1cdcb6e00e6f909423bf0fc529eef Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:23 +0300 Subject: clk: at91: add macro for pll ids mask Add macro for PLL IDs mask. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-16-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 77d6dabc4c3c..dc5e85f124e0 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -59,6 +59,7 @@ #define AT91_PMC_PLL_UPDT 0x1C /* PMC PLL update register [for SAM9X60] */ #define AT91_PMC_PLL_UPDT_UPDATE (1 << 8) /* Update PLL settings */ #define AT91_PMC_PLL_UPDT_ID (1 << 0) /* PLL ID */ +#define AT91_PMC_PLL_UPDT_ID_MSK (0xf) /* PLL ID mask */ #define AT91_PMC_PLL_UPDT_STUPTIM (0xff << 16) /* Startup time */ #define AT91_CKGR_MOR 0x20 /* Main Oscillator Register [not on SAM9RL] */ -- cgit From 43b1bb4a9b3e183af12225f56c27164c10d06223 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:24 +0300 Subject: clk: at91: clk-sam9x60-pll: re-factor to support plls with multiple outputs Some of the SAMA7G5 PLLs support multiple outputs (e.g. AUDIO PLL). For these, split the PLL clock in two: fractional clock and divider clock. In case PLLs supports multiple outputs (since these outputs are dividers (with different settings) sharing the same fractional part), it will register one fractional clock and multiple divider clocks (dividers sharing the fractional clock). Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-17-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-sam9x60-pll.c | 547 +++++++++++++++++++++++++------------ drivers/clk/at91/pmc.h | 22 +- drivers/clk/at91/sam9x60.c | 50 +++- 3 files changed, 433 insertions(+), 186 deletions(-) diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index 1f0bcc6b4fea..b473298ef7e6 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -25,20 +25,31 @@ #define FCORE_MIN (600000000) #define FCORE_MAX (1200000000) -#define PLL_MAX_ID 1 +#define PLL_MAX_ID 7 -struct sam9x60_pll { - struct clk_hw hw; +struct sam9x60_pll_core { struct regmap *regmap; spinlock_t *lock; const struct clk_pll_characteristics *characteristics; - u32 frac; + const struct clk_pll_layout *layout; + struct clk_hw hw; u8 id; - u8 div; +}; + +struct sam9x60_frac { + struct sam9x60_pll_core core; + u32 frac; u16 mul; }; -#define to_sam9x60_pll(hw) container_of(hw, struct sam9x60_pll, hw) +struct sam9x60_div { + struct sam9x60_pll_core core; + u8 div; +}; + +#define to_sam9x60_pll_core(hw) container_of(hw, struct sam9x60_pll_core, hw) +#define to_sam9x60_frac(core) container_of(core, struct sam9x60_frac, core) +#define to_sam9x60_div(core) container_of(core, struct sam9x60_div, core) static inline bool sam9x60_pll_ready(struct regmap *regmap, int id) { @@ -49,43 +60,53 @@ static inline bool sam9x60_pll_ready(struct regmap *regmap, int id) return !!(status & BIT(id)); } -static int sam9x60_pll_prepare(struct clk_hw *hw) +static bool sam9x60_frac_pll_ready(struct regmap *regmap, u8 id) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); - struct regmap *regmap = pll->regmap; - unsigned long flags; - u8 div; - u16 mul; - u32 val, frac; + return sam9x60_pll_ready(regmap, id); +} - spin_lock_irqsave(pll->lock, flags); - regmap_write(regmap, AT91_PMC_PLL_UPDT, pll->id); +static unsigned long sam9x60_frac_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_frac *frac = to_sam9x60_frac(core); - regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); - div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val); + return (parent_rate * (frac->mul + 1) + + ((u64)parent_rate * frac->frac >> 22)); +} +static int sam9x60_frac_pll_prepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_frac *frac = to_sam9x60_frac(core); + struct regmap *regmap = core->regmap; + unsigned int val, cfrac, cmul; + unsigned long flags; + + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); - mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, val); - frac = FIELD_GET(PMC_PLL_CTRL1_FRACR_MSK, val); + cmul = (val & core->layout->mul_mask) >> core->layout->mul_shift; + cfrac = (val & core->layout->frac_mask) >> core->layout->frac_shift; - if (sam9x60_pll_ready(regmap, pll->id) && - (div == pll->div && mul == pll->mul && frac == pll->frac)) { - spin_unlock_irqrestore(pll->lock, flags); - return 0; - } + if (sam9x60_frac_pll_ready(regmap, core->id) && + (cmul == frac->mul && cfrac == frac->frac)) + goto unlock; - /* Recommended value for AT91_PMC_PLL_ACR */ - if (pll->characteristics->upll) + /* Recommended value for PMC_PLL_ACR */ + if (core->characteristics->upll) val = AT91_PMC_PLL_ACR_DEFAULT_UPLL; else val = AT91_PMC_PLL_ACR_DEFAULT_PLLA; regmap_write(regmap, AT91_PMC_PLL_ACR, val); regmap_write(regmap, AT91_PMC_PLL_CTRL1, - FIELD_PREP(PMC_PLL_CTRL1_MUL_MSK, pll->mul) | - FIELD_PREP(PMC_PLL_CTRL1_FRACR_MSK, pll->frac)); + (frac->mul << core->layout->mul_shift) | + (frac->frac << core->layout->frac_shift)); - if (pll->characteristics->upll) { + if (core->characteristics->upll) { /* Enable the UTMI internal bandgap */ val |= AT91_PMC_PLL_ACR_UTMIBG; regmap_write(regmap, AT91_PMC_PLL_ACR, val); @@ -100,229 +121,409 @@ static int sam9x60_pll_prepare(struct clk_hw *hw) } regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - regmap_write(regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL | - AT91_PMC_PLL_CTRL0_ENPLLCK | pll->div); + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL, + AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL); regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - while (!sam9x60_pll_ready(regmap, pll->id)) + while (!sam9x60_pll_ready(regmap, core->id)) cpu_relax(); - spin_unlock_irqrestore(pll->lock, flags); +unlock: + spin_unlock_irqrestore(core->lock, flags); return 0; } -static int sam9x60_pll_is_prepared(struct clk_hw *hw) +static void sam9x60_frac_pll_unprepare(struct clk_hw *hw) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; - return sam9x60_pll_ready(pll->regmap, pll->id); + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, AT91_PMC_PLL_CTRL0_ENPLL, 0); + + if (core->characteristics->upll) + regmap_update_bits(regmap, AT91_PMC_PLL_ACR, + AT91_PMC_PLL_ACR_UTMIBG | AT91_PMC_PLL_ACR_UTMIVR, 0); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + spin_unlock_irqrestore(core->lock, flags); } -static void sam9x60_pll_unprepare(struct clk_hw *hw) +static int sam9x60_frac_pll_is_prepared(struct clk_hw *hw) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); - unsigned long flags; + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + + return sam9x60_pll_ready(core->regmap, core->id); +} - spin_lock_irqsave(pll->lock, flags); +static long sam9x60_frac_pll_compute_mul_frac(struct sam9x60_pll_core *core, + unsigned long rate, + unsigned long parent_rate, + bool update) +{ + struct sam9x60_frac *frac = to_sam9x60_frac(core); + unsigned long tmprate, remainder; + unsigned long nmul = 0; + unsigned long nfrac = 0; - regmap_write(pll->regmap, AT91_PMC_PLL_UPDT, pll->id); + if (rate < FCORE_MIN || rate > FCORE_MAX) + return -ERANGE; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENPLLCK, 0); + /* + * Calculate the multiplier associated with the current + * divider that provide the closest rate to the requested one. + */ + nmul = mult_frac(rate, 1, parent_rate); + tmprate = mult_frac(parent_rate, nmul, 1); + remainder = rate - tmprate; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + if (remainder) { + nfrac = DIV_ROUND_CLOSEST_ULL((u64)remainder * (1 << 22), + parent_rate); - regmap_update_bits(pll->regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENPLL, 0); + tmprate += DIV_ROUND_CLOSEST_ULL((u64)nfrac * parent_rate, + (1 << 22)); + } - if (pll->characteristics->upll) - regmap_update_bits(pll->regmap, AT91_PMC_PLL_ACR, - AT91_PMC_PLL_ACR_UTMIBG | - AT91_PMC_PLL_ACR_UTMIVR, 0); + /* Check if resulted rate is a valid. */ + if (tmprate < FCORE_MIN || tmprate > FCORE_MAX) + return -ERANGE; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + if (update) { + frac->mul = nmul - 1; + frac->frac = nfrac; + } - spin_unlock_irqrestore(pll->lock, flags); + return tmprate; } -static unsigned long sam9x60_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) +static long sam9x60_frac_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - return DIV_ROUND_CLOSEST_ULL((parent_rate * (pll->mul + 1) + - ((u64)parent_rate * pll->frac >> 22)), (pll->div + 1)); + return sam9x60_frac_pll_compute_mul_frac(core, rate, *parent_rate, false); } -static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, - unsigned long rate, - unsigned long parent_rate, - bool update) +static int sam9x60_frac_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { - const struct clk_pll_characteristics *characteristics = - pll->characteristics; - unsigned long bestremainder = ULONG_MAX; - unsigned long maxdiv, mindiv, tmpdiv; - long bestrate = -ERANGE; - unsigned long bestdiv = 0; - unsigned long bestmul = 0; - unsigned long bestfrac = 0; - u64 fcore = 0; + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - if (rate < characteristics->output[0].min || - rate > characteristics->output[0].max) - return -ERANGE; + return sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true); +} - if (!pll->characteristics->upll) { - mindiv = parent_rate / rate; - if (mindiv < 2) - mindiv = 2; +static const struct clk_ops sam9x60_frac_pll_ops = { + .prepare = sam9x60_frac_pll_prepare, + .unprepare = sam9x60_frac_pll_unprepare, + .is_prepared = sam9x60_frac_pll_is_prepared, + .recalc_rate = sam9x60_frac_pll_recalc_rate, + .round_rate = sam9x60_frac_pll_round_rate, + .set_rate = sam9x60_frac_pll_set_rate, +}; - maxdiv = DIV_ROUND_UP(parent_rate * PLL_MUL_MAX, rate); - if (maxdiv > PLL_DIV_MAX) - maxdiv = PLL_DIV_MAX; - } else { - mindiv = maxdiv = UPLL_DIV; - } +static int sam9x60_div_pll_prepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + struct regmap *regmap = core->regmap; + unsigned long flags; + unsigned int val, cdiv; - for (tmpdiv = mindiv; tmpdiv <= maxdiv; tmpdiv++) { - unsigned long remainder; - unsigned long tmprate; - unsigned long tmpmul; - unsigned long tmpfrac = 0; + spin_lock_irqsave(core->lock, flags); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + cdiv = (val & core->layout->div_mask) >> core->layout->div_shift; - /* - * Calculate the multiplier associated with the current - * divider that provide the closest rate to the requested one. - */ - tmpmul = mult_frac(rate, tmpdiv, parent_rate); - tmprate = mult_frac(parent_rate, tmpmul, tmpdiv); - remainder = rate - tmprate; + /* Stop if enabled an nothing changed. */ + if (!!(val & core->layout->endiv_mask) && cdiv == div->div) + goto unlock; - if (remainder) { - tmpfrac = DIV_ROUND_CLOSEST_ULL((u64)remainder * tmpdiv * (1 << 22), - parent_rate); + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + core->layout->div_mask | core->layout->endiv_mask, + (div->div << core->layout->div_shift) | + (1 << core->layout->endiv_shift)); - tmprate += DIV_ROUND_CLOSEST_ULL((u64)tmpfrac * parent_rate, - tmpdiv * (1 << 22)); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - if (tmprate > rate) - remainder = tmprate - rate; - else - remainder = rate - tmprate; - } + while (!sam9x60_pll_ready(regmap, core->id)) + cpu_relax(); + +unlock: + spin_unlock_irqrestore(core->lock, flags); + + return 0; +} + +static void sam9x60_div_pll_unprepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; + + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + core->layout->endiv_mask, 0); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + spin_unlock_irqrestore(core->lock, flags); +} + +static int sam9x60_div_pll_is_prepared(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; + unsigned int val; + + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + + spin_unlock_irqrestore(core->lock, flags); - fcore = parent_rate * (tmpmul + 1) + - ((u64)parent_rate * tmpfrac >> 22); - if (fcore < FCORE_MIN || fcore > FCORE_MAX) + return !!(val & core->layout->endiv_mask); +} + +static unsigned long sam9x60_div_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, (div->div + 1)); +} + +static long sam9x60_div_pll_compute_div(struct sam9x60_pll_core *core, + unsigned long *parent_rate, + unsigned long rate) +{ + const struct clk_pll_characteristics *characteristics = + core->characteristics; + struct clk_hw *parent = clk_hw_get_parent(&core->hw); + unsigned long tmp_rate, tmp_parent_rate, tmp_diff; + long best_diff = -1, best_rate = -EINVAL; + u32 divid, best_div; + + if (!rate) + return 0; + + if (rate < characteristics->output[0].min || + rate > characteristics->output[0].max) + return -ERANGE; + + for (divid = 1; divid < core->layout->div_mask; divid++) { + tmp_parent_rate = clk_hw_round_rate(parent, rate * divid); + if (!tmp_parent_rate) continue; - /* - * Compare the remainder with the best remainder found until - * now and elect a new best multiplier/divider pair if the - * current remainder is smaller than the best one. - */ - if (remainder < bestremainder) { - bestremainder = remainder; - bestdiv = tmpdiv; - bestmul = tmpmul; - bestrate = tmprate; - bestfrac = tmpfrac; + tmp_rate = DIV_ROUND_CLOSEST_ULL(tmp_parent_rate, divid); + tmp_diff = abs(rate - tmp_rate); + + if (best_diff < 0 || best_diff > tmp_diff) { + *parent_rate = tmp_parent_rate; + best_rate = tmp_rate; + best_diff = tmp_diff; + best_div = divid; } - /* We've found a perfect match! */ - if (!remainder) + if (!best_diff) break; } - /* Check if bestrate is a valid output rate */ - if (fcore < FCORE_MIN || fcore > FCORE_MAX || - bestrate < characteristics->output[0].min || - bestrate > characteristics->output[0].max) + if (best_rate < characteristics->output[0].min || + best_rate > characteristics->output[0].max) return -ERANGE; - if (update) { - pll->div = bestdiv - 1; - pll->mul = bestmul - 1; - pll->frac = bestfrac; - } - - return bestrate; + return best_rate; } -static long sam9x60_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static long sam9x60_div_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - return sam9x60_pll_get_best_div_mul(pll, rate, *parent_rate, false); + return sam9x60_div_pll_compute_div(core, parent_rate, rate); } -static int sam9x60_pll_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) +static int sam9x60_div_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + + div->div = DIV_ROUND_CLOSEST(parent_rate, rate) - 1; - return sam9x60_pll_get_best_div_mul(pll, rate, parent_rate, true); + return 0; } -static const struct clk_ops pll_ops = { - .prepare = sam9x60_pll_prepare, - .unprepare = sam9x60_pll_unprepare, - .is_prepared = sam9x60_pll_is_prepared, - .recalc_rate = sam9x60_pll_recalc_rate, - .round_rate = sam9x60_pll_round_rate, - .set_rate = sam9x60_pll_set_rate, +static const struct clk_ops sam9x60_div_pll_ops = { + .prepare = sam9x60_div_pll_prepare, + .unprepare = sam9x60_div_pll_unprepare, + .is_prepared = sam9x60_div_pll_is_prepared, + .recalc_rate = sam9x60_div_pll_recalc_rate, + .round_rate = sam9x60_div_pll_round_rate, + .set_rate = sam9x60_div_pll_set_rate, }; struct clk_hw * __init -sam9x60_clk_register_pll(struct regmap *regmap, spinlock_t *lock, - const char *name, const char *parent_name, u8 id, - const struct clk_pll_characteristics *characteristics) +sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, + struct clk_hw *parent_hw, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical) { - struct sam9x60_pll *pll; + struct sam9x60_frac *frac; struct clk_hw *hw; struct clk_init_data init; - unsigned int pllr; + unsigned long parent_rate, flags; + unsigned int val; int ret; - if (id > PLL_MAX_ID) + if (id > PLL_MAX_ID || !lock || !parent_hw) return ERR_PTR(-EINVAL); - pll = kzalloc(sizeof(*pll), GFP_KERNEL); - if (!pll) + frac = kzalloc(sizeof(*frac), GFP_KERNEL); + if (!frac) return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &pll_ops; init.parent_names = &parent_name; init.num_parents = 1; + init.ops = &sam9x60_frac_pll_ops; init.flags = CLK_SET_RATE_GATE; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + frac->core.id = id; + frac->core.hw.init = &init; + frac->core.characteristics = characteristics; + frac->core.layout = layout; + frac->core.regmap = regmap; + frac->core.lock = lock; + + spin_lock_irqsave(frac->core.lock, flags); + if (sam9x60_pll_ready(regmap, id)) { + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, id); + regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); + frac->mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, val); + frac->frac = FIELD_GET(PMC_PLL_CTRL1_FRACR_MSK, val); + } else { + /* + * This means the PLL is not setup by bootloaders. In this + * case we need to set the minimum rate for it. Otherwise + * a clock child of this PLL may be enabled before setting + * its rate leading to enabling this PLL with unsupported + * rate. This will lead to PLL not being locked at all. + */ + parent_rate = clk_hw_get_rate(parent_hw); + if (!parent_rate) { + hw = ERR_PTR(-EINVAL); + goto free; + } + + ret = sam9x60_frac_pll_compute_mul_frac(&frac->core, FCORE_MIN, + parent_rate, true); + if (ret <= 0) { + hw = ERR_PTR(ret); + goto free; + } + } + spin_unlock_irqrestore(frac->core.lock, flags); + + hw = &frac->core.hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(frac); + hw = ERR_PTR(ret); + } - pll->id = id; - pll->hw.init = &init; - pll->characteristics = characteristics; - pll->regmap = regmap; - pll->lock = lock; + return hw; + +free: + spin_unlock_irqrestore(frac->core.lock, flags); + kfree(frac); + return hw; +} + +struct clk_hw * __init +sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical) +{ + struct sam9x60_div *div; + struct clk_hw *hw; + struct clk_init_data init; + unsigned long flags; + unsigned int val; + int ret; + + if (id > PLL_MAX_ID || !lock) + return ERR_PTR(-EINVAL); + + div = kzalloc(sizeof(*div), GFP_KERNEL); + if (!div) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.parent_names = &parent_name; + init.num_parents = 1; + init.ops = &sam9x60_div_pll_ops; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + div->core.id = id; + div->core.hw.init = &init; + div->core.characteristics = characteristics; + div->core.layout = layout; + div->core.regmap = regmap; + div->core.lock = lock; + + spin_lock_irqsave(div->core.lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + div->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val); - regmap_write(regmap, AT91_PMC_PLL_UPDT, id); - regmap_read(regmap, AT91_PMC_PLL_CTRL0, &pllr); - pll->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, pllr); - regmap_read(regmap, AT91_PMC_PLL_CTRL1, &pllr); - pll->mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, pllr); + spin_unlock_irqrestore(div->core.lock, flags); - hw = &pll->hw; + hw = &div->core.hw; ret = clk_hw_register(NULL, hw); if (ret) { - kfree(pll); + kfree(div); hw = ERR_PTR(ret); } diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 8d3e22f566cf..6340b9be8205 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -54,8 +54,14 @@ struct clk_master_characteristics { struct clk_pll_layout { u32 pllr_mask; - u16 mul_mask; + u32 mul_mask; + u32 frac_mask; + u32 div_mask; + u32 endiv_mask; u8 mul_shift; + u8 frac_shift; + u8 div_shift; + u8 endiv_shift; }; extern const struct clk_pll_layout at91rm9200_pll_layout; @@ -181,9 +187,17 @@ at91_clk_register_plldiv(struct regmap *regmap, const char *name, const char *parent_name); struct clk_hw * __init -sam9x60_clk_register_pll(struct regmap *regmap, spinlock_t *lock, - const char *name, const char *parent_name, u8 id, - const struct clk_pll_characteristics *characteristics); +sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical); + +struct clk_hw * __init +sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, + struct clk_hw *parent_hw, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical); struct clk_hw * __init at91_clk_register_programmable(struct regmap *regmap, const char *name, diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index d8d4b28ec4cb..ab6318c0589e 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -42,6 +42,20 @@ static const struct clk_pll_characteristics upll_characteristics = { .upll = true, }; +static const struct clk_pll_layout pll_frac_layout = { + .mul_mask = GENMASK(31, 24), + .frac_mask = GENMASK(21, 0), + .mul_shift = 24, + .frac_shift = 0, +}; + +static const struct clk_pll_layout pll_div_layout = { + .div_mask = GENMASK(7, 0), + .endiv_mask = BIT(29), + .div_shift = 0, + .endiv_shift = 29, +}; + static const struct clk_programmable_layout sam9x60_programmable_layout = { .pres_mask = 0xff, .pres_shift = 8, @@ -156,6 +170,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) const char *td_slck_name, *md_slck_name, *mainxtal_name; struct pmc_data *sam9x60_pmc; const char *parent_names[6]; + struct clk_hw *main_osc_hw; struct regmap *regmap; struct clk_hw *hw; int i; @@ -200,6 +215,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) bypass); if (IS_ERR(hw)) goto err_free; + main_osc_hw = hw; parent_names[0] = "main_rc_osc"; parent_names[1] = "main_osc"; @@ -209,15 +225,31 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_pmc->chws[PMC_MAIN] = hw; - hw = sam9x60_clk_register_pll(regmap, &pmc_pll_lock, "pllack", - "mainck", 0, &plla_characteristics); + hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "pllack_fracck", + "mainck", sam9x60_pmc->chws[PMC_MAIN], + 0, &plla_characteristics, + &pll_frac_layout, true); + if (IS_ERR(hw)) + goto err_free; + + hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "pllack_divck", + "pllack_fracck", 0, &plla_characteristics, + &pll_div_layout, true); if (IS_ERR(hw)) goto err_free; sam9x60_pmc->chws[PMC_PLLACK] = hw; - hw = sam9x60_clk_register_pll(regmap, &pmc_pll_lock, "upllck", - "main_osc", 1, &upll_characteristics); + hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "upllck_fracck", + "main_osc", main_osc_hw, 1, + &upll_characteristics, + &pll_frac_layout, false); + if (IS_ERR(hw)) + goto err_free; + + hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "upllck_divck", + "upllck_fracck", 1, &upll_characteristics, + &pll_div_layout, false); if (IS_ERR(hw)) goto err_free; @@ -225,7 +257,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = "mainck"; - parent_names[2] = "pllack"; + parent_names[2] = "pllack_divck"; hw = at91_clk_register_master(regmap, "masterck", 3, parent_names, &sam9x60_master_layout, &mck_characteristics); @@ -234,8 +266,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_pmc->chws[PMC_MCK] = hw; - parent_names[0] = "pllack"; - parent_names[1] = "upllck"; + parent_names[0] = "pllack_divck"; + parent_names[1] = "upllck_divck"; parent_names[2] = "main_osc"; hw = sam9x60_clk_register_usb(regmap, "usbck", parent_names, 3); if (IS_ERR(hw)) @@ -245,8 +277,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[1] = td_slck_name; parent_names[2] = "mainck"; parent_names[3] = "masterck"; - parent_names[4] = "pllack"; - parent_names[5] = "upllck"; + parent_names[4] = "pllack_divck"; + parent_names[5] = "upllck_divck"; for (i = 0; i < 8; i++) { char name[6]; -- cgit From ef396df99251b848596c717b63ff4fe74a941193 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:25 +0300 Subject: clk: at91: clk-utmi: add utmi support for sama7g5 Add UTMI support for SAMA7G5. SAMA7G5's UTMI control is done via XTALF register. Values written at bits 2..0 in this register correspond to the on board crystal oscillator frequency. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-18-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-utmi.c | 103 ++++++++++++++++++++++++++++++++++++++++--- drivers/clk/at91/pmc.h | 4 ++ include/linux/clk/at91_pmc.h | 2 + 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index f1ef4e1f41a9..df9f3fc3b6a6 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -120,9 +120,11 @@ static const struct clk_ops utmi_ops = { .recalc_rate = clk_utmi_recalc_rate, }; -struct clk_hw * __init -at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, - const char *name, const char *parent_name) +static struct clk_hw * __init +at91_clk_register_utmi_internal(struct regmap *regmap_pmc, + struct regmap *regmap_sfr, + const char *name, const char *parent_name, + const struct clk_ops *ops, unsigned long flags) { struct clk_utmi *utmi; struct clk_hw *hw; @@ -134,10 +136,10 @@ at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &utmi_ops; + init.ops = ops; init.parent_names = parent_name ? &parent_name : NULL; init.num_parents = parent_name ? 1 : 0; - init.flags = CLK_SET_RATE_GATE; + init.flags = flags; utmi->hw.init = &init; utmi->regmap_pmc = regmap_pmc; @@ -152,3 +154,94 @@ at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, return hw; } + +struct clk_hw * __init +at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, + const char *name, const char *parent_name) +{ + return at91_clk_register_utmi_internal(regmap_pmc, regmap_sfr, name, + parent_name, &utmi_ops, CLK_SET_RATE_GATE); +} + +static int clk_utmi_sama7g5_prepare(struct clk_hw *hw) +{ + struct clk_utmi *utmi = to_clk_utmi(hw); + struct clk_hw *hw_parent; + unsigned long parent_rate; + unsigned int val; + + hw_parent = clk_hw_get_parent(hw); + parent_rate = clk_hw_get_rate(hw_parent); + + switch (parent_rate) { + case 16000000: + val = 0; + break; + case 20000000: + val = 2; + break; + case 24000000: + val = 3; + break; + case 32000000: + val = 5; + break; + default: + pr_err("UTMICK: unsupported main_xtal rate\n"); + return -EINVAL; + } + + regmap_write(utmi->regmap_pmc, AT91_PMC_XTALF, val); + + return 0; + +} + +static int clk_utmi_sama7g5_is_prepared(struct clk_hw *hw) +{ + struct clk_utmi *utmi = to_clk_utmi(hw); + struct clk_hw *hw_parent; + unsigned long parent_rate; + unsigned int val; + + hw_parent = clk_hw_get_parent(hw); + parent_rate = clk_hw_get_rate(hw_parent); + + regmap_read(utmi->regmap_pmc, AT91_PMC_XTALF, &val); + switch (val & 0x7) { + case 0: + if (parent_rate == 16000000) + return 1; + break; + case 2: + if (parent_rate == 20000000) + return 1; + break; + case 3: + if (parent_rate == 24000000) + return 1; + break; + case 5: + if (parent_rate == 32000000) + return 1; + break; + default: + break; + } + + return 0; +} + +static const struct clk_ops sama7g5_utmi_ops = { + .prepare = clk_utmi_sama7g5_prepare, + .is_prepared = clk_utmi_sama7g5_is_prepared, + .recalc_rate = clk_utmi_recalc_rate, +}; + +struct clk_hw * __init +at91_clk_sama7g5_register_utmi(struct regmap *regmap_pmc, const char *name, + const char *parent_name) +{ + return at91_clk_register_utmi_internal(regmap_pmc, NULL, name, + parent_name, &sama7g5_utmi_ops, 0); +} diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 6340b9be8205..7b86affc6d7c 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -236,6 +236,10 @@ struct clk_hw * __init at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, const char *name, const char *parent_name); +struct clk_hw * __init +at91_clk_sama7g5_register_utmi(struct regmap *regmap, const char *name, + const char *parent_name); + #ifdef CONFIG_PM void pmc_register_id(u8 id); void pmc_register_pck(u8 pck); diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dc5e85f124e0..a4f82e836a7c 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -137,6 +137,8 @@ #define AT91_PMC_PLLADIV2_ON (1 << 12) #define AT91_PMC_H32MXDIV BIT(24) +#define AT91_PMC_XTALF 0x34 /* Main XTAL Frequency Register [SAMA7G5 only] */ + #define AT91_PMC_USB 0x38 /* USB Clock Register [some SAM9 only] */ #define AT91_PMC_USBS (0x1 << 0) /* USB OHCI Input clock selection */ #define AT91_PMC_USBS_PLLA (0 << 0) -- cgit From cb783bbbcf54c36256006895c215e86c5e7266d8 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:26 +0300 Subject: clk: at91: sama7g5: add clock support for sama7g5 Add clock support for SAMA7G5. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-19-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/at91/Makefile | 1 + drivers/clk/at91/sama7g5.c | 1059 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1060 insertions(+) create mode 100644 drivers/clk/at91/sama7g5.c diff --git a/drivers/clk/at91/Makefile b/drivers/clk/at91/Makefile index 8b90357f2a93..79301e1c1c36 100644 --- a/drivers/clk/at91/Makefile +++ b/drivers/clk/at91/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_SOC_SAM9X60) += sam9x60.o obj-$(CONFIG_SOC_SAMA5D3) += sama5d3.o obj-$(CONFIG_SOC_SAMA5D4) += sama5d4.o obj-$(CONFIG_SOC_SAMA5D2) += sama5d2.o +obj-$(CONFIG_SOC_SAMA7G5) += sama7g5.o diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c new file mode 100644 index 000000000000..0db2ab3eca14 --- /dev/null +++ b/drivers/clk/at91/sama7g5.c @@ -0,0 +1,1059 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SAMA7G5 PMC code. + * + * Copyright (C) 2020 Microchip Technology Inc. and its subsidiaries + * + * Author: Claudiu Beznea + * + */ +#include +#include +#include +#include + +#include + +#include "pmc.h" + +#define SAMA7G5_INIT_TABLE(_table, _count) \ + do { \ + u8 _i; \ + for (_i = 0; _i < (_count); _i++) \ + (_table)[_i] = _i; \ + } while (0) + +#define SAMA7G5_FILL_TABLE(_to, _from, _count) \ + do { \ + u8 _i; \ + for (_i = 0; _i < (_count); _i++) { \ + (_to)[_i] = (_from)[_i]; \ + } \ + } while (0) + +static DEFINE_SPINLOCK(pmc_pll_lock); +static DEFINE_SPINLOCK(pmc_mckX_lock); + +/** + * PLL clocks identifiers + * @PLL_ID_CPU: CPU PLL identifier + * @PLL_ID_SYS: System PLL identifier + * @PLL_ID_DDR: DDR PLL identifier + * @PLL_ID_IMG: Image subsystem PLL identifier + * @PLL_ID_BAUD: Baud PLL identifier + * @PLL_ID_AUDIO: Audio PLL identifier + * @PLL_ID_ETH: Ethernet PLL identifier + */ +enum pll_ids { + PLL_ID_CPU, + PLL_ID_SYS, + PLL_ID_DDR, + PLL_ID_IMG, + PLL_ID_BAUD, + PLL_ID_AUDIO, + PLL_ID_ETH, + PLL_ID_MAX, +}; + +/** + * PLL type identifiers + * @PLL_TYPE_FRAC: fractional PLL identifier + * @PLL_TYPE_DIV: divider PLL identifier + */ +enum pll_type { + PLL_TYPE_FRAC, + PLL_TYPE_DIV, +}; + +/* Layout for fractional PLLs. */ +static const struct clk_pll_layout pll_layout_frac = { + .mul_mask = GENMASK(31, 24), + .frac_mask = GENMASK(21, 0), + .mul_shift = 24, + .frac_shift = 0, +}; + +/* Layout for DIVPMC dividers. */ +static const struct clk_pll_layout pll_layout_divpmc = { + .div_mask = GENMASK(7, 0), + .endiv_mask = BIT(29), + .div_shift = 0, + .endiv_shift = 29, +}; + +/* Layout for DIVIO dividers. */ +static const struct clk_pll_layout pll_layout_divio = { + .div_mask = GENMASK(19, 12), + .endiv_mask = BIT(30), + .div_shift = 12, + .endiv_shift = 30, +}; + +/** + * PLL clocks description + * @n: clock name + * @p: clock parent + * @l: clock layout + * @t: clock type + * @f: true if clock is critical and cannot be disabled + * @eid: export index in sama7g5->chws[] array + */ +static const struct { + const char *n; + const char *p; + const struct clk_pll_layout *l; + u8 t; + u8 c; + u8 eid; +} sama7g5_plls[][PLL_ID_MAX] = { + [PLL_ID_CPU] = { + { .n = "cpupll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "cpupll_divpmcck", + .p = "cpupll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_SYS] = { + { .n = "syspll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "syspll_divpmcck", + .p = "syspll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_DDR] = { + { .n = "ddrpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "ddrpll_divpmcck", + .p = "ddrpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_IMG] = { + { .n = "imgpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "imgpll_divpmcck", + .p = "imgpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, + + [PLL_ID_BAUD] = { + { .n = "baudpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "baudpll_divpmcck", + .p = "baudpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, + + [PLL_ID_AUDIO] = { + { .n = "audiopll_fracck", + .p = "main_xtal", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "audiopll_divpmcck", + .p = "audiopll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .eid = PMC_I2S0_MUX, }, + + { .n = "audiopll_diviock", + .p = "audiopll_fracck", + .l = &pll_layout_divio, + .t = PLL_TYPE_DIV, + .eid = PMC_I2S1_MUX, }, + }, + + [PLL_ID_ETH] = { + { .n = "ethpll_fracck", + .p = "main_xtal", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "ethpll_divpmcck", + .p = "ethpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, +}; + +/** + * Master clock (MCK[1..4]) description + * @n: clock name + * @ep: extra parents names array + * @ep_chg_chg_id: index in parents array that specifies the changeable + * parent + * @ep_count: extra parents count + * @ep_mux_table: mux table for extra parents + * @id: clock id + * @c: true if clock is critical and cannot be disabled + */ +static const struct { + const char *n; + const char *ep[4]; + int ep_chg_id; + u8 ep_count; + u8 ep_mux_table[4]; + u8 id; + u8 c; +} sama7g5_mckx[] = { + { .n = "mck1", + .id = 1, + .ep = { "syspll_divpmcck", }, + .ep_mux_table = { 5, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, + + { .n = "mck2", + .id = 2, + .ep = { "ddrpll_divpmcck", }, + .ep_mux_table = { 6, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, + + { .n = "mck3", + .id = 3, + .ep = { "syspll_divpmcck", "ddrpll_divpmcck", "imgpll_divpmcck", }, + .ep_mux_table = { 5, 6, 7, }, + .ep_count = 3, + .ep_chg_id = 6, }, + + { .n = "mck4", + .id = 4, + .ep = { "syspll_divpmcck", }, + .ep_mux_table = { 5, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, +}; + +/** + * System clock description + * @n: clock name + * @p: clock parent name + * @id: clock id + */ +static const struct { + const char *n; + const char *p; + u8 id; +} sama7g5_systemck[] = { + { .n = "pck0", .p = "prog0", .id = 8, }, + { .n = "pck1", .p = "prog1", .id = 9, }, + { .n = "pck2", .p = "prog2", .id = 10, }, + { .n = "pck3", .p = "prog3", .id = 11, }, + { .n = "pck4", .p = "prog4", .id = 12, }, + { .n = "pck5", .p = "prog5", .id = 13, }, + { .n = "pck6", .p = "prog6", .id = 14, }, + { .n = "pck7", .p = "prog7", .id = 15, }, +}; + +/* Mux table for programmable clocks. */ +static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, }; + +/** + * Peripheral clock description + * @n: clock name + * @p: clock parent name + * @r: clock range values + * @id: clock id + * @chgp: index in parent array of the changeable parent + */ +static const struct { + const char *n; + const char *p; + struct clk_range r; + u8 chgp; + u8 id; +} sama7g5_periphck[] = { + { .n = "pioA_clk", .p = "mck0", .id = 11, }, + { .n = "sfr_clk", .p = "mck1", .id = 19, }, + { .n = "hsmc_clk", .p = "mck1", .id = 21, }, + { .n = "xdmac0_clk", .p = "mck1", .id = 22, }, + { .n = "xdmac1_clk", .p = "mck1", .id = 23, }, + { .n = "xdmac2_clk", .p = "mck1", .id = 24, }, + { .n = "acc_clk", .p = "mck1", .id = 25, }, + { .n = "aes_clk", .p = "mck1", .id = 27, }, + { .n = "tzaesbasc_clk", .p = "mck1", .id = 28, }, + { .n = "asrc_clk", .p = "mck1", .id = 30, .r = { .max = 200000000, }, }, + { .n = "cpkcc_clk", .p = "mck0", .id = 32, }, + { .n = "csi_clk", .p = "mck3", .id = 33, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "csi2dc_clk", .p = "mck3", .id = 34, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "eic_clk", .p = "mck1", .id = 37, }, + { .n = "flex0_clk", .p = "mck1", .id = 38, }, + { .n = "flex1_clk", .p = "mck1", .id = 39, }, + { .n = "flex2_clk", .p = "mck1", .id = 40, }, + { .n = "flex3_clk", .p = "mck1", .id = 41, }, + { .n = "flex4_clk", .p = "mck1", .id = 42, }, + { .n = "flex5_clk", .p = "mck1", .id = 43, }, + { .n = "flex6_clk", .p = "mck1", .id = 44, }, + { .n = "flex7_clk", .p = "mck1", .id = 45, }, + { .n = "flex8_clk", .p = "mck1", .id = 46, }, + { .n = "flex9_clk", .p = "mck1", .id = 47, }, + { .n = "flex10_clk", .p = "mck1", .id = 48, }, + { .n = "flex11_clk", .p = "mck1", .id = 49, }, + { .n = "gmac0_clk", .p = "mck1", .id = 51, }, + { .n = "gmac1_clk", .p = "mck1", .id = 52, }, + { .n = "icm_clk", .p = "mck1", .id = 55, }, + { .n = "isc_clk", .p = "mck3", .id = 56, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "i2smcc0_clk", .p = "mck1", .id = 57, .r = { .max = 200000000, }, }, + { .n = "i2smcc1_clk", .p = "mck1", .id = 58, .r = { .max = 200000000, }, }, + { .n = "matrix_clk", .p = "mck1", .id = 60, }, + { .n = "mcan0_clk", .p = "mck1", .id = 61, .r = { .max = 200000000, }, }, + { .n = "mcan1_clk", .p = "mck1", .id = 62, .r = { .max = 200000000, }, }, + { .n = "mcan2_clk", .p = "mck1", .id = 63, .r = { .max = 200000000, }, }, + { .n = "mcan3_clk", .p = "mck1", .id = 64, .r = { .max = 200000000, }, }, + { .n = "mcan4_clk", .p = "mck1", .id = 65, .r = { .max = 200000000, }, }, + { .n = "mcan5_clk", .p = "mck1", .id = 66, .r = { .max = 200000000, }, }, + { .n = "pdmc0_clk", .p = "mck1", .id = 68, .r = { .max = 200000000, }, }, + { .n = "pdmc1_clk", .p = "mck1", .id = 69, .r = { .max = 200000000, }, }, + { .n = "pit64b0_clk", .p = "mck1", .id = 70, }, + { .n = "pit64b1_clk", .p = "mck1", .id = 71, }, + { .n = "pit64b2_clk", .p = "mck1", .id = 72, }, + { .n = "pit64b3_clk", .p = "mck1", .id = 73, }, + { .n = "pit64b4_clk", .p = "mck1", .id = 74, }, + { .n = "pit64b5_clk", .p = "mck1", .id = 75, }, + { .n = "pwm_clk", .p = "mck1", .id = 77, }, + { .n = "qspi0_clk", .p = "mck1", .id = 78, }, + { .n = "qspi1_clk", .p = "mck1", .id = 79, }, + { .n = "sdmmc0_clk", .p = "mck1", .id = 80, }, + { .n = "sdmmc1_clk", .p = "mck1", .id = 81, }, + { .n = "sdmmc2_clk", .p = "mck1", .id = 82, }, + { .n = "sha_clk", .p = "mck1", .id = 83, }, + { .n = "spdifrx_clk", .p = "mck1", .id = 84, .r = { .max = 200000000, }, }, + { .n = "spdiftx_clk", .p = "mck1", .id = 85, .r = { .max = 200000000, }, }, + { .n = "ssc0_clk", .p = "mck1", .id = 86, .r = { .max = 200000000, }, }, + { .n = "ssc1_clk", .p = "mck1", .id = 87, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch0_clk", .p = "mck1", .id = 88, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch1_clk", .p = "mck1", .id = 89, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch2_clk", .p = "mck1", .id = 90, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch0_clk", .p = "mck1", .id = 91, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch1_clk", .p = "mck1", .id = 92, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch2_clk", .p = "mck1", .id = 93, .r = { .max = 200000000, }, }, + { .n = "tcpca_clk", .p = "mck1", .id = 94, }, + { .n = "tcpcb_clk", .p = "mck1", .id = 95, }, + { .n = "tdes_clk", .p = "mck1", .id = 96, }, + { .n = "trng_clk", .p = "mck1", .id = 97, }, + { .n = "udphsa_clk", .p = "mck1", .id = 104, }, + { .n = "udphsb_clk", .p = "mck1", .id = 105, }, + { .n = "uhphs_clk", .p = "mck1", .id = 106, }, +}; + +/** + * Generic clock description + * @n: clock name + * @pp: PLL parents + * @pp_mux_table: PLL parents mux table + * @r: clock output range + * @pp_chg_id: id in parrent array of changeable PLL parent + * @pp_count: PLL parents count + * @id: clock id + */ +static const struct { + const char *n; + const char *pp[8]; + const char pp_mux_table[8]; + struct clk_range r; + int pp_chg_id; + u8 pp_count; + u8 id; +} sama7g5_gck[] = { + { .n = "adc_gclk", + .id = 26, + .r = { .max = 100000000, }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 7, 9, }, + .pp_count = 3, + .pp_chg_id = INT_MIN, }, + + { .n = "asrc_gclk", + .id = 30, + .r = { .max = 200000000 }, + .pp = { "audiopll_divpmcck", }, + .pp_mux_table = { 9, }, + .pp_count = 1, + .pp_chg_id = 4, }, + + { .n = "csi_gclk", + .id = 33, + .r = { .max = 27000000 }, + .pp = { "ddrpll_divpmcck", "imgpll_divpmcck", }, + .pp_mux_table = { 6, 7, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex0_gclk", + .id = 38, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex1_gclk", + .id = 39, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex2_gclk", + .id = 40, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex3_gclk", + .id = 41, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex4_gclk", + .id = 42, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex5_gclk", + .id = 43, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex6_gclk", + .id = 44, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex7_gclk", + .id = 45, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex8_gclk", + .id = 46, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex9_gclk", + .id = 47, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex10_gclk", + .id = 48, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex11_gclk", + .id = 49, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac0_gclk", + .id = 51, + .r = { .max = 125000000 }, + .pp = { "ethpll_divpmcck", }, + .pp_mux_table = { 10, }, + .pp_count = 1, + .pp_chg_id = 4, }, + + { .n = "gmac1_gclk", + .id = 52, + .r = { .max = 50000000 }, + .pp = { "ethpll_divpmcck", }, + .pp_mux_table = { 10, }, + .pp_count = 1, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac0_tsu_gclk", + .id = 53, + .r = { .max = 300000000 }, + .pp = { "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 9, 10, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac1_tsu_gclk", + .id = 54, + .r = { .max = 300000000 }, + .pp = { "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 9, 10, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "i2smcc0_gclk", + .id = 57, + .r = { .max = 100000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "i2smcc1_gclk", + .id = 58, + .r = { .max = 100000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "mcan0_gclk", + .id = 61, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan1_gclk", + .id = 62, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan2_gclk", + .id = 63, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan3_gclk", + .id = 64, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan4_gclk", + .id = 65, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan5_gclk", + .id = 66, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pdmc0_gclk", + .id = 68, + .r = { .max = 50000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pdmc1_gclk", + .id = 69, + .r = { .max = 50000000, }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b0_gclk", + .id = 70, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b1_gclk", + .id = 71, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b2_gclk", + .id = 72, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b3_gclk", + .id = 73, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b4_gclk", + .id = 74, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b5_gclk", + .id = 75, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "qspi0_gclk", + .id = 78, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "qspi1_gclk", + .id = 79, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "sdmmc0_gclk", + .id = 80, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "sdmmc1_gclk", + .id = 81, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "sdmmc2_gclk", + .id = 82, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "spdifrx_gclk", + .id = 84, + .r = { .max = 150000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "spdiftx_gclk", + .id = 85, + .r = { .max = 25000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "tcb0_ch0_gclk", + .id = 88, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "tcb1_ch0_gclk", + .id = 91, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "tcpca_gclk", + .id = 94, + .r = { .max = 32768, }, + .pp_chg_id = INT_MIN, }, + + { .n = "tcpcb_gclk", + .id = 95, + .r = { .max = 32768, }, + .pp_chg_id = INT_MIN, }, +}; + +/* PLL output range. */ +static const struct clk_range pll_outputs[] = { + { .min = 2343750, .max = 1200000000 }, +}; + +/* PLL characteristics. */ +static const struct clk_pll_characteristics pll_characteristics = { + .input = { .min = 12000000, .max = 50000000 }, + .num_output = ARRAY_SIZE(pll_outputs), + .output = pll_outputs, +}; + +/* MCK0 characteristics. */ +static const struct clk_master_characteristics mck0_characteristics = { + .output = { .min = 140000000, .max = 200000000 }, + .divisors = { 1, 2, 4, 3 }, + .have_div3_pres = 1, +}; + +/* MCK0 layout. */ +static const struct clk_master_layout mck0_layout = { + .mask = 0x373, + .pres_shift = 4, + .offset = 0x28, +}; + +/* Programmable clock layout. */ +static const struct clk_programmable_layout programmable_layout = { + .pres_mask = 0xff, + .pres_shift = 8, + .css_mask = 0x1f, + .have_slck_mck = 0, + .is_pres_direct = 1, +}; + +/* Peripheral clock layout. */ +static const struct clk_pcr_layout sama7g5_pcr_layout = { + .offset = 0x88, + .cmd = BIT(31), + .gckcss_mask = GENMASK(12, 8), + .pid_mask = GENMASK(6, 0), +}; + +static void __init sama7g5_pmc_setup(struct device_node *np) +{ + const char *td_slck_name, *md_slck_name, *mainxtal_name; + struct pmc_data *sama7g5_pmc; + const char *parent_names[10]; + void **alloc_mem = NULL; + int alloc_mem_size = 0; + struct regmap *regmap; + struct clk_hw *hw; + bool bypass; + int i, j; + + i = of_property_match_string(np, "clock-names", "td_slck"); + if (i < 0) + return; + + td_slck_name = of_clk_get_parent_name(np, i); + + i = of_property_match_string(np, "clock-names", "md_slck"); + if (i < 0) + return; + + md_slck_name = of_clk_get_parent_name(np, i); + + i = of_property_match_string(np, "clock-names", "main_xtal"); + if (i < 0) + return; + + mainxtal_name = of_clk_get_parent_name(np, i); + + regmap = device_node_to_regmap(np); + if (IS_ERR(regmap)) + return; + + sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1, + nck(sama7g5_systemck), + nck(sama7g5_periphck), + nck(sama7g5_gck)); + if (!sama7g5_pmc) + return; + + alloc_mem = kmalloc(sizeof(void *) * + (ARRAY_SIZE(sama7g5_mckx) + ARRAY_SIZE(sama7g5_gck)), + GFP_KERNEL); + if (!alloc_mem) + goto err_free; + + hw = at91_clk_register_main_rc_osc(regmap, "main_rc_osc", 12000000, + 50000000); + if (IS_ERR(hw)) + goto err_free; + + bypass = of_property_read_bool(np, "atmel,osc-bypass"); + + hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, + bypass); + if (IS_ERR(hw)) + goto err_free; + + parent_names[0] = "main_rc_osc"; + parent_names[1] = "main_osc"; + hw = at91_clk_register_sam9x5_main(regmap, "mainck", parent_names, 2); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_MAIN] = hw; + + for (i = 0; i < PLL_ID_MAX; i++) { + for (j = 0; j < 3; j++) { + struct clk_hw *parent_hw; + + if (!sama7g5_plls[i][j].n) + continue; + + switch (sama7g5_plls[i][j].t) { + case PLL_TYPE_FRAC: + if (!strcmp(sama7g5_plls[i][j].p, "mainck")) + parent_hw = sama7g5_pmc->chws[PMC_MAIN]; + else + parent_hw = __clk_get_hw(of_clk_get_by_name(np, + sama7g5_plls[i][j].p)); + + hw = sam9x60_clk_register_frac_pll(regmap, + &pmc_pll_lock, sama7g5_plls[i][j].n, + sama7g5_plls[i][j].p, parent_hw, i, + &pll_characteristics, + sama7g5_plls[i][j].l, + sama7g5_plls[i][j].c); + break; + + case PLL_TYPE_DIV: + hw = sam9x60_clk_register_div_pll(regmap, + &pmc_pll_lock, sama7g5_plls[i][j].n, + sama7g5_plls[i][j].p, i, + &pll_characteristics, + sama7g5_plls[i][j].l, + sama7g5_plls[i][j].c); + break; + + default: + continue; + } + + if (IS_ERR(hw)) + goto err_free; + + if (sama7g5_plls[i][j].eid) + sama7g5_pmc->chws[sama7g5_plls[i][j].eid] = hw; + } + } + + parent_names[0] = md_slck_name; + parent_names[1] = "mainck"; + parent_names[2] = "cpupll_divpmcck"; + parent_names[3] = "syspll_divpmcck"; + hw = at91_clk_register_master(regmap, "mck0", 4, parent_names, + &mck0_layout, &mck0_characteristics); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_MCK] = hw; + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + for (i = 0; i < ARRAY_SIZE(sama7g5_mckx); i++) { + u8 num_parents = 4 + sama7g5_mckx[i].ep_count; + u32 *mux_table; + + mux_table = kmalloc_array(num_parents, sizeof(*mux_table), + GFP_KERNEL); + if (!mux_table) + goto err_free; + + SAMA7G5_INIT_TABLE(mux_table, 4); + SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_mckx[i].ep_mux_table, + sama7g5_mckx[i].ep_count); + SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_mckx[i].ep, + sama7g5_mckx[i].ep_count); + + hw = at91_clk_sama7g5_register_master(regmap, sama7g5_mckx[i].n, + num_parents, parent_names, mux_table, + &pmc_mckX_lock, sama7g5_mckx[i].id, + sama7g5_mckx[i].c, + sama7g5_mckx[i].ep_chg_id); + if (IS_ERR(hw)) + goto err_free; + + alloc_mem[alloc_mem_size++] = mux_table; + } + + hw = at91_clk_sama7g5_register_utmi(regmap, "utmick", "main_xtal"); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_UTMI] = hw; + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + parent_names[4] = "syspll_divpmcck"; + parent_names[5] = "ddrpll_divpmcck"; + parent_names[6] = "imgpll_divpmcck"; + parent_names[7] = "baudpll_divpmcck"; + parent_names[8] = "audiopll_divpmcck"; + parent_names[9] = "ethpll_divpmcck"; + for (i = 0; i < 8; i++) { + char name[6]; + + snprintf(name, sizeof(name), "prog%d", i); + + hw = at91_clk_register_programmable(regmap, name, parent_names, + 10, i, + &programmable_layout, + sama7g5_prog_mux_table); + if (IS_ERR(hw)) + goto err_free; + } + + for (i = 0; i < ARRAY_SIZE(sama7g5_systemck); i++) { + hw = at91_clk_register_system(regmap, sama7g5_systemck[i].n, + sama7g5_systemck[i].p, + sama7g5_systemck[i].id); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->shws[sama7g5_systemck[i].id] = hw; + } + + for (i = 0; i < ARRAY_SIZE(sama7g5_periphck); i++) { + hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, + &sama7g5_pcr_layout, + sama7g5_periphck[i].n, + sama7g5_periphck[i].p, + sama7g5_periphck[i].id, + &sama7g5_periphck[i].r, + sama7g5_periphck[i].chgp ? 0 : + INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->phws[sama7g5_periphck[i].id] = hw; + } + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + for (i = 0; i < ARRAY_SIZE(sama7g5_gck); i++) { + u8 num_parents = 4 + sama7g5_gck[i].pp_count; + u32 *mux_table; + + mux_table = kmalloc_array(num_parents, sizeof(*mux_table), + GFP_KERNEL); + if (!mux_table) + goto err_free; + + SAMA7G5_INIT_TABLE(mux_table, 4); + SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_gck[i].pp_mux_table, + sama7g5_gck[i].pp_count); + SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_gck[i].pp, + sama7g5_gck[i].pp_count); + + hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, + &sama7g5_pcr_layout, + sama7g5_gck[i].n, + parent_names, mux_table, + num_parents, + sama7g5_gck[i].id, + &sama7g5_gck[i].r, + sama7g5_gck[i].pp_chg_id); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->ghws[sama7g5_gck[i].id] = hw; + alloc_mem[alloc_mem_size++] = mux_table; + } + + of_clk_add_hw_provider(np, of_clk_hw_pmc_get, sama7g5_pmc); + + return; + +err_free: + if (alloc_mem) { + for (i = 0; i < alloc_mem_size; i++) + kfree(alloc_mem[i]); + kfree(alloc_mem); + } + + pmc_data_free(sama7g5_pmc); +} + +/* Some clks are used for a clocksource */ +CLK_OF_DECLARE(sama7g5_pmc, "microchip,sama7g5-pmc", sama7g5_pmc_setup); -- cgit From 413d84b88bee08ed59713e782053ff38d206c606 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 24 Jul 2020 02:40:25 -0700 Subject: clk: qcom: Export gdsc_gx_do_nothing_enable() to modules A clk driver can be a module but the gdsc code is in the common module. Export this symbol so that allmodconfig builds keep working. Cc: Jonathan Marek Fixes: 0638226dd095 ("clk: qcom: add common gdsc_gx_do_nothing_enable for gpucc drivers") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20200724094025.3261266-1-sboyd@kernel.org --- drivers/clk/qcom/gdsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 628397703717..7e4273f314f7 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -458,3 +459,4 @@ int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain) /* Do nothing but give genpd the impression that we were successful */ return 0; } +EXPORT_SYMBOL_GPL(gdsc_gx_do_nothing_enable); -- cgit From 6378cfdcc32d3790b04c953e7766324d8ee04a90 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 22 Jun 2020 02:09:35 -0700 Subject: clk: Clean up kernel-doc errors Two things aren't documented causing kernel-doc to fail when checking the core clk.c file. Fix them so that this file is clean. Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20200622090935.213833-1-sboyd@kernel.org --- drivers/clk/clk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 236923b25543..47c0ee9da462 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4135,6 +4135,7 @@ static int devm_clk_hw_match(struct device *dev, void *res, void *data) /** * devm_clk_unregister - resource managed clk_unregister() + * @dev: device that is unregistering the clock data * @clk: clock to unregister * * Deallocate a clock allocated with devm_clk_register(). Normally @@ -4324,6 +4325,8 @@ static void clk_core_reparent_orphans(void) * @node: Pointer to device tree node of clock provider * @get: Get clock callback. Returns NULL or a struct clk for the * given clock specifier + * @get_hw: Get clk_hw callback. Returns NULL, ERR_PTR or a + * struct clk_hw for the given clock specifier * @data: context pointer to be passed into @get callback */ struct of_clk_provider { -- cgit From 58031a26bf2b0c458511df146143c78ab569f4da Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 9 Jun 2020 15:15:55 -0500 Subject: i2c: fsi: Prevent adding adapters for ports without dts nodes Ports should be defined in the devicetree if they are to be enabled on the system. Signed-off-by: Eddie James Signed-off-by: Joel Stanley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-fsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c index 977d6f524649..10332693edf0 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c @@ -703,7 +703,7 @@ static int fsi_i2c_probe(struct device *dev) for (port_no = 0; port_no < ports; port_no++) { np = fsi_i2c_find_port_of_node(dev->of_node, port_no); - if (np && !of_device_is_available(np)) + if (!of_device_is_available(np)) continue; port = kzalloc(sizeof(*port), GFP_KERNEL); -- cgit From 0c2a34937f7e4c4776bb261114c475392da2355c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jun 2020 18:24:40 +0200 Subject: i2c: revert "i2c: core: Allow drivers to disable i2c-core irq mapping" This manually reverts commit d1d84bb95364ed604015c2b788caaf3dbca0262f. The only user has gone two years ago with commit 589edb56b424 ("ACPI / scan: Create platform device for INT33FE ACPI nodes") and no new user has showed up. Remove and hope we will never need it again. Signed-off-by: Wolfram Sang Acked-by: Hans de Goede Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 6 +++--- include/linux/i2c.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 26f03a14a478..dc43242a85ba 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -319,11 +319,9 @@ static int i2c_device_probe(struct device *dev) if (!client) return 0; - driver = to_i2c_driver(dev->driver); - client->irq = client->init_irq; - if (!client->irq && !driver->disable_i2c_core_irq_mapping) { + if (!client->irq) { int irq = -ENOENT; if (client->flags & I2C_CLIENT_HOST_NOTIFY) { @@ -349,6 +347,8 @@ static int i2c_device_probe(struct device *dev) client->irq = irq; } + driver = to_i2c_driver(dev->driver); + /* * An I2C ID table is not mandatory, if and only if, a suitable OF * or ACPI ID table is supplied for the probing device. diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b8b8963f8bb9..098405df431f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -231,7 +231,6 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) - * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. @@ -290,8 +289,6 @@ struct i2c_driver { int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; - - bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) -- cgit From 7d98404c8162b7f1d1b22eccce92f5ee7f93cab8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 24 Jul 2020 22:07:18 +0300 Subject: drm/omap: Use {} to zero initialize the mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first member of drm_display_mode is no longer a structure, but the code is still using {{0}} to zero initialize it. Make that just {} so it works regardless of what lies inside. Cc: Dave Airlie Cc: Laurent Pinchart Cc: Tomi Valkeinen Fixes: 42acb06b01b1 ("drm: pahole struct drm_display_mode") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200724190718.23567-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/omapdrm/omap_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c index 528764566b17..de95dc1b861c 100644 --- a/drivers/gpu/drm/omapdrm/omap_connector.c +++ b/drivers/gpu/drm/omapdrm/omap_connector.c @@ -89,7 +89,7 @@ static enum drm_mode_status omap_connector_mode_valid(struct drm_connector *conn struct drm_display_mode *mode) { struct omap_connector *omap_connector = to_omap_connector(connector); - struct drm_display_mode new_mode = { { 0 } }; + struct drm_display_mode new_mode = {}; enum drm_mode_status status; status = omap_connector_mode_fixup(omap_connector->output, mode, -- cgit From 173722995cdb7c22d21abd484a5001c903df1e20 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 24 Jul 2020 21:37:55 +0530 Subject: clk: qcom: gdsc: Add support to enable retention of GSDCR Add support for the RETAIN_FF_ENABLE feature which enables the usage of retention registers. These registers maintain their state after disabling and re-enabling a GDSC. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1595606878-2664-2-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gdsc.c | 12 ++++++++++++ drivers/clk/qcom/gdsc.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7e4273f314f7..bfc4ac02f9ea 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -30,6 +30,7 @@ /* CFG_GDSCR */ #define GDSC_POWER_UP_COMPLETE BIT(16) #define GDSC_POWER_DOWN_COMPLETE BIT(15) +#define GDSC_RETAIN_FF_ENABLE BIT(11) #define CFG_GDSCR_OFFSET 0x4 /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */ @@ -217,6 +218,14 @@ static inline void gdsc_assert_reset_aon(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->clamp_io_ctrl, GMEM_RESET_MASK, 0); } + +static void gdsc_retain_ff_on(struct gdsc *sc) +{ + u32 mask = GDSC_RETAIN_FF_ENABLE; + + regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); +} + static int gdsc_enable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); @@ -269,6 +278,9 @@ static int gdsc_enable(struct generic_pm_domain *domain) udelay(1); } + if (sc->flags & RETAIN_FF_ENABLE) + gdsc_retain_ff_on(sc); + return 0; } diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 1896bfb2bbd1..bd537438c793 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -50,6 +50,7 @@ struct gdsc { #define AON_RESET BIT(4) #define POLL_CFG_GDSCR BIT(5) #define ALWAYS_ON BIT(6) +#define RETAIN_FF_ENABLE BIT(7) struct reset_controller_dev *rcdev; unsigned int *resets; unsigned int reset_count; -- cgit From 381cc6f97cdaf15dabceb1b48fa82c9de0163e58 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 24 Jul 2020 21:37:56 +0530 Subject: dt-bindings: clock: Add YAML schemas for LPASS clocks on SC7180 The LPASS(Low Power Audio Subsystem) clock provider have a bunch of generic properties that are needed in a device tree. Also add clock ids for GCC LPASS and LPASS Core clock IDs for LPASS client to request for the clocks. Signed-off-by: Taniya Das Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1595606878-2664-3-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- .../bindings/clock/qcom,sc7180-lpasscorecc.yaml | 102 +++++++++++++++++++++ include/dt-bindings/clock/qcom,gcc-sc7180.h | 1 + .../dt-bindings/clock/qcom,lpasscorecc-sc7180.h | 29 ++++++ 3 files changed, 132 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml create mode 100644 include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml new file mode 100644 index 000000000000..a838250b33e7 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-lpasscorecc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm LPASS Core Clock Controller Binding for SC7180 + +maintainers: + - Taniya Das + +description: | + Qualcomm LPASS core clock control module which supports the clocks and + power domains on SC7180. + + See also: + - dt-bindings/clock/qcom,lpasscorecc-sc7180.h + +properties: + compatible: + enum: + - qcom,sc7180-lpasshm + - qcom,sc7180-lpasscorecc + + clocks: + items: + - description: gcc_lpass_sway clock from GCC + + clock-names: + items: + - const: iface + + power-domains: + maxItems: 1 + + '#clock-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + minItems: 1 + items: + - description: lpass core cc register + - description: lpass audio cc register + + reg-names: + items: + - const: lpass_core_cc + - const: lpass_audio_cc + +if: + properties: + compatible: + contains: + const: qcom,sc7180-lpasshm +then: + properties: + reg: + maxItems: 1 + +else: + properties: + reg: + minItems: 2 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + #include + #include + clock-controller@63000000 { + compatible = "qcom,sc7180-lpasshm"; + reg = <0x63000000 0x28>; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>; + clock-names = "iface"; + #clock-cells = <1>; + #power-domain-cells = <1>; + }; + + - | + clock-controller@62d00000 { + compatible = "qcom,sc7180-lpasscorecc"; + reg = <0x62d00000 0x50000>, <0x62780000 0x30000>; + reg-names = "lpass_core_cc", "lpass_audio_cc"; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>; + clock-names = "iface"; + power-domains = <&lpass_hm LPASS_CORE_HM_GDSCR>; + #clock-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,gcc-sc7180.h b/include/dt-bindings/clock/qcom,gcc-sc7180.h index 992b67b7e5e4..bdf43adc7897 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc7180.h +++ b/include/dt-bindings/clock/qcom,gcc-sc7180.h @@ -138,6 +138,7 @@ #define GCC_MSS_Q6_MEMNOC_AXI_CLK 128 #define GCC_MSS_SNOC_AXI_CLK 129 #define GCC_SEC_CTRL_CLK_SRC 130 +#define GCC_LPASS_CFG_NOC_SWAY_CLK 131 /* GCC resets */ #define GCC_QUSB2PHY_PRIM_BCR 0 diff --git a/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h b/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h new file mode 100644 index 000000000000..a55d01db2b20 --- /dev/null +++ b/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_LPASS_CORE_CC_SC7180_H +#define _DT_BINDINGS_CLK_QCOM_LPASS_CORE_CC_SC7180_H + +/* LPASS_CORE_CC clocks */ +#define LPASS_LPAAUDIO_DIG_PLL 0 +#define LPASS_LPAAUDIO_DIG_PLL_OUT_ODD 1 +#define CORE_CLK_SRC 2 +#define EXT_MCLK0_CLK_SRC 3 +#define LPAIF_PRI_CLK_SRC 4 +#define LPAIF_SEC_CLK_SRC 5 +#define LPASS_AUDIO_CORE_CORE_CLK 6 +#define LPASS_AUDIO_CORE_EXT_MCLK0_CLK 7 +#define LPASS_AUDIO_CORE_LPAIF_PRI_IBIT_CLK 8 +#define LPASS_AUDIO_CORE_LPAIF_SEC_IBIT_CLK 9 +#define LPASS_AUDIO_CORE_SYSNOC_MPORT_CORE_CLK 10 + +/* LPASS Core power domains */ +#define LPASS_CORE_HM_GDSCR 0 + +/* LPASS Audio power domains */ +#define LPASS_AUDIO_HM_GDSCR 0 +#define LPASS_PDC_HM_GDSCR 1 + +#endif -- cgit From 47110b6aa5c8269da4a3b5129b066211f978aa86 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 24 Jul 2020 21:37:57 +0530 Subject: clk: qcom: gcc: Add support for GCC LPASS clock for SC7180 Add the GCC lpass clock which is required to access the LPASS core clocks. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1595606878-2664-4-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7180.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index 538677befb86..68d8f7aaf64e 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -2251,6 +2251,19 @@ static struct clk_branch gcc_mss_q6_memnoc_axi_clk = { }, }; +static struct clk_branch gcc_lpass_cfg_noc_sway_clk = { + .halt_reg = 0x47018, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x47018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_cfg_noc_sway_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + static struct gdsc ufs_phy_gdsc = { .gdscr = 0x77004, .pd = { @@ -2428,6 +2441,7 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_MSS_Q6_MEMNOC_AXI_CLK] = &gcc_mss_q6_memnoc_axi_clk.clkr, [GCC_MSS_SNOC_AXI_CLK] = &gcc_mss_snoc_axi_clk.clkr, [GCC_SEC_CTRL_CLK_SRC] = &gcc_sec_ctrl_clk_src.clkr, + [GCC_LPASS_CFG_NOC_SWAY_CLK] = &gcc_lpass_cfg_noc_sway_clk.clkr, }; static const struct qcom_reset_map gcc_sc7180_resets[] = { -- cgit From edab812d802d248e3d07719c036a865c67ad3a87 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 24 Jul 2020 21:37:58 +0530 Subject: clk: qcom: lpass: Add support for LPASS clock controller for SC7180 The Low Power Audio subsystem clocks are required for Audio client to be able to request for the clocks and power domains. Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1595606878-2664-5-git-send-email-tdas@codeaurora.org [sboyd@kernel.org: Drop unused ret in probe function] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 9 + drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/lpasscorecc-sc7180.c | 476 ++++++++++++++++++++++++++++++++++ 3 files changed, 486 insertions(+) create mode 100644 drivers/clk/qcom/lpasscorecc-sc7180.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index cd0f01136039..058327310c25 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -308,6 +308,15 @@ config SC_GCC_7180 Say Y if you want to use peripheral devices such as UART, SPI, I2C, USB, UFS, SDCC, etc. +config SC_LPASS_CORECC_7180 + tristate "SC7180 LPASS Core Clock Controller" + select SC_GCC_7180 + help + Support for the LPASS(Low Power Audio Subsystem) core clock controller + on SC7180 devices. + Say Y if you want to use LPASS clocks and power domains of the LPASS + core clock controller. + config SC_GPUCC_7180 tristate "SC7180 Graphics Clock Controller" select SC_GCC_7180 diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 1e9a21882ed5..9677e769e7e9 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o obj-$(CONFIG_SC_DISPCC_7180) += dispcc-sc7180.o obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o obj-$(CONFIG_SC_GPUCC_7180) += gpucc-sc7180.o +obj-$(CONFIG_SC_LPASS_CORECC_7180) += lpasscorecc-sc7180.o obj-$(CONFIG_SC_MSS_7180) += mss-sc7180.o obj-$(CONFIG_SC_VIDEOCC_7180) += videocc-sc7180.o obj-$(CONFIG_SDM_CAMCC_845) += camcc-sdm845.o diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c new file mode 100644 index 000000000000..d4c1864e1ee9 --- /dev/null +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "common.h" +#include "gdsc.h" + +enum { + P_BI_TCXO, + P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, + P_SLEEP_CLK, +}; + +static struct pll_vco fabia_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static const struct alpha_pll_config lpass_lpaaudio_dig_pll_config = { + .l = 0x20, + .alpha = 0x0, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002067, + .test_ctl_val = 0x40000000, + .test_ctl_hi_val = 0x00000000, + .user_ctl_val = 0x00005105, + .user_ctl_hi_val = 0x00004805, +}; + +static const u8 clk_alpha_pll_regs_offset[][PLL_OFF_MAX_REGS] = { + [CLK_ALPHA_PLL_TYPE_FABIA] = { + [PLL_OFF_L_VAL] = 0x04, + [PLL_OFF_CAL_L_VAL] = 0x8, + [PLL_OFF_USER_CTL] = 0x0c, + [PLL_OFF_USER_CTL_U] = 0x10, + [PLL_OFF_USER_CTL_U1] = 0x14, + [PLL_OFF_CONFIG_CTL] = 0x18, + [PLL_OFF_CONFIG_CTL_U] = 0x1C, + [PLL_OFF_CONFIG_CTL_U1] = 0x20, + [PLL_OFF_TEST_CTL] = 0x24, + [PLL_OFF_TEST_CTL_U] = 0x28, + [PLL_OFF_STATUS] = 0x30, + [PLL_OFF_OPMODE] = 0x38, + [PLL_OFF_FRAC] = 0x40, + }, +}; + +static struct clk_alpha_pll lpass_lpaaudio_dig_pll = { + .offset = 0x1000, + .vco_table = fabia_vco, + .num_vco = ARRAY_SIZE(fabia_vco), + .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "lpass_lpaaudio_dig_pll", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fabia_ops, + }, + }, +}; + +static const struct clk_div_table + post_div_table_lpass_lpaaudio_dig_pll_out_odd[] = { + { 0x5, 5 }, + { } +}; + +static struct clk_alpha_pll_postdiv lpass_lpaaudio_dig_pll_out_odd = { + .offset = 0x1000, + .post_div_shift = 12, + .post_div_table = post_div_table_lpass_lpaaudio_dig_pll_out_odd, + .num_post_div = + ARRAY_SIZE(post_div_table_lpass_lpaaudio_dig_pll_out_odd), + .width = 4, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpass_lpaaudio_dig_pll_out_odd", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpass_lpaaudio_dig_pll.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_alpha_pll_postdiv_fabia_ops, + }, +}; + +static const struct parent_map lpass_core_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 5 }, +}; + +static const struct clk_parent_data lpass_core_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &lpass_lpaaudio_dig_pll_out_odd.clkr.hw }, +}; + +static const struct parent_map lpass_core_cc_parent_map_2[] = { + { P_BI_TCXO, 0 }, +}; + +static struct clk_rcg2 core_clk_src = { + .cmd_rcgr = 0x1d000, + .mnd_width = 8, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_2, + .clkr.hw.init = &(struct clk_init_data){ + .name = "core_clk_src", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_ext_mclk0_clk_src[] = { + F(9600000, P_BI_TCXO, 2, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + { } +}; + +static const struct freq_tbl ftbl_ext_lpaif_clk_src[] = { + F(256000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 32), + F(512000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 16), + F(768000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 16), + F(1024000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 8), + F(1536000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 8), + F(2048000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 4), + F(3072000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 4), + F(4096000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 2), + F(6144000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 2), + F(8192000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 0, 0), + F(9600000, P_BI_TCXO, 2, 0, 0), + F(12288000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(24576000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 5, 0, 0), + { } +}; + +static struct clk_rcg2 ext_mclk0_clk_src = { + .cmd_rcgr = 0x20000, + .mnd_width = 8, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "ext_mclk0_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 lpaif_pri_clk_src = { + .cmd_rcgr = 0x10000, + .mnd_width = 16, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_lpaif_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpaif_pri_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 lpaif_sec_clk_src = { + .cmd_rcgr = 0x11000, + .mnd_width = 16, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_lpaif_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpaif_sec_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch lpass_audio_core_ext_mclk0_clk = { + .halt_reg = 0x20014, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x20014, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x20014, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_ext_mclk0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &ext_mclk0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_audio_core_lpaif_pri_ibit_clk = { + .halt_reg = 0x10018, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x10018, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x10018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_lpaif_pri_ibit_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpaif_pri_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_audio_core_lpaif_sec_ibit_clk = { + .halt_reg = 0x11018, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x11018, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x11018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_lpaif_sec_ibit_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpaif_sec_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_audio_core_sysnoc_mport_core_clk = { + .halt_reg = 0x23000, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x23000, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x23000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_sysnoc_mport_core_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &core_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_regmap *lpass_core_cc_sc7180_clocks[] = { + [EXT_MCLK0_CLK_SRC] = &ext_mclk0_clk_src.clkr, + [LPAIF_PRI_CLK_SRC] = &lpaif_pri_clk_src.clkr, + [LPAIF_SEC_CLK_SRC] = &lpaif_sec_clk_src.clkr, + [CORE_CLK_SRC] = &core_clk_src.clkr, + [LPASS_AUDIO_CORE_EXT_MCLK0_CLK] = &lpass_audio_core_ext_mclk0_clk.clkr, + [LPASS_AUDIO_CORE_LPAIF_PRI_IBIT_CLK] = + &lpass_audio_core_lpaif_pri_ibit_clk.clkr, + [LPASS_AUDIO_CORE_LPAIF_SEC_IBIT_CLK] = + &lpass_audio_core_lpaif_sec_ibit_clk.clkr, + [LPASS_AUDIO_CORE_SYSNOC_MPORT_CORE_CLK] = + &lpass_audio_core_sysnoc_mport_core_clk.clkr, + [LPASS_LPAAUDIO_DIG_PLL] = &lpass_lpaaudio_dig_pll.clkr, + [LPASS_LPAAUDIO_DIG_PLL_OUT_ODD] = &lpass_lpaaudio_dig_pll_out_odd.clkr, +}; + +static struct gdsc lpass_pdc_hm_gdsc = { + .gdscr = 0x3090, + .pd = { + .name = "lpass_pdc_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc lpass_audio_hm_gdsc = { + .gdscr = 0x9090, + .pd = { + .name = "lpass_audio_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc lpass_core_hm_gdsc = { + .gdscr = 0x0, + .pd = { + .name = "lpass_core_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = RETAIN_FF_ENABLE, +}; + +static struct gdsc *lpass_core_hm_sc7180_gdscs[] = { + [LPASS_CORE_HM_GDSCR] = &lpass_core_hm_gdsc, +}; + +static struct gdsc *lpass_audio_hm_sc7180_gdscs[] = { + [LPASS_PDC_HM_GDSCR] = &lpass_pdc_hm_gdsc, + [LPASS_AUDIO_HM_GDSCR] = &lpass_audio_hm_gdsc, +}; + +static struct regmap_config lpass_core_cc_sc7180_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .fast_io = true, +}; + +static const struct qcom_cc_desc lpass_core_hm_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .gdscs = lpass_core_hm_sc7180_gdscs, + .num_gdscs = ARRAY_SIZE(lpass_core_hm_sc7180_gdscs), +}; + +static const struct qcom_cc_desc lpass_core_cc_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .clks = lpass_core_cc_sc7180_clocks, + .num_clks = ARRAY_SIZE(lpass_core_cc_sc7180_clocks), +}; + +static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .gdscs = lpass_audio_hm_sc7180_gdscs, + .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs), +}; + +static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) +{ + const struct qcom_cc_desc *desc; + struct regmap *regmap; + int ret; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_audio_cc"; + desc = &lpass_audio_hm_sc7180_desc; + ret = qcom_cc_probe_by_index(pdev, 1, desc); + if (ret) + return ret; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_core_cc"; + regmap = qcom_cc_map(pdev, &lpass_core_cc_sc7180_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + /* + * Keep the CLK always-ON + * LPASS_AUDIO_CORE_SYSNOC_SWAY_CORE_CLK + */ + regmap_update_bits(regmap, 0x24000, BIT(0), BIT(0)); + + /* PLL settings */ + regmap_write(regmap, 0x1008, 0x20); + regmap_update_bits(regmap, 0x1014, BIT(0), BIT(0)); + + clk_fabia_pll_configure(&lpass_lpaaudio_dig_pll, regmap, + &lpass_lpaaudio_dig_pll_config); + + return qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap); +} + +static int lpass_hm_core_probe(struct platform_device *pdev) +{ + const struct qcom_cc_desc *desc; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core"; + desc = &lpass_core_hm_sc7180_desc; + + return qcom_cc_probe_by_index(pdev, 0, desc); +} + +static const struct of_device_id lpass_core_cc_sc7180_match_table[] = { + { + .compatible = "qcom,sc7180-lpasshm", + .data = lpass_hm_core_probe, + }, + { + .compatible = "qcom,sc7180-lpasscorecc", + .data = lpass_core_cc_sc7180_probe, + }, + { } +}; +MODULE_DEVICE_TABLE(of, lpass_core_cc_sc7180_match_table); + +static int lpass_core_sc7180_probe(struct platform_device *pdev) +{ + int (*clk_probe)(struct platform_device *p); + int ret; + + pm_runtime_enable(&pdev->dev); + ret = pm_clk_create(&pdev->dev); + if (ret) + return ret; + + ret = pm_clk_add(&pdev->dev, "iface"); + if (ret < 0) { + dev_err(&pdev->dev, "failed to acquire iface clock\n"); + goto disable_pm_runtime; + } + + clk_probe = of_device_get_match_data(&pdev->dev); + if (!clk_probe) + return -EINVAL; + + ret = clk_probe(pdev); + if (ret) + goto destroy_pm_clk; + + return 0; + +destroy_pm_clk: + pm_clk_destroy(&pdev->dev); + +disable_pm_runtime: + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static const struct dev_pm_ops lpass_core_cc_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + +static struct platform_driver lpass_core_cc_sc7180_driver = { + .probe = lpass_core_sc7180_probe, + .driver = { + .name = "lpass_core_cc-sc7180", + .of_match_table = lpass_core_cc_sc7180_match_table, + .pm = &lpass_core_cc_pm_ops, + }, +}; + +static int __init lpass_core_cc_sc7180_init(void) +{ + return platform_driver_register(&lpass_core_cc_sc7180_driver); +} +subsys_initcall(lpass_core_cc_sc7180_init); + +static void __exit lpass_core_cc_sc7180_exit(void) +{ + platform_driver_unregister(&lpass_core_cc_sc7180_driver); +} +module_exit(lpass_core_cc_sc7180_exit); + +MODULE_DESCRIPTION("QTI LPASS_CORE_CC SC7180 Driver"); +MODULE_LICENSE("GPL v2"); -- cgit From 314139f9f0abdba61ed9a8463bbcb0bf900ac5a2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 29 Jun 2020 17:38:07 +0200 Subject: i2c: rcar: slave: only send STOP event when we have been addressed When the SSR interrupt is activated, it will detect every STOP condition on the bus, not only the ones after we have been addressed. So, enable this interrupt only after we have been addressed, and disable it otherwise. Fixes: de20d1857dd6 ("i2c: rcar: add slave support") Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index a45c4bf1ec01..c2921e1d8c4c 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -583,13 +583,14 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) rcar_i2c_write(priv, ICSIER, SDR | SSR | SAR); } - rcar_i2c_write(priv, ICSSR, ~SAR & 0xff); + /* Clear SSR, too, because of old STOPs to other clients than us */ + rcar_i2c_write(priv, ICSSR, ~(SAR | SSR) & 0xff); } /* master sent stop */ if (ssr_filtered & SSR) { i2c_slave_event(priv->slave, I2C_SLAVE_STOP, &value); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSSR, ~SSR & 0xff); } @@ -853,7 +854,7 @@ static int rcar_reg_slave(struct i2c_client *slave) priv->slave = slave; rcar_i2c_write(priv, ICSAR, slave->addr); rcar_i2c_write(priv, ICSSR, 0); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSCR, SIE | SDBS); return 0; -- cgit From 471fb8c55c24a1b50a07d7708aa2d7c202ec896e Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Jul 2020 12:09:01 +0100 Subject: dt-bindings: i2c: renesas,i2c: Document r8a774e1 support Document i2c controller for RZ/G2H (R8A774E1) SoC, which is compatible with R-Car Gen3 SoC family. Signed-off-by: Lad Prabhakar Reviewed-by: Marian-Cristian Rotariu Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,i2c.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt index a03f9f5cb378..96d869ac3839 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt @@ -10,6 +10,7 @@ Required properties: "renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC. "renesas,i2c-r8a774b1" if the device is a part of a R8A774B1 SoC. "renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC. + "renesas,i2c-r8a774e1" if the device is a part of a R8A774E1 SoC. "renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC. "renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC. "renesas,i2c-r8a7790" if the device is a part of a R8A7790 SoC. -- cgit From 99f0975d760b6320dd5490bd0bc3a31900284757 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Jul 2020 12:09:02 +0100 Subject: dt-bindings: i2c: renesas,iic: Document r8a774e1 support Document IIC controller for RZ/G2H (R8A774E1) SoC, which is compatible with R-Car Gen3 SoC family. Signed-off-by: Lad Prabhakar Reviewed-by: Marian-Cristian Rotariu Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/renesas,iic.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt index 89facb09337a..93d412832e66 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,iic.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt @@ -11,6 +11,7 @@ Required properties: - "renesas,iic-r8a774a1" (RZ/G2M) - "renesas,iic-r8a774b1" (RZ/G2N) - "renesas,iic-r8a774c0" (RZ/G2E) + - "renesas,iic-r8a774e1" (RZ/G2H) - "renesas,iic-r8a7790" (R-Car H2) - "renesas,iic-r8a7791" (R-Car M2-W) - "renesas,iic-r8a7792" (R-Car V2H) -- cgit From da3a9e9a6aa96ef589c153078f66e0646bf06b55 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 23 Jul 2020 11:05:50 +0200 Subject: drm/fourcc: fix Amlogic Video Framebuffer Compression macro Fix the Amlogic Video Framebuffer Compression modifier macro to correctly add the layout options, a pair of parenthesis was missing. Fixes: d6528ec88309 ("drm/fourcc: Add modifier definitions for describing Amlogic Video Framebuffer Compression") Signed-off-by: Neil Armstrong Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200723090551.27529-1-narmstrong@baylibre.com --- include/uapi/drm/drm_fourcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4bee7de5f306..82f327801267 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1004,7 +1004,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ fourcc_mod_code(AMLOGIC, \ ((__layout) & __fourcc_mod_amlogic_layout_mask) | \ - ((__options) & __fourcc_mod_amlogic_options_mask \ + (((__options) & __fourcc_mod_amlogic_options_mask) \ << __fourcc_mod_amlogic_options_shift)) /* Amlogic FBC Layouts */ -- cgit From 26afdd98821bd36e4e1de88dd00a8c70d83accdf Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 27 Jul 2020 13:43:36 -0600 Subject: vfio: Cleanup allowed driver naming No functional change, avoid non-inclusive naming schemes. Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 580099afeaff..262ab0efd06c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -627,9 +627,10 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, * that error notification via MSI can be affected for platforms that handle * MSI within the same IOVA space as DMA. */ -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; +static const char * const vfio_driver_allowed[] = { "pci-stub" }; -static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) +static bool vfio_dev_driver_allowed(struct device *dev, + struct device_driver *drv) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -638,8 +639,8 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) return true; } - return match_string(vfio_driver_whitelist, - ARRAY_SIZE(vfio_driver_whitelist), + return match_string(vfio_driver_allowed, + ARRAY_SIZE(vfio_driver_allowed), drv->name) >= 0; } @@ -648,7 +649,7 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) * one of the following states: * - driver-less * - bound to a vfio driver - * - bound to a whitelisted driver + * - bound to an otherwise allowed driver * - a PCI interconnect device * * We use two methods to determine whether a device is bound to a vfio @@ -674,7 +675,7 @@ static int vfio_dev_viable(struct device *dev, void *data) } mutex_unlock(&group->unbound_lock); - if (!ret || !drv || vfio_dev_whitelisted(dev, drv)) + if (!ret || !drv || vfio_dev_driver_allowed(dev, drv)) return 0; device = vfio_group_get_device(group, dev); -- cgit From bf3551e150e5a231ac60f2bd750f63ab5484010a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 27 Jul 2020 13:43:37 -0600 Subject: vfio/pci: Add Intel X550 to hidden INTx devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel document 333717-008, "Intel® Ethernet Controller X550 Specification Update", version 2.7, dated June 2020, includes errata #22, added in version 2.1, May 2016, indicating X550 NICs suffer from the same implementation deficiency as the 700-series NICs: "The Interrupt Status bit in the Status register of the PCIe configuration space is not implemented and is not set as described in the PCIe specification." Without the interrupt status bit, vfio-pci cannot determine when these devices signal INTx. They are therefore added to the nointx quirk. Cc: Jesse Brandeburg Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index de881a6cff35..b0258b79bb5b 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,6 +207,8 @@ static bool vfio_pci_nointx(struct pci_dev *pdev) case 0x1580 ... 0x1581: case 0x1583 ... 0x158b: case 0x37d0 ... 0x37d2: + /* X550 */ + case 0x1563: return true; default: return false; -- cgit From e1907d6752a26ccfcda1573b5a7542b056553046 Mon Sep 17 00:00:00 2001 From: Xiang Zheng Date: Mon, 27 Jul 2020 13:43:37 -0600 Subject: vfio/type1: Add conditional rescheduling after iommu map failed Commit c5e6688752c2 ("vfio/type1: Add conditional rescheduling") missed a "cond_resched()" in vfio_iommu_map if iommu map failed. This is a very tiny optimization and the case can hardly happen. Signed-off-by: Xiang Zheng Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5e556ac9102a..48fb9cc4a40a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1225,8 +1225,10 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, return 0; unwind: - list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) { iommu_unmap(d->domain, iova, npage << PAGE_SHIFT); + cond_resched(); + } return ret; } -- cgit From 9d532f286914add61e02312881733a90355f153c Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 27 Jul 2020 13:43:38 -0600 Subject: vfio/spapr_tce: convert get_user_pages() --> pin_user_pages() This code was using get_user_pages*(), in a "Case 2" scenario (DMA/RDMA), using the categorization from [1]. That means that it's time to convert the get_user_pages*() + put_page() calls to pin_user_pages*() + unpin_user_pages() calls. There is some helpful background in [2]: basically, this is a small part of fixing a long-standing disconnect between pinning pages, and file systems' use of those pages. [1] Documentation/core-api/pin_user_pages.rst [2] "Explicit pinning of user-space pages": https://lwn.net/Articles/807108/ Cc: Alex Williamson Cc: Cornelia Huck Cc: kvm@vger.kernel.org Signed-off-by: John Hubbard Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 16b3adc508db..fe888b5dcc00 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -383,7 +383,7 @@ static void tce_iommu_unuse_page(struct tce_container *container, struct page *page; page = pfn_to_page(hpa >> PAGE_SHIFT); - put_page(page); + unpin_user_page(page); } static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, @@ -486,7 +486,7 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) struct page *page = NULL; enum dma_data_direction direction = iommu_tce_direction(tce); - if (get_user_pages_fast(tce & PAGE_MASK, 1, + if (pin_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE ? FOLL_WRITE : 0, &page) != 1) return -EFAULT; -- cgit From 924b51abf961f1d888143fcfa3d63c92dc517679 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 27 Jul 2020 13:43:38 -0600 Subject: vfio/pci: Hold igate across releasing eventfd contexts No need to release and immediately re-acquire igate while clearing out the eventfd ctxs. Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b0258b79bb5b..dabca0450e6d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -523,14 +523,12 @@ static void vfio_pci_release(void *device_data) vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + mutex_lock(&vdev->igate); if (vdev->err_trigger) { eventfd_ctx_put(vdev->err_trigger); vdev->err_trigger = NULL; } - mutex_unlock(&vdev->igate); - - mutex_lock(&vdev->igate); if (vdev->req_trigger) { eventfd_ctx_put(vdev->req_trigger); vdev->req_trigger = NULL; -- cgit From 8b7beaf9f185249f29912b5e2d7bc4147c5c2a6a Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 27 Jul 2020 13:43:39 -0600 Subject: PCI: Add Intel QuickAssist device IDs Add device IDs for the following Intel QuickAssist devices: DH895XCC, C3XXX and C62X. The defines in this patch are going to be referenced in two independent drivers, qat and vfio-pci. Signed-off-by: Giovanni Cabiddu Acked-by: Bjorn Helgaas Reviewed-by: Fiona Trahe Reviewed-by: Andy Shevchenko Signed-off-by: Alex Williamson --- include/linux/pci_ids.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0ad57693f392..f3166b1425ca 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2659,6 +2659,8 @@ #define PCI_DEVICE_ID_INTEL_80332_1 0x0332 #define PCI_DEVICE_ID_INTEL_80333_0 0x0370 #define PCI_DEVICE_ID_INTEL_80333_1 0x0372 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC 0x0435 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF 0x0443 #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 @@ -2708,6 +2710,8 @@ #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 @@ -2924,6 +2928,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 #define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 +#define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8 +#define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit From 1f97970e6c8e60349b045599f4525608db895009 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 27 Jul 2020 13:43:40 -0600 Subject: vfio/pci: Add device denylist Add denylist of devices that by default are not probed by vfio-pci. Devices in this list may be susceptible to untrusted application, even if the IOMMU is enabled. To be accessed via vfio-pci, the user has to explicitly disable the denylist. The denylist can be disabled via the module parameter disable_denylist. Signed-off-by: Giovanni Cabiddu Reviewed-by: Cornelia Huck Reviewed-by: Fiona Trahe Reviewed-by: Andy Shevchenko Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index dabca0450e6d..f368a3993ac1 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -60,6 +60,10 @@ module_param(enable_sriov, bool, 0644); MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); #endif +static bool disable_denylist; +module_param(disable_denylist, bool, 0444); +MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); + static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -69,6 +73,29 @@ static inline bool vfio_vga_disabled(void) #endif } +static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) +{ + return false; +} + +static bool vfio_pci_is_denylisted(struct pci_dev *pdev) +{ + if (!vfio_pci_dev_in_denylist(pdev)) + return false; + + if (disable_denylist) { + pci_warn(pdev, + "device denylist disabled - allowing device %04x:%04x.\n", + pdev->vendor, pdev->device); + return false; + } + + pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", + pdev->vendor, pdev->device); + + return true; +} + /* * Our VGA arbiter participation is limited since we don't know anything * about the device itself. However, if the device is the only VGA device @@ -1856,6 +1883,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct iommu_group *group; int ret; + if (vfio_pci_is_denylisted(pdev)) + return -EINVAL; + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; @@ -2345,6 +2375,9 @@ static int __init vfio_pci_init(void) vfio_pci_fill_ids(); + if (disable_denylist) + pr_warn("device denylist disabled.\n"); + return 0; out_driver: -- cgit From 50173329c8cc0c892eaa7a9d0f0692ac39cd7b04 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 27 Jul 2020 13:43:40 -0600 Subject: vfio/pci: Add QAT devices to denylist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current generation of Intel® QuickAssist Technology devices are not designed to run in an untrusted environment because of the following issues reported in the document "Intel® QuickAssist Technology (Intel® QAT) Software for Linux" (document number 336211-014): QATE-39220 - GEN - Intel® QAT API submissions with bad addresses that trigger DMA to invalid or unmapped addresses can cause a platform hang QATE-7495 - GEN - An incorrectly formatted request to Intel® QAT can hang the entire Intel® QAT Endpoint The document is downloadable from https://01.org/intel-quickassist-technology at the following link: https://01.org/sites/default/files/downloads/336211-014-qatforlinux-releasenotes-hwv1.7_0.pdf This patch adds the following QAT devices to the denylist: DH895XCC, C3XXX and C62X. Signed-off-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Reviewed-by: Andy Shevchenko Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f368a3993ac1..620465c2a1da 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -75,6 +75,21 @@ static inline bool vfio_vga_disabled(void) static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) { + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_QAT_C3XXX: + case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: + case PCI_DEVICE_ID_INTEL_QAT_C62X: + case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: + return true; + default: + return false; + } + } + return false; } -- cgit From ccd59dce1a21f473518bf273bdf5b182bab955b3 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Sun, 12 Jul 2020 04:20:56 -0700 Subject: vfio/type1: Refactor vfio_iommu_type1_ioctl() This patch refactors the vfio_iommu_type1_ioctl() to use switch instead of if-else, and each command got a helper function. Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Reviewed-by: Eric Auger Suggested-by: Christoph Hellwig Signed-off-by: Liu Yi L Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 394 ++++++++++++++++++++++------------------ 1 file changed, 213 insertions(+), 181 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 48fb9cc4a40a..e1c00de50ff9 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2455,6 +2455,23 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, + unsigned long arg) +{ + switch (arg) { + case VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_NESTING_IOMMU: + return 1; + case VFIO_DMA_CC_IOMMU: + if (!iommu) + return 0; + return vfio_domains_have_iommu_cache(iommu); + default: + return 0; + } +} + static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, size_t size) @@ -2531,241 +2548,256 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); } -static long vfio_iommu_type1_ioctl(void *iommu_data, - unsigned int cmd, unsigned long arg) +static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, + unsigned long arg) { - struct vfio_iommu *iommu = iommu_data; + struct vfio_iommu_type1_info info; unsigned long minsz; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; + int ret; - if (cmd == VFIO_CHECK_EXTENSION) { - switch (arg) { - case VFIO_TYPE1_IOMMU: - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_NESTING_IOMMU: - return 1; - case VFIO_DMA_CC_IOMMU: - if (!iommu) - return 0; - return vfio_domains_have_iommu_cache(iommu); - default: - return 0; - } - } else if (cmd == VFIO_IOMMU_GET_INFO) { - struct vfio_iommu_type1_info info; - struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - unsigned long capsz; - int ret; - - minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); - /* For backward compatibility, cannot require this */ - capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + if (info.argsz < minsz) + return -EINVAL; - if (info.argsz >= capsz) { - minsz = capsz; - info.cap_offset = 0; /* output, no-recopy necessary */ - } + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; /* output, no-recopy necessary */ + } - mutex_lock(&iommu->lock); - info.flags = VFIO_IOMMU_INFO_PGSIZES; + mutex_lock(&iommu->lock); + info.flags = VFIO_IOMMU_INFO_PGSIZES; - info.iova_pgsizes = iommu->pgsize_bitmap; + info.iova_pgsizes = iommu->pgsize_bitmap; - ret = vfio_iommu_migration_build_caps(iommu, &caps); + ret = vfio_iommu_migration_build_caps(iommu, &caps); - if (!ret) - ret = vfio_iommu_iova_build_caps(iommu, &caps); + if (!ret) + ret = vfio_iommu_iova_build_caps(iommu, &caps); - mutex_unlock(&iommu->lock); + mutex_unlock(&iommu->lock); - if (ret) - return ret; + if (ret) + return ret; - if (caps.size) { - info.flags |= VFIO_IOMMU_INFO_CAPS; + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; - if (info.argsz < sizeof(info) + caps.size) { - info.argsz = sizeof(info) + caps.size; - } else { - vfio_info_cap_shift(&caps, sizeof(info)); - if (copy_to_user((void __user *)arg + - sizeof(info), caps.buf, - caps.size)) { - kfree(caps.buf); - return -EFAULT; - } - info.cap_offset = sizeof(info); + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; } - - kfree(caps.buf); + info.cap_offset = sizeof(info); } - return copy_to_user((void __user *)arg, &info, minsz) ? - -EFAULT : 0; + kfree(caps.buf); + } - } else if (cmd == VFIO_IOMMU_MAP_DMA) { - struct vfio_iommu_type1_dma_map map; - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | - VFIO_DMA_MAP_FLAG_WRITE; + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; +} - minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); +static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_map map; + unsigned long minsz; + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; - if (copy_from_user(&map, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); - if (map.argsz < minsz || map.flags & ~mask) - return -EINVAL; + if (copy_from_user(&map, (void __user *)arg, minsz)) + return -EFAULT; - return vfio_dma_do_map(iommu, &map); + if (map.argsz < minsz || map.flags & ~mask) + return -EINVAL; - } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { - struct vfio_iommu_type1_dma_unmap unmap; - struct vfio_bitmap bitmap = { 0 }; - int ret; + return vfio_dma_do_map(iommu, &map); +} - minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); +static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_unmap unmap; + struct vfio_bitmap bitmap = { 0 }; + unsigned long minsz; + int ret; - if (copy_from_user(&unmap, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); - if (unmap.argsz < minsz || - unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) - return -EINVAL; + if (copy_from_user(&unmap, (void __user *)arg, minsz)) + return -EFAULT; - if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { - unsigned long pgshift; + if (unmap.argsz < minsz || + unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) + return -EINVAL; - if (unmap.argsz < (minsz + sizeof(bitmap))) - return -EINVAL; + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + unsigned long pgshift; - if (copy_from_user(&bitmap, - (void __user *)(arg + minsz), - sizeof(bitmap))) - return -EFAULT; + if (unmap.argsz < (minsz + sizeof(bitmap))) + return -EINVAL; - if (!access_ok((void __user *)bitmap.data, bitmap.size)) - return -EINVAL; + if (copy_from_user(&bitmap, + (void __user *)(arg + minsz), + sizeof(bitmap))) + return -EFAULT; - pgshift = __ffs(bitmap.pgsize); - ret = verify_bitmap_size(unmap.size >> pgshift, - bitmap.size); - if (ret) - return ret; - } + if (!access_ok((void __user *)bitmap.data, bitmap.size)) + return -EINVAL; - ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + pgshift = __ffs(bitmap.pgsize); + ret = verify_bitmap_size(unmap.size >> pgshift, + bitmap.size); if (ret) return ret; + } + + ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + if (ret) + return ret; - return copy_to_user((void __user *)arg, &unmap, minsz) ? + return copy_to_user((void __user *)arg, &unmap, minsz) ? -EFAULT : 0; - } else if (cmd == VFIO_IOMMU_DIRTY_PAGES) { - struct vfio_iommu_type1_dirty_bitmap dirty; - uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | - VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - int ret = 0; +} - if (!iommu->v2) - return -EACCES; +static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dirty_bitmap dirty; + uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | + VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + unsigned long minsz; + int ret = 0; - minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, - flags); + if (!iommu->v2) + return -EACCES; - if (copy_from_user(&dirty, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags); - if (dirty.argsz < minsz || dirty.flags & ~mask) - return -EINVAL; + if (copy_from_user(&dirty, (void __user *)arg, minsz)) + return -EFAULT; - /* only one flag should be set at a time */ - if (__ffs(dirty.flags) != __fls(dirty.flags)) - return -EINVAL; + if (dirty.argsz < minsz || dirty.flags & ~mask) + return -EINVAL; - if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { - size_t pgsize; + /* only one flag should be set at a time */ + if (__ffs(dirty.flags) != __fls(dirty.flags)) + return -EINVAL; - mutex_lock(&iommu->lock); - pgsize = 1 << __ffs(iommu->pgsize_bitmap); - if (!iommu->dirty_page_tracking) { - ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); - if (!ret) - iommu->dirty_page_tracking = true; - } - mutex_unlock(&iommu->lock); - return ret; - } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { - mutex_lock(&iommu->lock); - if (iommu->dirty_page_tracking) { - iommu->dirty_page_tracking = false; - vfio_dma_bitmap_free_all(iommu); - } - mutex_unlock(&iommu->lock); - return 0; - } else if (dirty.flags & - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { - struct vfio_iommu_type1_dirty_bitmap_get range; - unsigned long pgshift; - size_t data_size = dirty.argsz - minsz; - size_t iommu_pgsize; - - if (!data_size || data_size < sizeof(range)) - return -EINVAL; - - if (copy_from_user(&range, (void __user *)(arg + minsz), - sizeof(range))) - return -EFAULT; + if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + size_t pgsize; - if (range.iova + range.size < range.iova) - return -EINVAL; - if (!access_ok((void __user *)range.bitmap.data, - range.bitmap.size)) - return -EINVAL; + mutex_lock(&iommu->lock); + pgsize = 1 << __ffs(iommu->pgsize_bitmap); + if (!iommu->dirty_page_tracking) { + ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); + if (!ret) + iommu->dirty_page_tracking = true; + } + mutex_unlock(&iommu->lock); + return ret; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + mutex_lock(&iommu->lock); + if (iommu->dirty_page_tracking) { + iommu->dirty_page_tracking = false; + vfio_dma_bitmap_free_all(iommu); + } + mutex_unlock(&iommu->lock); + return 0; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + struct vfio_iommu_type1_dirty_bitmap_get range; + unsigned long pgshift; + size_t data_size = dirty.argsz - minsz; + size_t iommu_pgsize; - pgshift = __ffs(range.bitmap.pgsize); - ret = verify_bitmap_size(range.size >> pgshift, - range.bitmap.size); - if (ret) - return ret; + if (!data_size || data_size < sizeof(range)) + return -EINVAL; - mutex_lock(&iommu->lock); + if (copy_from_user(&range, (void __user *)(arg + minsz), + sizeof(range))) + return -EFAULT; - iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + if (range.iova + range.size < range.iova) + return -EINVAL; + if (!access_ok((void __user *)range.bitmap.data, + range.bitmap.size)) + return -EINVAL; - /* allow only smallest supported pgsize */ - if (range.bitmap.pgsize != iommu_pgsize) { - ret = -EINVAL; - goto out_unlock; - } - if (range.iova & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } - if (!range.size || range.size & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } + pgshift = __ffs(range.bitmap.pgsize); + ret = verify_bitmap_size(range.size >> pgshift, + range.bitmap.size); + if (ret) + return ret; - if (iommu->dirty_page_tracking) - ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, range.iova, range.size, - range.bitmap.pgsize); - else - ret = -EINVAL; -out_unlock: - mutex_unlock(&iommu->lock); + mutex_lock(&iommu->lock); - return ret; + iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + + /* allow only smallest supported pgsize */ + if (range.bitmap.pgsize != iommu_pgsize) { + ret = -EINVAL; + goto out_unlock; + } + if (range.iova & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } + if (!range.size || range.size & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; } + + if (iommu->dirty_page_tracking) + ret = vfio_iova_dirty_bitmap(range.bitmap.data, + iommu, range.iova, + range.size, + range.bitmap.pgsize); + else + ret = -EINVAL; +out_unlock: + mutex_unlock(&iommu->lock); + + return ret; } - return -ENOTTY; + return -EINVAL; +} + +static long vfio_iommu_type1_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + struct vfio_iommu *iommu = iommu_data; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + return vfio_iommu_type1_check_extension(iommu, arg); + case VFIO_IOMMU_GET_INFO: + return vfio_iommu_type1_get_info(iommu, arg); + case VFIO_IOMMU_MAP_DMA: + return vfio_iommu_type1_map_dma(iommu, arg); + case VFIO_IOMMU_UNMAP_DMA: + return vfio_iommu_type1_unmap_dma(iommu, arg); + case VFIO_IOMMU_DIRTY_PAGES: + return vfio_iommu_type1_dirty_pages(iommu, arg); + default: + return -ENOTTY; + } } static int vfio_iommu_type1_register_notifier(void *iommu_data, -- cgit From 175233f92e80d09f78be51e5b9abf5ca1fc9dc26 Mon Sep 17 00:00:00 2001 From: Li Heng Date: Thu, 23 Jul 2020 11:27:42 +0800 Subject: drm: Remove redundant NULL check Fix below warnings reported by coccicheck: ./drivers/gpu/drm/drm_drv.c:819:2-7: WARNING: NULL check before some freeing functions is not needed. Fixes: 5dad34f3c444 ("drm: Cleanups after drmm_add_final_kfree rollout") Signed-off-by: Li Heng Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1595474863-33112-1-git-send-email-liheng40@huawei.com --- drivers/gpu/drm/drm_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index bc38322f306e..13068fdf4331 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -815,8 +815,7 @@ static void drm_dev_release(struct kref *ref) drm_managed_release(dev); - if (dev->managed.final_kfree) - kfree(dev->managed.final_kfree); + kfree(dev->managed.final_kfree); } /** -- cgit From 8682dc1245099cccd7e75f72ffb911341bf45ba7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 1 Jul 2020 10:23:18 +0200 Subject: i2c: core: do not use logical device when creating irq domain Let's rather use its physical parent device to give proper namings and connections in debugfs. Signed-off-by: Wolfram Sang Reviewed-by: Benjamin Tissoires Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index dc43242a85ba..69217d2193da 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1227,7 +1227,7 @@ static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap) if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY)) return 0; - domain = irq_domain_create_linear(adap->dev.fwnode, + domain = irq_domain_create_linear(adap->dev.parent->fwnode, I2C_ADDR_7BITS_COUNT, &i2c_host_notify_irq_ops, adap); if (!domain) -- cgit From b538304da7855f4e31f91f915f259936eb67a1e6 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 26 Jul 2020 13:11:58 +0200 Subject: clk: qcom: gcc-sdm660: Add missing modem reset This will be required in order to support the modem upstream. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20200726111215.22361-2-konradybcio@gmail.com Fixes: f2a76a2955c0 ("clk: qcom: Add Global Clock controller (GCC) driver for SDM660") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sdm660.c | 1 + include/dt-bindings/clock/qcom,gcc-sdm660.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-sdm660.c b/drivers/clk/qcom/gcc-sdm660.c index bf5730832ef3..a85283786278 100644 --- a/drivers/clk/qcom/gcc-sdm660.c +++ b/drivers/clk/qcom/gcc-sdm660.c @@ -2402,6 +2402,7 @@ static const struct qcom_reset_map gcc_sdm660_resets[] = { [GCC_USB_20_BCR] = { 0x2f000 }, [GCC_USB_30_BCR] = { 0xf000 }, [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 }, + [GCC_MSS_RESTART] = { 0x79000 }, }; static const struct regmap_config gcc_sdm660_regmap_config = { diff --git a/include/dt-bindings/clock/qcom,gcc-sdm660.h b/include/dt-bindings/clock/qcom,gcc-sdm660.h index 468302282913..df8a6f3d367e 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdm660.h +++ b/include/dt-bindings/clock/qcom,gcc-sdm660.h @@ -152,5 +152,6 @@ #define GCC_USB_20_BCR 6 #define GCC_USB_30_BCR 7 #define GCC_USB_PHY_CFG_AHB2PHY_BCR 8 +#define GCC_MSS_RESTART 9 #endif -- cgit From 3386af51d3bcebcba3f7becdb1ef2e384abe90cf Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 26 Jul 2020 13:12:05 +0200 Subject: clk: qcom: gcc-sdm660: Fix up gcc_mss_mnoc_bimc_axi_clk Add missing halt_check, hwcg_reg and hwcg_bit properties. These were likely omitted when porting the driver upstream. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20200726111215.22361-9-konradybcio@gmail.com Fixes: f2a76a2955c0 ("clk: qcom: Add Global Clock controller (GCC) driver for SDM660") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sdm660.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/qcom/gcc-sdm660.c b/drivers/clk/qcom/gcc-sdm660.c index a85283786278..f0b47b7d50ca 100644 --- a/drivers/clk/qcom/gcc-sdm660.c +++ b/drivers/clk/qcom/gcc-sdm660.c @@ -1715,6 +1715,9 @@ static struct clk_branch gcc_mss_cfg_ahb_clk = { static struct clk_branch gcc_mss_mnoc_bimc_axi_clk = { .halt_reg = 0x8a004, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x8a004, + .hwcg_bit = 1, .clkr = { .enable_reg = 0x8a004, .enable_mask = BIT(0), -- cgit From d388e18f63b90a1256af490a856d67abd6558a38 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Jul 2020 15:09:22 -0500 Subject: clk: imx: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200727200922.GA2326@embeddedor Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-pllv3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-pllv3.c b/drivers/clk/imx/clk-pllv3.c index a7db93030e02..b20cdea3e9cc 100644 --- a/drivers/clk/imx/clk-pllv3.c +++ b/drivers/clk/imx/clk-pllv3.c @@ -433,7 +433,7 @@ struct clk_hw *imx_clk_hw_pllv3(enum imx_pllv3_type type, const char *name, break; case IMX_PLLV3_USB_VF610: pll->div_shift = 1; - /* fall through */ + fallthrough; case IMX_PLLV3_USB: ops = &clk_pllv3_ops; pll->powerup_set = true; @@ -441,7 +441,7 @@ struct clk_hw *imx_clk_hw_pllv3(enum imx_pllv3_type type, const char *name, case IMX_PLLV3_AV_IMX7: pll->num_offset = PLL_IMX7_NUM_OFFSET; pll->denom_offset = PLL_IMX7_DENOM_OFFSET; - /* fall through */ + fallthrough; case IMX_PLLV3_AV: ops = &clk_pllv3_av_ops; break; -- cgit From 7a346a37e46abd9bfe0e24aa2aabb68d832280b8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Jul 2020 15:11:22 -0500 Subject: clk: davinci: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200727201122.GA2593@embeddedor Reviewed-by: David Lechner Signed-off-by: Stephen Boyd --- drivers/clk/davinci/pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c index 8a23d5dfd1f8..6c35e4bb7940 100644 --- a/drivers/clk/davinci/pll.c +++ b/drivers/clk/davinci/pll.c @@ -651,7 +651,7 @@ static int davinci_pll_sysclk_rate_change(struct notifier_block *nb, pllcmd = readl(pll->base + PLLCMD); pllcmd |= PLLCMD_GOSET; writel(pllcmd, pll->base + PLLCMD); - /* fallthrough */ + fallthrough; case PRE_RATE_CHANGE: /* Wait until for outstanding changes to take effect */ do { -- cgit From 316810e883fbcfd88167fd53858294bf0c2bcd40 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Sat, 25 Jul 2020 13:11:34 +0800 Subject: dt-bindings: clock: Add RTC related clocks for Ingenic SoCs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RTC related clocks bindings for the JZ4780 SoC, the X1000 SoC, and the X1830 SoC from Ingenic. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200725051136.58220-2-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/jz4780-cgu.h | 2 ++ include/dt-bindings/clock/x1000-cgu.h | 2 ++ include/dt-bindings/clock/x1830-cgu.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/jz4780-cgu.h index 1859ce53ee38..cb07a0978301 100644 --- a/include/dt-bindings/clock/jz4780-cgu.h +++ b/include/dt-bindings/clock/jz4780-cgu.h @@ -85,5 +85,7 @@ #define JZ4780_CLK_DES 70 #define JZ4780_CLK_X2D 71 #define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_EXCLK_DIV512 73 +#define JZ4780_CLK_RTC 74 #endif /* __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ */ diff --git a/include/dt-bindings/clock/x1000-cgu.h b/include/dt-bindings/clock/x1000-cgu.h index 0367c8c02e16..f187e0719fd3 100644 --- a/include/dt-bindings/clock/x1000-cgu.h +++ b/include/dt-bindings/clock/x1000-cgu.h @@ -48,5 +48,7 @@ #define X1000_CLK_SSI 33 #define X1000_CLK_OST 34 #define X1000_CLK_PDMA 35 +#define X1000_CLK_EXCLK_DIV512 36 +#define X1000_CLK_RTC 37 #endif /* __DT_BINDINGS_CLOCK_X1000_CGU_H__ */ diff --git a/include/dt-bindings/clock/x1830-cgu.h b/include/dt-bindings/clock/x1830-cgu.h index 801e1d09c881..88455376a950 100644 --- a/include/dt-bindings/clock/x1830-cgu.h +++ b/include/dt-bindings/clock/x1830-cgu.h @@ -51,5 +51,7 @@ #define X1830_CLK_TCU 36 #define X1830_CLK_DTRNG 37 #define X1830_CLK_OST 38 +#define X1830_CLK_EXCLK_DIV512 39 +#define X1830_CLK_RTC 40 #endif /* __DT_BINDINGS_CLOCK_X1830_CGU_H__ */ -- cgit From acb3b78de3f7698724e59719fbe50b8c92928e83 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Sat, 25 Jul 2020 13:11:35 +0800 Subject: dt-bindings: clock: Add tabs to align code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "JZ4780_CLK_LCD0PIXCLK" and the "JZ4780_CLK_LCD1PIXCLK" in the "jz4780.h" and the new added "JZ4780_CLK_EXCLK_DIV512" in the previous patch is too long, add tabs to other lines to align them. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200725051136.58220-3-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/jz4780-cgu.h | 144 ++++++++++++++++----------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/jz4780-cgu.h index cb07a0978301..85cf8eb5081b 100644 --- a/include/dt-bindings/clock/jz4780-cgu.h +++ b/include/dt-bindings/clock/jz4780-cgu.h @@ -12,80 +12,80 @@ #ifndef __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ #define __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ -#define JZ4780_CLK_EXCLK 0 -#define JZ4780_CLK_RTCLK 1 -#define JZ4780_CLK_APLL 2 -#define JZ4780_CLK_MPLL 3 -#define JZ4780_CLK_EPLL 4 -#define JZ4780_CLK_VPLL 5 -#define JZ4780_CLK_OTGPHY 6 -#define JZ4780_CLK_SCLKA 7 -#define JZ4780_CLK_CPUMUX 8 -#define JZ4780_CLK_CPU 9 -#define JZ4780_CLK_L2CACHE 10 -#define JZ4780_CLK_AHB0 11 -#define JZ4780_CLK_AHB2PMUX 12 -#define JZ4780_CLK_AHB2 13 -#define JZ4780_CLK_PCLK 14 -#define JZ4780_CLK_DDR 15 -#define JZ4780_CLK_VPU 16 -#define JZ4780_CLK_I2SPLL 17 -#define JZ4780_CLK_I2S 18 +#define JZ4780_CLK_EXCLK 0 +#define JZ4780_CLK_RTCLK 1 +#define JZ4780_CLK_APLL 2 +#define JZ4780_CLK_MPLL 3 +#define JZ4780_CLK_EPLL 4 +#define JZ4780_CLK_VPLL 5 +#define JZ4780_CLK_OTGPHY 6 +#define JZ4780_CLK_SCLKA 7 +#define JZ4780_CLK_CPUMUX 8 +#define JZ4780_CLK_CPU 9 +#define JZ4780_CLK_L2CACHE 10 +#define JZ4780_CLK_AHB0 11 +#define JZ4780_CLK_AHB2PMUX 12 +#define JZ4780_CLK_AHB2 13 +#define JZ4780_CLK_PCLK 14 +#define JZ4780_CLK_DDR 15 +#define JZ4780_CLK_VPU 16 +#define JZ4780_CLK_I2SPLL 17 +#define JZ4780_CLK_I2S 18 #define JZ4780_CLK_LCD0PIXCLK 19 #define JZ4780_CLK_LCD1PIXCLK 20 -#define JZ4780_CLK_MSCMUX 21 -#define JZ4780_CLK_MSC0 22 -#define JZ4780_CLK_MSC1 23 -#define JZ4780_CLK_MSC2 24 -#define JZ4780_CLK_UHC 25 -#define JZ4780_CLK_SSIPLL 26 -#define JZ4780_CLK_SSI 27 -#define JZ4780_CLK_CIMMCLK 28 -#define JZ4780_CLK_PCMPLL 29 -#define JZ4780_CLK_PCM 30 -#define JZ4780_CLK_GPU 31 -#define JZ4780_CLK_HDMI 32 -#define JZ4780_CLK_BCH 33 -#define JZ4780_CLK_NEMC 34 -#define JZ4780_CLK_OTG0 35 -#define JZ4780_CLK_SSI0 36 -#define JZ4780_CLK_SMB0 37 -#define JZ4780_CLK_SMB1 38 -#define JZ4780_CLK_SCC 39 -#define JZ4780_CLK_AIC 40 -#define JZ4780_CLK_TSSI0 41 -#define JZ4780_CLK_OWI 42 -#define JZ4780_CLK_KBC 43 -#define JZ4780_CLK_SADC 44 -#define JZ4780_CLK_UART0 45 -#define JZ4780_CLK_UART1 46 -#define JZ4780_CLK_UART2 47 -#define JZ4780_CLK_UART3 48 -#define JZ4780_CLK_SSI1 49 -#define JZ4780_CLK_SSI2 50 -#define JZ4780_CLK_PDMA 51 -#define JZ4780_CLK_GPS 52 -#define JZ4780_CLK_MAC 53 -#define JZ4780_CLK_SMB2 54 -#define JZ4780_CLK_CIM 55 -#define JZ4780_CLK_LCD 56 -#define JZ4780_CLK_TVE 57 -#define JZ4780_CLK_IPU 58 -#define JZ4780_CLK_DDR0 59 -#define JZ4780_CLK_DDR1 60 -#define JZ4780_CLK_SMB3 61 -#define JZ4780_CLK_TSSI1 62 -#define JZ4780_CLK_COMPRESS 63 -#define JZ4780_CLK_AIC1 64 -#define JZ4780_CLK_GPVLC 65 -#define JZ4780_CLK_OTG1 66 -#define JZ4780_CLK_UART4 67 -#define JZ4780_CLK_AHBMON 68 -#define JZ4780_CLK_SMB4 69 -#define JZ4780_CLK_DES 70 -#define JZ4780_CLK_X2D 71 -#define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_MSCMUX 21 +#define JZ4780_CLK_MSC0 22 +#define JZ4780_CLK_MSC1 23 +#define JZ4780_CLK_MSC2 24 +#define JZ4780_CLK_UHC 25 +#define JZ4780_CLK_SSIPLL 26 +#define JZ4780_CLK_SSI 27 +#define JZ4780_CLK_CIMMCLK 28 +#define JZ4780_CLK_PCMPLL 29 +#define JZ4780_CLK_PCM 30 +#define JZ4780_CLK_GPU 31 +#define JZ4780_CLK_HDMI 32 +#define JZ4780_CLK_BCH 33 +#define JZ4780_CLK_NEMC 34 +#define JZ4780_CLK_OTG0 35 +#define JZ4780_CLK_SSI0 36 +#define JZ4780_CLK_SMB0 37 +#define JZ4780_CLK_SMB1 38 +#define JZ4780_CLK_SCC 39 +#define JZ4780_CLK_AIC 40 +#define JZ4780_CLK_TSSI0 41 +#define JZ4780_CLK_OWI 42 +#define JZ4780_CLK_KBC 43 +#define JZ4780_CLK_SADC 44 +#define JZ4780_CLK_UART0 45 +#define JZ4780_CLK_UART1 46 +#define JZ4780_CLK_UART2 47 +#define JZ4780_CLK_UART3 48 +#define JZ4780_CLK_SSI1 49 +#define JZ4780_CLK_SSI2 50 +#define JZ4780_CLK_PDMA 51 +#define JZ4780_CLK_GPS 52 +#define JZ4780_CLK_MAC 53 +#define JZ4780_CLK_SMB2 54 +#define JZ4780_CLK_CIM 55 +#define JZ4780_CLK_LCD 56 +#define JZ4780_CLK_TVE 57 +#define JZ4780_CLK_IPU 58 +#define JZ4780_CLK_DDR0 59 +#define JZ4780_CLK_DDR1 60 +#define JZ4780_CLK_SMB3 61 +#define JZ4780_CLK_TSSI1 62 +#define JZ4780_CLK_COMPRESS 63 +#define JZ4780_CLK_AIC1 64 +#define JZ4780_CLK_GPVLC 65 +#define JZ4780_CLK_OTG1 66 +#define JZ4780_CLK_UART4 67 +#define JZ4780_CLK_AHBMON 68 +#define JZ4780_CLK_SMB4 69 +#define JZ4780_CLK_DES 70 +#define JZ4780_CLK_X2D 71 +#define JZ4780_CLK_CORE1 72 #define JZ4780_CLK_EXCLK_DIV512 73 -#define JZ4780_CLK_RTC 74 +#define JZ4780_CLK_RTC 74 #endif /* __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ */ -- cgit From 82df5b7329aaeb21b3e8fc86fa2d62a3d68602aa Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Sat, 25 Jul 2020 13:11:36 +0800 Subject: clk: Ingenic: Add RTC related clocks for Ingenic SoCs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTC unit in the Ingenic SoCs has two clock sources, one is from an external 32.768kHz clock, and the other is from an external 24MHz/48MHz main clock that is divided by 512. The choice of these two clocks is controlled by the ERCS bit in the OPCR register. The RNG unit will also use this clock. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200725051136.58220-4-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/jz4780-cgu.c | 12 ++++++++++++ drivers/clk/ingenic/x1000-cgu.c | 13 +++++++++++++ drivers/clk/ingenic/x1830-cgu.c | 13 +++++++++++++ 3 files changed, 38 insertions(+) diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c index 6c5b8029cc8a..288e9694285b 100644 --- a/drivers/clk/ingenic/jz4780-cgu.c +++ b/drivers/clk/ingenic/jz4780-cgu.c @@ -516,6 +516,18 @@ static const struct ingenic_cgu_clk_info jz4780_cgu_clocks[] = { .gate = { CGU_REG_CLKGR0, 1 }, }, + [JZ4780_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { JZ4780_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [JZ4780_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK_DIV512, JZ4780_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + }, + /* Gate-only clocks */ [JZ4780_CLK_NEMC] = { diff --git a/drivers/clk/ingenic/x1000-cgu.c b/drivers/clk/ingenic/x1000-cgu.c index 453f3323cb99..3cc37466ce6b 100644 --- a/drivers/clk/ingenic/x1000-cgu.c +++ b/drivers/clk/ingenic/x1000-cgu.c @@ -278,6 +278,19 @@ static const struct ingenic_cgu_clk_info x1000_cgu_clocks[] = { .mux = { CGU_REG_SSICDR, 30, 1 }, }, + [X1000_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { X1000_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [X1000_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { X1000_CLK_EXCLK_DIV512, X1000_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + .gate = { CGU_REG_CLKGR, 27 }, + }, + /* Gate-only clocks */ [X1000_CLK_EMC] = { diff --git a/drivers/clk/ingenic/x1830-cgu.c b/drivers/clk/ingenic/x1830-cgu.c index a1b2ff0ee487..950aee243364 100644 --- a/drivers/clk/ingenic/x1830-cgu.c +++ b/drivers/clk/ingenic/x1830-cgu.c @@ -329,6 +329,19 @@ static const struct ingenic_cgu_clk_info x1830_cgu_clocks[] = { .mux = { CGU_REG_SSICDR, 29, 1 }, }, + [X1830_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { X1830_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [X1830_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { X1830_CLK_EXCLK_DIV512, X1830_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + .gate = { CGU_REG_CLKGR0, 29 }, + }, + /* Gate-only clocks */ [X1830_CLK_EMC] = { -- cgit From dcd062a88e46d4d2aeaf701e82e128117fc0f3a1 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Wed, 1 Jul 2020 00:38:50 +0800 Subject: clk: JZ4780: Add functions for enable and disable USB PHY. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new functions to "jz4780_otg_phy_ops" to enable or disable the USB PHY in the JZ4780 SoC. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200630163852.47267-2-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/jz4780-cgu.c | 65 +++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c index 288e9694285b..fa54d6e3a3a8 100644 --- a/drivers/clk/ingenic/jz4780-cgu.c +++ b/drivers/clk/ingenic/jz4780-cgu.c @@ -4,6 +4,7 @@ * * Copyright (c) 2013-2015 Imagination Technologies * Author: Paul Burton + * Copyright (c) 2020 周琰杰 (Zhou Yanjie) */ #include @@ -59,6 +60,7 @@ #define USBPCR_VBUSVLDEXT BIT(24) #define USBPCR_VBUSVLDEXTSEL BIT(23) #define USBPCR_POR BIT(22) +#define USBPCR_SIDDQ BIT(21) #define USBPCR_OTG_DISABLE BIT(20) #define USBPCR_COMPDISTUNE_MASK (0x7 << 17) #define USBPCR_OTGTUNE_MASK (0x7 << 14) @@ -100,32 +102,6 @@ static struct ingenic_cgu *cgu; -static u8 jz4780_otg_phy_get_parent(struct clk_hw *hw) -{ - /* we only use CLKCORE, revisit if that ever changes */ - return 0; -} - -static int jz4780_otg_phy_set_parent(struct clk_hw *hw, u8 idx) -{ - unsigned long flags; - u32 usbpcr1; - - if (idx > 0) - return -EINVAL; - - spin_lock_irqsave(&cgu->lock, flags); - - usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); - usbpcr1 &= ~USBPCR1_REFCLKSEL_MASK; - /* we only use CLKCORE */ - usbpcr1 |= USBPCR1_REFCLKSEL_CORE; - writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); - - spin_unlock_irqrestore(&cgu->lock, flags); - return 0; -} - static unsigned long jz4780_otg_phy_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { @@ -149,7 +125,6 @@ static unsigned long jz4780_otg_phy_recalc_rate(struct clk_hw *hw, return 19200000; } - BUG(); return parent_rate; } @@ -206,13 +181,43 @@ static int jz4780_otg_phy_set_rate(struct clk_hw *hw, unsigned long req_rate, return 0; } -static const struct clk_ops jz4780_otg_phy_ops = { - .get_parent = jz4780_otg_phy_get_parent, - .set_parent = jz4780_otg_phy_set_parent, +static int jz4780_otg_phy_enable(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + + writel(readl(reg_opcr) | OPCR_SPENDN0, reg_opcr); + writel(readl(reg_usbpcr) & ~USBPCR_OTG_DISABLE & ~USBPCR_SIDDQ, reg_usbpcr); + return 0; +} + +static void jz4780_otg_phy_disable(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + writel(readl(reg_opcr) & ~OPCR_SPENDN0, reg_opcr); + writel(readl(reg_usbpcr) | USBPCR_OTG_DISABLE | USBPCR_SIDDQ, reg_usbpcr); +} + +static int jz4780_otg_phy_is_enabled(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + + return (readl(reg_opcr) & OPCR_SPENDN0) && + !(readl(reg_usbpcr) & USBPCR_SIDDQ) && + !(readl(reg_usbpcr) & USBPCR_OTG_DISABLE); +} + +static const struct clk_ops jz4780_otg_phy_ops = { .recalc_rate = jz4780_otg_phy_recalc_rate, .round_rate = jz4780_otg_phy_round_rate, .set_rate = jz4780_otg_phy_set_rate, + + .enable = jz4780_otg_phy_enable, + .disable = jz4780_otg_phy_disable, + .is_enabled = jz4780_otg_phy_is_enabled, }; static int jz4780_core1_enable(struct clk_hw *hw) -- cgit From beb61eb06880c06005b9855873d77547d43bab4b Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Wed, 1 Jul 2020 00:38:51 +0800 Subject: clk: JZ4780: Reformat the code to align it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reformat the code (add one level of indentation before the values), to align the code in the macro definition section. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200630163852.47267-3-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/jz4780-cgu.c | 90 ++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c index fa54d6e3a3a8..0268d23ebe2e 100644 --- a/drivers/clk/ingenic/jz4780-cgu.c +++ b/drivers/clk/ingenic/jz4780-cgu.c @@ -20,50 +20,50 @@ /* CGU register offsets */ #define CGU_REG_CLOCKCONTROL 0x00 -#define CGU_REG_LCR 0x04 -#define CGU_REG_APLL 0x10 -#define CGU_REG_MPLL 0x14 -#define CGU_REG_EPLL 0x18 -#define CGU_REG_VPLL 0x1c -#define CGU_REG_CLKGR0 0x20 -#define CGU_REG_OPCR 0x24 -#define CGU_REG_CLKGR1 0x28 -#define CGU_REG_DDRCDR 0x2c -#define CGU_REG_VPUCDR 0x30 -#define CGU_REG_USBPCR 0x3c -#define CGU_REG_USBRDT 0x40 -#define CGU_REG_USBVBFIL 0x44 -#define CGU_REG_USBPCR1 0x48 -#define CGU_REG_LP0CDR 0x54 -#define CGU_REG_I2SCDR 0x60 -#define CGU_REG_LP1CDR 0x64 -#define CGU_REG_MSC0CDR 0x68 -#define CGU_REG_UHCCDR 0x6c -#define CGU_REG_SSICDR 0x74 -#define CGU_REG_CIMCDR 0x7c -#define CGU_REG_PCMCDR 0x84 -#define CGU_REG_GPUCDR 0x88 -#define CGU_REG_HDMICDR 0x8c -#define CGU_REG_MSC1CDR 0xa4 -#define CGU_REG_MSC2CDR 0xa8 -#define CGU_REG_BCHCDR 0xac -#define CGU_REG_CLOCKSTATUS 0xd4 +#define CGU_REG_LCR 0x04 +#define CGU_REG_APLL 0x10 +#define CGU_REG_MPLL 0x14 +#define CGU_REG_EPLL 0x18 +#define CGU_REG_VPLL 0x1c +#define CGU_REG_CLKGR0 0x20 +#define CGU_REG_OPCR 0x24 +#define CGU_REG_CLKGR1 0x28 +#define CGU_REG_DDRCDR 0x2c +#define CGU_REG_VPUCDR 0x30 +#define CGU_REG_USBPCR 0x3c +#define CGU_REG_USBRDT 0x40 +#define CGU_REG_USBVBFIL 0x44 +#define CGU_REG_USBPCR1 0x48 +#define CGU_REG_LP0CDR 0x54 +#define CGU_REG_I2SCDR 0x60 +#define CGU_REG_LP1CDR 0x64 +#define CGU_REG_MSC0CDR 0x68 +#define CGU_REG_UHCCDR 0x6c +#define CGU_REG_SSICDR 0x74 +#define CGU_REG_CIMCDR 0x7c +#define CGU_REG_PCMCDR 0x84 +#define CGU_REG_GPUCDR 0x88 +#define CGU_REG_HDMICDR 0x8c +#define CGU_REG_MSC1CDR 0xa4 +#define CGU_REG_MSC2CDR 0xa8 +#define CGU_REG_BCHCDR 0xac +#define CGU_REG_CLOCKSTATUS 0xd4 /* bits within the OPCR register */ -#define OPCR_SPENDN0 BIT(7) -#define OPCR_SPENDN1 BIT(6) +#define OPCR_SPENDN0 BIT(7) +#define OPCR_SPENDN1 BIT(6) /* bits within the USBPCR register */ -#define USBPCR_USB_MODE BIT(31) +#define USBPCR_USB_MODE BIT(31) #define USBPCR_IDPULLUP_MASK (0x3 << 28) -#define USBPCR_COMMONONN BIT(25) -#define USBPCR_VBUSVLDEXT BIT(24) +#define USBPCR_COMMONONN BIT(25) +#define USBPCR_VBUSVLDEXT BIT(24) #define USBPCR_VBUSVLDEXTSEL BIT(23) -#define USBPCR_POR BIT(22) -#define USBPCR_SIDDQ BIT(21) -#define USBPCR_OTG_DISABLE BIT(20) +#define USBPCR_POR BIT(22) +#define USBPCR_SIDDQ BIT(21) +#define USBPCR_OTG_DISABLE BIT(20) #define USBPCR_COMPDISTUNE_MASK (0x7 << 17) -#define USBPCR_OTGTUNE_MASK (0x7 << 14) +#define USBPCR_OTGTUNE_MASK (0x7 << 14) #define USBPCR_SQRXTUNE_MASK (0x7 << 11) #define USBPCR_TXFSLSTUNE_MASK (0xf << 7) #define USBPCR_TXPREEMPHTUNE BIT(6) @@ -80,13 +80,13 @@ #define USBPCR1_REFCLKDIV_48 (0x2 << USBPCR1_REFCLKDIV_SHIFT) #define USBPCR1_REFCLKDIV_24 (0x1 << USBPCR1_REFCLKDIV_SHIFT) #define USBPCR1_REFCLKDIV_12 (0x0 << USBPCR1_REFCLKDIV_SHIFT) -#define USBPCR1_USB_SEL BIT(28) -#define USBPCR1_WORD_IF0 BIT(19) -#define USBPCR1_WORD_IF1 BIT(18) +#define USBPCR1_USB_SEL BIT(28) +#define USBPCR1_WORD_IF0 BIT(19) +#define USBPCR1_WORD_IF1 BIT(18) /* bits within the USBRDT register */ -#define USBRDT_VBFIL_LD_EN BIT(25) -#define USBRDT_USBRDT_MASK 0x7fffff +#define USBRDT_VBFIL_LD_EN BIT(25) +#define USBRDT_USBRDT_MASK 0x7fffff /* bits within the USBVBFIL register */ #define USBVBFIL_IDDIGFIL_SHIFT 16 @@ -94,11 +94,11 @@ #define USBVBFIL_USBVBFIL_MASK (0xffff) /* bits within the LCR register */ -#define LCR_PD_SCPU BIT(31) -#define LCR_SCPUS BIT(27) +#define LCR_PD_SCPU BIT(31) +#define LCR_SCPUS BIT(27) /* bits within the CLKGR1 register */ -#define CLKGR1_CORE1 BIT(15) +#define CLKGR1_CORE1 BIT(15) static struct ingenic_cgu *cgu; -- cgit From 810797c14508dae3c80eacaa483a900b03fdb780 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Wed, 1 Jul 2020 00:38:52 +0800 Subject: clk: X1000: Add support for calculat REFCLK of USB PHY. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add functions for calculat the rate of REFCLK, which is needed by USB PHY in Ingenic X1000 SoC. Tested-by: 周正 (Zhou Zheng) Signed-off-by: 周琰杰 (Zhou Yanjie) Link: https://lore.kernel.org/r/20200630163852.47267-4-zhouyanjie@wanyeetech.com Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/x1000-cgu.c | 84 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/clk/ingenic/x1000-cgu.c b/drivers/clk/ingenic/x1000-cgu.c index 3cc37466ce6b..9aa20b52e1c3 100644 --- a/drivers/clk/ingenic/x1000-cgu.c +++ b/drivers/clk/ingenic/x1000-cgu.c @@ -48,8 +48,87 @@ #define USBPCR_SIDDQ BIT(21) #define USBPCR_OTG_DISABLE BIT(20) +/* bits within the USBPCR1 register */ +#define USBPCR1_REFCLKSEL_SHIFT 26 +#define USBPCR1_REFCLKSEL_MASK (0x3 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKSEL_CORE (0x2 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKDIV_SHIFT 24 +#define USBPCR1_REFCLKDIV_MASK (0x3 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_48 (0x2 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_24 (0x1 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_12 (0x0 << USBPCR1_REFCLKDIV_SHIFT) + static struct ingenic_cgu *cgu; +static unsigned long x1000_otg_phy_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + u32 usbpcr1; + unsigned refclk_div; + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + refclk_div = usbpcr1 & USBPCR1_REFCLKDIV_MASK; + + switch (refclk_div) { + case USBPCR1_REFCLKDIV_12: + return 12000000; + + case USBPCR1_REFCLKDIV_24: + return 24000000; + + case USBPCR1_REFCLKDIV_48: + return 48000000; + } + + return parent_rate; +} + +static long x1000_otg_phy_round_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long *parent_rate) +{ + if (req_rate < 18000000) + return 12000000; + + if (req_rate < 36000000) + return 24000000; + + return 48000000; +} + +static int x1000_otg_phy_set_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long parent_rate) +{ + unsigned long flags; + u32 usbpcr1, div_bits; + + switch (req_rate) { + case 12000000: + div_bits = USBPCR1_REFCLKDIV_12; + break; + + case 24000000: + div_bits = USBPCR1_REFCLKDIV_24; + break; + + case 48000000: + div_bits = USBPCR1_REFCLKDIV_48; + break; + + default: + return -EINVAL; + } + + spin_lock_irqsave(&cgu->lock, flags); + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + usbpcr1 &= ~USBPCR1_REFCLKDIV_MASK; + usbpcr1 |= div_bits; + writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); + + spin_unlock_irqrestore(&cgu->lock, flags); + return 0; +} + static int x1000_usb_phy_enable(struct clk_hw *hw) { void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; @@ -80,6 +159,10 @@ static int x1000_usb_phy_is_enabled(struct clk_hw *hw) } static const struct clk_ops x1000_otg_phy_ops = { + .recalc_rate = x1000_otg_phy_recalc_rate, + .round_rate = x1000_otg_phy_round_rate, + .set_rate = x1000_otg_phy_set_rate, + .enable = x1000_usb_phy_enable, .disable = x1000_usb_phy_disable, .is_enabled = x1000_usb_phy_is_enabled, @@ -144,7 +227,6 @@ static const struct ingenic_cgu_clk_info x1000_cgu_clocks[] = { }, }, - /* Custom (SoC-specific) OTG PHY */ [X1000_CLK_OTGPHY] = { -- cgit From fa64023763cf1a3da8bf3341df6c2a47e54fcead Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 9 Jul 2020 20:20:57 +0300 Subject: clk: tegra: pll: Improve PLLM enable-state detection Power Management Controller (PMC) can override the PLLM clock settings, including the enable-state. Although PMC could only act as a second level gate, meaning that PLLM needs to be enabled by the Clock and Reset Controller (CaR) anyways if we want it to be enabled. Hence, when PLLM is overridden by PMC, it needs to be enabled by CaR and ungated by PMC in order to be functional. Please note that this patch doesn't fix any known problem, and thus, it's merely a minor improvement. Link: https://lore.kernel.org/linux-arm-kernel/20191210120909.GA2703785@ulmo/T/ Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200709172057.13951-1-digetx@gmail.com Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-pll.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 0b212cf2e794..f180c055d33f 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -327,16 +327,26 @@ int tegra_pll_wait_for_lock(struct tegra_clk_pll *pll) return clk_pll_wait_for_lock(pll); } +static bool pllm_clk_is_gated_by_pmc(struct tegra_clk_pll *pll) +{ + u32 val = readl_relaxed(pll->pmc + PMC_PLLP_WB0_OVERRIDE); + + return (val & PMC_PLLP_WB0_OVERRIDE_PLLM_OVERRIDE) && + !(val & PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE); +} + static int clk_pll_is_enabled(struct clk_hw *hw) { struct tegra_clk_pll *pll = to_clk_pll(hw); u32 val; - if (pll->params->flags & TEGRA_PLLM) { - val = readl_relaxed(pll->pmc + PMC_PLLP_WB0_OVERRIDE); - if (val & PMC_PLLP_WB0_OVERRIDE_PLLM_OVERRIDE) - return val & PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE ? 1 : 0; - } + /* + * Power Management Controller (PMC) can override the PLLM clock + * settings, including the enable-state. The PLLM is enabled when + * PLLM's CaR state is ON and when PLLM isn't gated by PMC. + */ + if ((pll->params->flags & TEGRA_PLLM) && pllm_clk_is_gated_by_pmc(pll)) + return 0; val = pll_readl_base(pll); -- cgit From 12b90b40854a8461a02ef19f6f4474cc88d64b66 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 13 Jul 2020 03:21:43 +0000 Subject: clk: clk-atlas6: fix return value check in atlas6_clk_init() In case of error, the function clk_register() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20200713032143.21362-1-vulab@iscas.ac.cn Acked-by: Barry Song Fixes: 7bf21bc81f28 ("clk: sirf: re-arch to make the codes support both prima2 and atlas6") Signed-off-by: Stephen Boyd --- drivers/clk/sirf/clk-atlas6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sirf/clk-atlas6.c b/drivers/clk/sirf/clk-atlas6.c index c84d5bab7ac2..b95483bb6a5e 100644 --- a/drivers/clk/sirf/clk-atlas6.c +++ b/drivers/clk/sirf/clk-atlas6.c @@ -135,7 +135,7 @@ static void __init atlas6_clk_init(struct device_node *np) for (i = pll1; i < maxclk; i++) { atlas6_clks[i] = clk_register(NULL, atlas6_clk_hw_array[i]); - BUG_ON(!atlas6_clks[i]); + BUG_ON(IS_ERR(atlas6_clks[i])); } clk_register_clkdev(atlas6_clks[cpu], NULL, "cpu"); clk_register_clkdev(atlas6_clks[io], NULL, "io"); -- cgit From 92df3a9bf3fd975949122b1779c142c2d007a7dc Mon Sep 17 00:00:00 2001 From: Michael Krummsdorf Date: Wed, 10 Jun 2020 13:38:37 +0200 Subject: clk: qoriq: add LS1021A core pll mux options This allows to clock the cores with 1 GHz, 500 MHz and 250 MHz. Signed-off-by: Michael Krummsdorf Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20200610113837.27117-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-qoriq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 374afcab89af..5942e9874bc0 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -244,6 +244,14 @@ static const struct clockgen_muxinfo clockgen2_cmux_cgb = { }, }; +static const struct clockgen_muxinfo ls1021a_cmux = { + { + { CLKSEL_VALID, CGA_PLL1, PLL_DIV1 }, + { CLKSEL_VALID, CGA_PLL1, PLL_DIV2 }, + { CLKSEL_VALID, CGA_PLL1, PLL_DIV4 }, + } +}; + static const struct clockgen_muxinfo ls1028a_hwa1 = { { { CLKSEL_VALID, PLATFORM_PLL, PLL_DIV1 }, @@ -577,7 +585,7 @@ static const struct clockgen_chipinfo chipinfo[] = { { .compat = "fsl,ls1021a-clockgen", .cmux_groups = { - &t1023_cmux + &ls1021a_cmux }, .cmux_to_group = { 0, -1 -- cgit From 48908a3833cc76c56e626e8e80ba8f808c8d03a5 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 27 Jul 2020 11:07:14 -0700 Subject: KVM: PPC: Book3S HV: Fix function definition in book3s_hv_uvmem.c Without this fix, git is confused. It generates wrong function context for code changes in subsequent patches. Weird, but true. Signed-off-by: Ram Pai Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 09d8119024db..e6f76bc16d04 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -382,8 +382,7 @@ out: * Alloc a PFN from private device memory pool and copy page from normal * memory to secure memory using UV_PAGE_IN uvcall. */ -static int -kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, +static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long gpa, struct kvm *kvm, unsigned long page_shift, bool *downgrade) { @@ -450,8 +449,8 @@ out_finalize: * In the former case, uses dev_pagemap_ops.migrate_to_ram handler * to unmap the device page from QEMU's page tables. */ -static unsigned long -kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) +static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, + unsigned long page_shift) { int ret = H_PARAMETER; @@ -500,9 +499,9 @@ out: * H_PAGE_IN_SHARED flag makes the page shared which means that the same * memory in is visible from both UV and HV. */ -unsigned long -kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, - unsigned long flags, unsigned long page_shift) +unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, + unsigned long flags, + unsigned long page_shift) { bool downgrade = false; unsigned long start, end; @@ -559,10 +558,10 @@ out: * Provision a new page on HV side and copy over the contents * from secure memory using UV_PAGE_OUT uvcall. */ -static int -kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) +static int kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; -- cgit From 2027a24a75ce32cf48a22cc25a9d87b8c5b19328 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 27 Jul 2020 11:07:15 -0700 Subject: KVM: PPC: Book3S HV: Disable page merging in H_SVM_INIT_START Page-merging of pages in memory-slots associated with a Secure VM is disabled in H_SVM_PAGE_IN handler. This operation should have been done the much earlier; the moment the VM is initiated for secure-transition. Delaying this operation increases the probability for those pages to acquire new references, making it impossible to migrate those pages in H_SVM_PAGE_IN handler. Disable page-migration in H_SVM_INIT_START handling. Reviewed-by: Bharata B Rao Signed-off-by: Ram Pai Signed-off-by: Paul Mackerras --- Documentation/powerpc/ultravisor.rst | 1 + arch/powerpc/kvm/book3s_hv_uvmem.c | 123 +++++++++++++++++++++++++---------- 2 files changed, 89 insertions(+), 35 deletions(-) diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst index df136c8f91fa..a1c8c37159a1 100644 --- a/Documentation/powerpc/ultravisor.rst +++ b/Documentation/powerpc/ultravisor.rst @@ -895,6 +895,7 @@ Return values One of the following values: * H_SUCCESS on success. + * H_STATE if the VM is not in a position to switch to secure. Description ~~~~~~~~~~~ diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e6f76bc16d04..533b608c8d20 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -211,10 +211,79 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, return false; } +static int kvmppc_memslot_page_merge(struct kvm *kvm, + const struct kvm_memory_slot *memslot, bool merge) +{ + unsigned long gfn = memslot->base_gfn; + unsigned long end, start = gfn_to_hva(kvm, gfn); + int ret = 0; + struct vm_area_struct *vma; + int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE; + + if (kvm_is_error_hva(start)) + return H_STATE; + + end = start + (memslot->npages << PAGE_SHIFT); + + mmap_write_lock(kvm->mm); + do { + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma) { + ret = H_STATE; + break; + } + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, + merge_flag, &vma->vm_flags); + if (ret) { + ret = H_STATE; + break; + } + start = vma->vm_end; + } while (end > vma->vm_end); + + mmap_write_unlock(kvm->mm); + return ret; +} + +static void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); + kvmppc_uvmem_slot_free(kvm, memslot); + kvmppc_memslot_page_merge(kvm, memslot, true); +} + +static int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + int ret = H_PARAMETER; + + if (kvmppc_memslot_page_merge(kvm, memslot, false)) + return ret; + + if (kvmppc_uvmem_slot_init(kvm, memslot)) + goto out1; + + ret = uv_register_mem_slot(kvm->arch.lpid, + memslot->base_gfn << PAGE_SHIFT, + memslot->npages * PAGE_SIZE, + 0, memslot->id); + if (ret < 0) { + ret = H_PARAMETER; + goto out; + } + return 0; +out: + kvmppc_uvmem_slot_free(kvm, memslot); +out1: + kvmppc_memslot_page_merge(kvm, memslot, true); + return ret; +} + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; + struct kvm_memory_slot *memslot, *m; int ret = H_SUCCESS; int srcu_idx; @@ -232,23 +301,24 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return H_AUTHORITY; srcu_idx = srcu_read_lock(&kvm->srcu); + + /* register the memslot */ slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - if (kvmppc_uvmem_slot_init(kvm, memslot)) { - ret = H_PARAMETER; - goto out; - } - ret = uv_register_mem_slot(kvm->arch.lpid, - memslot->base_gfn << PAGE_SHIFT, - memslot->npages * PAGE_SIZE, - 0, memslot->id); - if (ret < 0) { - kvmppc_uvmem_slot_free(kvm, memslot); - ret = H_PARAMETER; - goto out; + ret = kvmppc_uvmem_memslot_create(kvm, memslot); + if (ret) + break; + } + + if (ret) { + slots = kvm_memslots(kvm); + kvm_for_each_memslot(m, slots) { + if (m == memslot) + break; + kvmppc_uvmem_memslot_delete(kvm, memslot); } } -out: + srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } @@ -384,7 +454,7 @@ out: */ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long gpa, struct kvm *kvm, - unsigned long page_shift, bool *downgrade) + unsigned long page_shift) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; @@ -400,18 +470,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, mig.src = &src_pfn; mig.dst = &dst_pfn; - /* - * We come here with mmap_lock write lock held just for - * ksm_madvise(), otherwise we only need read mmap_lock. - * Hence downgrade to read lock once ksm_madvise() is done. - */ - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags); - mmap_write_downgrade(kvm->mm); - *downgrade = true; - if (ret) - return ret; - ret = migrate_vma_setup(&mig); if (ret) return ret; @@ -503,7 +561,6 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, unsigned long flags, unsigned long page_shift) { - bool downgrade = false; unsigned long start, end; struct vm_area_struct *vma; int srcu_idx; @@ -524,7 +581,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, ret = H_PARAMETER; srcu_idx = srcu_read_lock(&kvm->srcu); - mmap_write_lock(kvm->mm); + mmap_read_lock(kvm->mm); start = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(start)) @@ -540,16 +597,12 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out_unlock; - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, - &downgrade)) + if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift)) ret = H_SUCCESS; out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); out: - if (downgrade) - mmap_read_unlock(kvm->mm); - else - mmap_write_unlock(kvm->mm); + mmap_read_unlock(kvm->mm); srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -- cgit From 651a6310111ef8e09394e77d6d959ab3d066930d Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 27 Jul 2020 11:07:16 -0700 Subject: KVM: PPC: Book3S HV: Track the state GFNs associated with secure VMs During the life of SVM, its GFNs transition through normal, secure and shared states. Since the kernel does not track GFNs that are shared, it is not possible to disambiguate a shared GFN from a GFN whose PFN has not yet been migrated to a secure-PFN. Also it is not possible to disambiguate a secure-GFN from a GFN whose GFN has been pagedout from the ultravisor. The ability to identify the state of a GFN is needed to skip migration of its PFN to secure-PFN during ESM transition. The code is re-organized to track the states of a GFN as explained below. ************************************************************************ 1. States of a GFN --------------- The GFN can be in one of the following states. (a) Secure - The GFN is secure. The GFN is associated with a Secure VM, the contents of the GFN is not accessible to the Hypervisor. This GFN can be backed by a secure-PFN, or can be backed by a normal-PFN with contents encrypted. The former is true when the GFN is paged-in into the ultravisor. The latter is true when the GFN is paged-out of the ultravisor. (b) Shared - The GFN is shared. The GFN is associated with a a secure VM. The contents of the GFN is accessible to Hypervisor. This GFN is backed by a normal-PFN and its content is un-encrypted. (c) Normal - The GFN is a normal. The GFN is associated with a normal VM. The contents of the GFN is accesible to the Hypervisor. Its content is never encrypted. 2. States of a VM. --------------- (a) Normal VM: A VM whose contents are always accessible to the hypervisor. All its GFNs are normal-GFNs. (b) Secure VM: A VM whose contents are not accessible to the hypervisor without the VM's consent. Its GFNs are either Shared-GFN or Secure-GFNs. (c) Transient VM: A Normal VM that is transitioning to secure VM. The transition starts on successful return of H_SVM_INIT_START, and ends on successful return of H_SVM_INIT_DONE. This transient VM, can have GFNs in any of the three states; i.e Secure-GFN, Shared-GFN, and Normal-GFN. The VM never executes in this state in supervisor-mode. 3. Memory slot State. ------------------ The state of a memory slot mirrors the state of the VM the memory slot is associated with. 4. VM State transition. -------------------- A VM always starts in Normal Mode. H_SVM_INIT_START moves the VM into transient state. During this time the Ultravisor may request some of its GFNs to be shared or secured. So its GFNs can be in one of the three GFN states. H_SVM_INIT_DONE moves the VM entirely from transient state to secure-state. At this point any left-over normal-GFNs are transitioned to Secure-GFN. H_SVM_INIT_ABORT moves the transient VM back to normal VM. All its GFNs are moved to Normal-GFNs. UV_TERMINATE transitions the secure-VM back to normal-VM. All the secure-GFN and shared-GFNs are tranistioned to normal-GFN Note: The contents of the normal-GFN is undefined at this point. 5. GFN state implementation: ------------------------- Secure GFN is associated with a secure-PFN; also called uvmem_pfn, when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag set, and contains the value of the secure-PFN. It is associated with a normal-PFN; also called mem_pfn, when the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set. The value of the normal-PFN is not tracked. Shared GFN is associated with a normal-PFN. Its pfn[] has KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN is not tracked. Normal GFN is associated with normal-PFN. Its pfn[] has no flag set. The value of the normal-PFN is not tracked. 6. Life cycle of a GFN -------------------- -------------------------------------------------------------- | | Share | Unshare | SVM |H_SVM_INIT_DONE| | |operation |operation | abort/ | | | | | | terminate | | ------------------------------------------------------------- | | | | | | | Secure | Shared | Secure |Normal |Secure | | | | | | | | Shared | Shared | Secure |Normal |Shared | | | | | | | | Normal | Shared | Secure |Normal |Secure | -------------------------------------------------------------- 7. Life cycle of a VM -------------------- -------------------------------------------------------------------- | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | | | | | | | | --------- ---------------------------------------------------------- | | | | | | | | Normal | Normal | Transient|Error |Error |Normal | | | | | | | | | Secure | Error | Error |Error |Error |Normal | | | | | | | | |Transient| N/A | Error |Secure |Normal |Normal | -------------------------------------------------------------------- ************************************************************************ Reviewed-by: Bharata B Rao Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 191 +++++++++++++++++++++++++++++++++---- 1 file changed, 172 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 533b608c8d20..1b2b0293da40 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -98,7 +98,127 @@ static struct dev_pagemap kvmppc_uvmem_pgmap; static unsigned long *kvmppc_uvmem_bitmap; static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); -#define KVMPPC_UVMEM_PFN (1UL << 63) +/* + * States of a GFN + * --------------- + * The GFN can be in one of the following states. + * + * (a) Secure - The GFN is secure. The GFN is associated with + * a Secure VM, the contents of the GFN is not accessible + * to the Hypervisor. This GFN can be backed by a secure-PFN, + * or can be backed by a normal-PFN with contents encrypted. + * The former is true when the GFN is paged-in into the + * ultravisor. The latter is true when the GFN is paged-out + * of the ultravisor. + * + * (b) Shared - The GFN is shared. The GFN is associated with a + * a secure VM. The contents of the GFN is accessible to + * Hypervisor. This GFN is backed by a normal-PFN and its + * content is un-encrypted. + * + * (c) Normal - The GFN is a normal. The GFN is associated with + * a normal VM. The contents of the GFN is accesible to + * the Hypervisor. Its content is never encrypted. + * + * States of a VM. + * --------------- + * + * Normal VM: A VM whose contents are always accessible to + * the hypervisor. All its GFNs are normal-GFNs. + * + * Secure VM: A VM whose contents are not accessible to the + * hypervisor without the VM's consent. Its GFNs are + * either Shared-GFN or Secure-GFNs. + * + * Transient VM: A Normal VM that is transitioning to secure VM. + * The transition starts on successful return of + * H_SVM_INIT_START, and ends on successful return + * of H_SVM_INIT_DONE. This transient VM, can have GFNs + * in any of the three states; i.e Secure-GFN, Shared-GFN, + * and Normal-GFN. The VM never executes in this state + * in supervisor-mode. + * + * Memory slot State. + * ----------------------------- + * The state of a memory slot mirrors the state of the + * VM the memory slot is associated with. + * + * VM State transition. + * -------------------- + * + * A VM always starts in Normal Mode. + * + * H_SVM_INIT_START moves the VM into transient state. During this + * time the Ultravisor may request some of its GFNs to be shared or + * secured. So its GFNs can be in one of the three GFN states. + * + * H_SVM_INIT_DONE moves the VM entirely from transient state to + * secure-state. At this point any left-over normal-GFNs are + * transitioned to Secure-GFN. + * + * H_SVM_INIT_ABORT moves the transient VM back to normal VM. + * All its GFNs are moved to Normal-GFNs. + * + * UV_TERMINATE transitions the secure-VM back to normal-VM. All + * the secure-GFN and shared-GFNs are tranistioned to normal-GFN + * Note: The contents of the normal-GFN is undefined at this point. + * + * GFN state implementation: + * ------------------------- + * + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn, + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag + * set, and contains the value of the secure-PFN. + * It is associated with a normal-PFN; also called mem_pfn, when + * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set. + * The value of the normal-PFN is not tracked. + * + * Shared GFN is associated with a normal-PFN. Its pfn[] has + * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN + * is not tracked. + * + * Normal GFN is associated with normal-PFN. Its pfn[] has + * no flag set. The value of the normal-PFN is not tracked. + * + * Life cycle of a GFN + * -------------------- + * + * -------------------------------------------------------------- + * | | Share | Unshare | SVM |H_SVM_INIT_DONE| + * | |operation |operation | abort/ | | + * | | | | terminate | | + * ------------------------------------------------------------- + * | | | | | | + * | Secure | Shared | Secure |Normal |Secure | + * | | | | | | + * | Shared | Shared | Secure |Normal |Shared | + * | | | | | | + * | Normal | Shared | Secure |Normal |Secure | + * -------------------------------------------------------------- + * + * Life cycle of a VM + * -------------------- + * + * -------------------------------------------------------------------- + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | + * | | | | | | | + * --------- ---------------------------------------------------------- + * | | | | | | | + * | Normal | Normal | Transient|Error |Error |Normal | + * | | | | | | | + * | Secure | Error | Error |Error |Error |Normal | + * | | | | | | | + * |Transient| N/A | Error |Secure |Normal |Normal | + * -------------------------------------------------------------------- + */ + +#define KVMPPC_GFN_UVMEM_PFN (1UL << 63) +#define KVMPPC_GFN_MEM_PFN (1UL << 62) +#define KVMPPC_GFN_SHARED (1UL << 61) +#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) +#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) +#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) struct kvmppc_uvmem_slot { struct list_head list; @@ -106,11 +226,11 @@ struct kvmppc_uvmem_slot { unsigned long base_pfn; unsigned long *pfns; }; - struct kvmppc_uvmem_page_pvt { struct kvm *kvm; unsigned long gpa; bool skip_page_out; + bool remove_gfn; }; bool kvmppc_uvmem_available(void) @@ -163,8 +283,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) mutex_unlock(&kvm->arch.uvmem_lock); } -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, - struct kvm *kvm) +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, + unsigned long flag, unsigned long uvmem_pfn) { struct kvmppc_uvmem_slot *p; @@ -172,24 +292,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; + if (flag == KVMPPC_GFN_UVMEM_PFN) + p->pfns[index] = uvmem_pfn | flag; + else + p->pfns[index] = flag; return; } } } -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */ +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, + unsigned long uvmem_pfn, struct kvm *kvm) { - struct kvmppc_uvmem_slot *p; + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); +} - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { - p->pfns[gfn - p->base_pfn] = 0; - return; - } - } +/* mark the GFN as secure-GFN associated with a memory-PFN. */ +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); } +/* mark the GFN as a shared GFN. */ +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); +} + +/* mark the GFN as a non-existent GFN. */ +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, 0, 0); +} + +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, unsigned long *uvmem_pfn) { @@ -199,10 +336,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - if (p->pfns[index] & KVMPPC_UVMEM_PFN) { + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { if (uvmem_pfn) *uvmem_pfn = p->pfns[index] & - ~KVMPPC_UVMEM_PFN; + KVMPPC_GFN_PFN_MASK; return true; } else return false; @@ -354,6 +491,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, mutex_lock(&kvm->arch.uvmem_lock); if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + kvmppc_gfn_remove(gfn, kvm); mutex_unlock(&kvm->arch.uvmem_lock); continue; } @@ -361,6 +499,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = skip_page_out; + pvt->remove_gfn = true; mutex_unlock(&kvm->arch.uvmem_lock); pfn = gfn_to_pfn(kvm, gfn); @@ -430,7 +569,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) goto out_clear; uvmem_pfn = bit + pfn_first; - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); pvt->gpa = gpa; pvt->kvm = kvm; @@ -525,6 +664,11 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + /* + * do not drop the GFN. It is a valid GFN + * that is transitioned to a shared GFN. + */ + pvt->remove_gfn = false; } retry: @@ -538,12 +682,16 @@ retry: uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + pvt->remove_gfn = false; /* it continues to be a valid GFN */ kvm_release_pfn_clean(pfn); goto retry; } - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, + page_shift)) { + kvmppc_gfn_shared(gfn, kvm); ret = H_SUCCESS; + } kvm_release_pfn_clean(pfn); mutex_unlock(&kvm->arch.uvmem_lock); out: @@ -599,6 +747,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift)) ret = H_SUCCESS; + out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); out: @@ -707,7 +856,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) /* * Release the device PFN back to the pool * - * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. + * Gets called when secure GFN tranistions from a secure-PFN + * to a normal PFN during H_SVM_PAGE_OUT. * Gets called with kvm->arch.uvmem_lock held. */ static void kvmppc_uvmem_page_free(struct page *page) @@ -722,7 +872,10 @@ static void kvmppc_uvmem_page_free(struct page *page) pvt = page->zone_device_data; page->zone_device_data = NULL; - kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + if (pvt->remove_gfn) + kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + else + kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); kfree(pvt); } -- cgit From dfaa973ae9605e1731eaef9f30c2b056346a4efc Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 27 Jul 2020 11:07:17 -0700 Subject: KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs The Ultravisor is expected to explicitly call H_SVM_PAGE_IN for all the pages of the SVM before calling H_SVM_INIT_DONE. This causes a huge delay in tranistioning the VM to SVM. The Ultravisor is only interested in the pages that contain the kernel, initrd and other important data structures. The rest contain throw-away content. However if not all pages are requested by the Ultravisor, the Hypervisor continues to consider the GFNs corresponding to the non-requested pages as normal GFNs. This can lead to data-corruption and undefined behavior. In H_SVM_INIT_DONE handler, move all the PFNs associated with the SVM's GFNs to secure-PFNs. Skip the GFNs that are already Paged-in or Shared or Paged-in followed by a Paged-out. Reviewed-by: Bharata B Rao Signed-off-by: Ram Pai Signed-off-by: Paul Mackerras --- Documentation/powerpc/ultravisor.rst | 2 + arch/powerpc/kvm/book3s_hv_uvmem.c | 154 ++++++++++++++++++++++++++++++----- 2 files changed, 134 insertions(+), 22 deletions(-) diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst index a1c8c37159a1..ba6b1bf1cc44 100644 --- a/Documentation/powerpc/ultravisor.rst +++ b/Documentation/powerpc/ultravisor.rst @@ -934,6 +934,8 @@ Return values * H_UNSUPPORTED if called from the wrong context (e.g. from an SVM or before an H_SVM_INIT_START hypercall). + * H_STATE if the hypervisor could not successfully + transition the VM to Secure VM. Description ~~~~~~~~~~~ diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 1b2b0293da40..a1664ae4d81f 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -93,6 +93,7 @@ #include #include #include +#include static struct dev_pagemap kvmppc_uvmem_pgmap; static unsigned long *kvmppc_uvmem_bitmap; @@ -348,6 +349,41 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, return false; } +/* + * starting from *gfn search for the next available GFN that is not yet + * transitioned to a secure GFN. return the value of that GFN in *gfn. If a + * GFN is found, return true, else return false + * + * Must be called with kvm->arch.uvmem_lock held. + */ +static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) +{ + struct kvmppc_uvmem_slot *p; + bool ret = false; + unsigned long i; + + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) + if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + break; + if (!p) + return ret; + /* + * The code below assumes, one to one correspondence between + * kvmppc_uvmem_slot and memslot. + */ + for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { + unsigned long index = i - p->base_pfn; + + if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { + *gfn = i; + ret = true; + break; + } + } + return ret; +} + static int kvmppc_memslot_page_merge(struct kvm *kvm, const struct kvm_memory_slot *memslot, bool merge) { @@ -460,16 +496,6 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return ret; } -unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) -{ - if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) - return H_UNSUPPORTED; - - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; - pr_info("LPID %d went secure\n", kvm->arch.lpid); - return H_SUCCESS; -} - /* * Drop device pages that we maintain for the secure guest * @@ -588,12 +614,14 @@ out: } /* - * Alloc a PFN from private device memory pool and copy page from normal - * memory to secure memory using UV_PAGE_IN uvcall. + * Alloc a PFN from private device memory pool. If @pagein is true, + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. */ -static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long gpa, struct kvm *kvm, - unsigned long page_shift) +static int kvmppc_svm_page_in(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long gpa, struct kvm *kvm, + unsigned long page_shift, + bool pagein) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; @@ -624,11 +652,16 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, goto out_finalize; } - pfn = *mig.src >> MIGRATE_PFN_SHIFT; - spage = migrate_pfn_to_page(*mig.src); - if (spage) - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, - page_shift); + if (pagein) { + pfn = *mig.src >> MIGRATE_PFN_SHIFT; + spage = migrate_pfn_to_page(*mig.src); + if (spage) { + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + if (ret) + goto out_finalize; + } + } *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; migrate_vma_pages(&mig); @@ -637,6 +670,80 @@ out_finalize: return ret; } +static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long gfn = memslot->base_gfn; + struct vm_area_struct *vma; + unsigned long start, end; + int ret = 0; + + mmap_read_lock(kvm->mm); + mutex_lock(&kvm->arch.uvmem_lock); + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { + ret = H_STATE; + start = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(start)) + break; + + end = start + (1UL << PAGE_SHIFT); + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma || vma->vm_start > start || vma->vm_end < end) + break; + + ret = kvmppc_svm_page_in(vma, start, end, + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); + if (ret) { + ret = H_STATE; + break; + } + + /* relinquish the cpu if needed */ + cond_resched(); + } + mutex_unlock(&kvm->arch.uvmem_lock); + mmap_read_unlock(kvm->mm); + return ret; +} + +unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int srcu_idx; + long ret = H_SUCCESS; + + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + /* migrate any unmoved normal pfn to device pfns*/ + srcu_idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); + if (ret) { + /* + * The pages will remain transitioned. + * Its the callers responsibility to + * terminate the VM, which will undo + * all state of the VM. Till then + * this VM is in a erroneous state. + * Its KVMPPC_SECURE_INIT_DONE will + * remain unset. + */ + ret = H_STATE; + goto out; + } + } + + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; + pr_info("LPID %d went secure\n", kvm->arch.lpid); + +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return ret; +} + /* * Shares the page with HV, thus making it a normal page. * @@ -745,8 +852,11 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out_unlock; - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift)) - ret = H_SUCCESS; + if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, + true)) + goto out_unlock; + + ret = H_SUCCESS; out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); -- cgit From a2ce72003863d0dcf680f0c49de4678ab2c6812b Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 27 Jul 2020 11:07:18 -0700 Subject: KVM: PPC: Book3S HV: Migrate hot plugged memory When a memory slot is hot plugged to a SVM, PFNs associated with the GFNs in that slot must be migrated to the secure-PFNs, aka device-PFNs. Call kvmppc_uv_migrate_mem_slot() to accomplish this. Disable page-merge for all pages in the memory slot. Reviewed-by: Bharata B Rao Signed-off-by: Ram Pai [rearranged the code, and modified the commit log] Signed-off-by: Laurent Dufour Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s_uvmem.h | 14 ++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 14 ++++++-------- arch/powerpc/kvm/book3s_hv_uvmem.c | 23 +++++++++++++++++++---- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 9cb7d8be2366..0a6319448cb6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out); +int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new); +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old); #else static inline int kvmppc_uvmem_init(void) { @@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out) { } + +static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new) +{ + return H_UNSUPPORTED; +} + +static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old) { } + #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d64a2dc1ccca..d9a9dd743b8f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4523,16 +4523,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, switch (change) { case KVM_MR_CREATE: - if (kvmppc_uvmem_slot_init(kvm, new)) - return; - uv_register_mem_slot(kvm->arch.lpid, - new->base_gfn << PAGE_SHIFT, - new->npages * PAGE_SIZE, - 0, new->id); + /* + * @TODO kvmppc_uvmem_memslot_create() can fail and + * return error. Fix this. + */ + kvmppc_uvmem_memslot_create(kvm, new); break; case KVM_MR_DELETE: - uv_unregister_mem_slot(kvm->arch.lpid, old->id); - kvmppc_uvmem_slot_free(kvm, old); + kvmppc_uvmem_memslot_delete(kvm, old); break; default: /* TODO: Handle KVM_MR_MOVE */ diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index a1664ae4d81f..5b917ea2fbf6 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -418,7 +418,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm, return ret; } -static void kvmppc_uvmem_memslot_delete(struct kvm *kvm, +static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *memslot) { uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); @@ -426,7 +426,7 @@ static void kvmppc_uvmem_memslot_delete(struct kvm *kvm, kvmppc_memslot_page_merge(kvm, memslot, true); } -static int kvmppc_uvmem_memslot_create(struct kvm *kvm, +static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *memslot) { int ret = H_PARAMETER; @@ -478,7 +478,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) /* register the memslot */ slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - ret = kvmppc_uvmem_memslot_create(kvm, memslot); + ret = __kvmppc_uvmem_memslot_create(kvm, memslot); if (ret) break; } @@ -488,7 +488,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) kvm_for_each_memslot(m, slots) { if (m == memslot) break; - kvmppc_uvmem_memslot_delete(kvm, memslot); + __kvmppc_uvmem_memslot_delete(kvm, memslot); } } @@ -1057,6 +1057,21 @@ out: return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT; } +int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) +{ + int ret = __kvmppc_uvmem_memslot_create(kvm, new); + + if (!ret) + ret = kvmppc_uv_migrate_mem_slot(kvm, new); + + return ret; +} + +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) +{ + __kvmppc_uvmem_memslot_delete(kvm, old); +} + static u64 kvmppc_get_secmem_size(void) { struct device_node *np; -- cgit From f1b87ea8784b8c79d9bffad60fe0720c2cb21ec7 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 27 Jul 2020 12:24:28 -0700 Subject: KVM: PPC: Book3S HV: Move kvmppc_svm_page_out up kvmppc_svm_page_out() will need to be called by kvmppc_uvmem_drop_pages() so move it up earlier in this file. Furthermore it will be interesting to call this function when already holding the kvm->arch.uvmem_lock, so prefix the original function with __ and remove the locking in it, and introduce a wrapper which call that function with the lock held. There is no functional change. Reviewed-by: Bharata B Rao Signed-off-by: Laurent Dufour Signed-off-by: Ram Pai Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 166 ++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 5b917ea2fbf6..565f24bbccf1 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -496,6 +496,96 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return ret; } +/* + * Provision a new page on HV side and copy over the contents + * from secure memory using UV_PAGE_OUT uvcall. + * Caller must held kvm->arch.uvmem_lock. + */ +static int __kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) +{ + unsigned long src_pfn, dst_pfn = 0; + struct migrate_vma mig; + struct page *dpage, *spage; + struct kvmppc_uvmem_page_pvt *pvt; + unsigned long pfn; + int ret = U_SUCCESS; + + memset(&mig, 0, sizeof(mig)); + mig.vma = vma; + mig.start = start; + mig.end = end; + mig.src = &src_pfn; + mig.dst = &dst_pfn; + mig.src_owner = &kvmppc_uvmem_pgmap; + + /* The requested page is already paged-out, nothing to do */ + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) + return ret; + + ret = migrate_vma_setup(&mig); + if (ret) + return -1; + + spage = migrate_pfn_to_page(*mig.src); + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) + goto out_finalize; + + if (!is_zone_device_page(spage)) + goto out_finalize; + + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); + if (!dpage) { + ret = -1; + goto out_finalize; + } + + lock_page(dpage); + pvt = spage->zone_device_data; + pfn = page_to_pfn(dpage); + + /* + * This function is used in two cases: + * - When HV touches a secure page, for which we do UV_PAGE_OUT + * - When a secure page is converted to shared page, we *get* + * the page to essentially unmap the device page. In this + * case we skip page-out. + */ + if (!pvt->skip_page_out) + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + + if (ret == U_SUCCESS) + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + else { + unlock_page(dpage); + __free_page(dpage); + goto out_finalize; + } + + migrate_vma_pages(&mig); + +out_finalize: + migrate_vma_finalize(&mig); + return ret; +} + +static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) +{ + int ret; + + mutex_lock(&kvm->arch.uvmem_lock); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + mutex_unlock(&kvm->arch.uvmem_lock); + + return ret; +} + /* * Drop device pages that we maintain for the secure guest * @@ -866,82 +956,6 @@ out: return ret; } -/* - * Provision a new page on HV side and copy over the contents - * from secure memory using UV_PAGE_OUT uvcall. - */ -static int kvmppc_svm_page_out(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) -{ - unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; - struct page *dpage, *spage; - struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn; - int ret = U_SUCCESS; - - memset(&mig, 0, sizeof(mig)); - mig.vma = vma; - mig.start = start; - mig.end = end; - mig.src = &src_pfn; - mig.dst = &dst_pfn; - mig.src_owner = &kvmppc_uvmem_pgmap; - - mutex_lock(&kvm->arch.uvmem_lock); - /* The requested page is already paged-out, nothing to do */ - if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) - goto out; - - ret = migrate_vma_setup(&mig); - if (ret) - goto out; - - spage = migrate_pfn_to_page(*mig.src); - if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) - goto out_finalize; - - if (!is_zone_device_page(spage)) - goto out_finalize; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); - if (!dpage) { - ret = -1; - goto out_finalize; - } - - lock_page(dpage); - pvt = spage->zone_device_data; - pfn = page_to_pfn(dpage); - - /* - * This function is used in two cases: - * - When HV touches a secure page, for which we do UV_PAGE_OUT - * - When a secure page is converted to shared page, we *get* - * the page to essentially unmap the device page. In this - * case we skip page-out. - */ - if (!pvt->skip_page_out) - ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, - gpa, 0, page_shift); - - if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; - else { - unlock_page(dpage); - __free_page(dpage); - goto out_finalize; - } - - migrate_vma_pages(&mig); -out_finalize: - migrate_vma_finalize(&mig); -out: - mutex_unlock(&kvm->arch.uvmem_lock); - return ret; -} /* * Fault handler callback that gets called when HV touches any page that -- cgit From 81ab595ddd3c3036806b460526e1fbc5b271ff33 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 27 Jul 2020 12:24:29 -0700 Subject: KVM: PPC: Book3S HV: Rework secure mem slot dropping When a secure memslot is dropped, all the pages backed in the secure device (aka really backed by secure memory by the Ultravisor) should be paged out to a normal page. Previously, this was achieved by triggering the page fault mechanism which is calling kvmppc_svm_page_out() on each pages. This can't work when hot unplugging a memory slot because the memory slot is flagged as invalid and gfn_to_pfn() is then not trying to access the page, so the page fault mechanism is not triggered. Since the final goal is to make a call to kvmppc_svm_page_out() it seems simpler to call directly instead of triggering such a mechanism. This way kvmppc_uvmem_drop_pages() can be called even when hot unplugging a memslot. Since kvmppc_uvmem_drop_pages() is already holding kvm->arch.uvmem_lock, the call to __kvmppc_svm_page_out() is made. As __kvmppc_svm_page_out needs the vma pointer to migrate the pages, the VMA is fetched in a lazy way, to not trigger find_vma() all the time. In addition, the mmap_sem is held in read mode during that time, not in write mode since the virual memory layout is not impacted, and kvm->arch.uvmem_lock prevents concurrent operation on the secure device. Reviewed-by: Bharata B Rao Signed-off-by: Laurent Dufour [modified check on the VMA in kvmppc_uvmem_drop_pages] Signed-off-by: Ram Pai [modified the changelog description] Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 52 +++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 565f24bbccf1..0d49e3425a12 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -594,35 +594,53 @@ static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, * fault on them, do fault time migration to replace the device PTEs in * QEMU page table with normal PTEs from newly allocated pages. */ -void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, +void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn, uvmem_pfn; - unsigned long gfn = free->base_gfn; + struct page *uvmem_page; + struct vm_area_struct *vma = NULL; + unsigned long uvmem_pfn, gfn; + unsigned long addr; + + mmap_read_lock(kvm->mm); + + addr = slot->userspace_addr; - for (i = free->npages; i; --i, ++gfn) { - struct page *uvmem_page; + gfn = slot->base_gfn; + for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { + + /* Fetch the VMA if addr is not in the latest fetched one */ + if (!vma || addr >= vma->vm_end) { + vma = find_vma_intersection(kvm->mm, addr, addr+1); + if (!vma) { + pr_err("Can't find VMA for gfn:0x%lx\n", gfn); + break; + } + } mutex_lock(&kvm->arch.uvmem_lock); - if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + uvmem_page = pfn_to_page(uvmem_pfn); + pvt = uvmem_page->zone_device_data; + pvt->skip_page_out = skip_page_out; + pvt->remove_gfn = true; + + if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, + PAGE_SHIFT, kvm, pvt->gpa)) + pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", + pvt->gpa, addr); + } else { + /* Remove the shared flag if any */ kvmppc_gfn_remove(gfn, kvm); - mutex_unlock(&kvm->arch.uvmem_lock); - continue; } - uvmem_page = pfn_to_page(uvmem_pfn); - pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = skip_page_out; - pvt->remove_gfn = true; mutex_unlock(&kvm->arch.uvmem_lock); - - pfn = gfn_to_pfn(kvm, gfn); - if (is_error_noslot_pfn(pfn)) - continue; - kvm_release_pfn_clean(pfn); } + + mmap_read_unlock(kvm->mm); } unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) -- cgit From 24f69c0fa4e252f706884114b7d6353aa07678b5 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 21 Jul 2020 10:44:44 +0100 Subject: KVM: arm64: Make nVHE ASLR conditional on RANDOMIZE_BASE If there are spare bits in non-VHE hyp VA, KVM unconditionally replaces them with a random tag chosen at init. Disable this if the kernel is built without RANDOMIZE_BASE to align with kernel behavior. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200721094445.82184-2-dbrazdil@google.com --- arch/arm64/kvm/va_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index a4f48c1ac28c..e0404bcab019 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -48,7 +48,7 @@ __init void kvm_compute_layout(void) va_mask = GENMASK_ULL(tag_lsb - 1, 0); tag_val = hyp_va_msb; - if (tag_lsb != (vabits_actual - 1)) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { /* We have some free bits to insert a random tag. */ tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } -- cgit From a59a2edbbba7397fede86e40a3da17e5beebf98b Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 21 Jul 2020 10:44:45 +0100 Subject: KVM: arm64: Substitute RANDOMIZE_BASE for HARDEN_EL2_VECTORS The HARDEN_EL2_VECTORS config maps vectors at a fixed location on cores which are susceptible to Spector variant 3a (A57, A72) to prevent defeating hyp layout randomization by leaking the value of VBAR_EL2. Since this feature is only applicable when EL2 layout randomization is enabled, unify both behind the same RANDOMIZE_BASE Kconfig. Majority of code remains conditional on a capability selected for the affected cores. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200721094445.82184-3-dbrazdil@google.com --- arch/arm64/Kconfig | 16 ---------------- arch/arm64/include/asm/mmu.h | 6 ++---- arch/arm64/kernel/cpu_errata.c | 4 ++-- arch/arm64/kvm/Kconfig | 2 +- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..152deef3277e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1182,22 +1182,6 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. -config HARDEN_EL2_VECTORS - bool "Harden EL2 vector mapping against system register leak" if EXPERT - default y - help - Speculation attacks against some high-performance processors can - be used to leak privileged information such as the vector base - register, resulting in a potential defeat of the EL2 layout - randomization. - - This config option will map the vectors to a fixed location, - independent of the EL2 code mapping, so that revealing VBAR_EL2 - to an attacker does not give away any extra information. This - only gets enabled on affected CPUs. - - If unsure, say Y. - config ARM64_SSBD bool "Speculative Store Bypass Disable" if EXPERT default y diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 68140fdd89d6..bd12011eb560 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -42,12 +42,10 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ - defined(CONFIG_HARDEN_EL2_VECTORS)) - +#ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __bp_harden_hyp_vecs[]; extern atomic_t arm64_el2_vector_last_slot; -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ +#endif #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ad06d6802d2e..a524142e55d0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -635,7 +635,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#if defined(CONFIG_HARDEN_EL2_VECTORS) +#ifdef CONFIG_RANDOMIZE_BASE static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -880,7 +880,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#ifdef CONFIG_HARDEN_EL2_VECTORS +#ifdef CONFIG_RANDOMIZE_BASE { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 13489aff4440..318c8f2df245 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -58,7 +58,7 @@ config KVM_ARM_PMU virtual machines. config KVM_INDIRECT_VECTORS - def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE endif # KVM -- cgit From 9a74a2b87f4ca6c987039b3fb96d32093584afc2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Jul 2020 11:44:41 +0100 Subject: NFS: remove redundant initialization of variable result The variable result is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3d113cf8908a..99b110431923 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -901,7 +901,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, */ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t result = -EINVAL, requested; + ssize_t result, requested; size_t count; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; -- cgit From 9e29420ddb1331de1f86fe544a460d8f2c6d11c6 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:19 -0700 Subject: i2c: tegra: Don't mark VI I2C as IRQ safe runtime PM Tegra VI I2C is part of VE power domain and typically used for camera usecases. VE power domain is not always on and is non-IRQ safe. So, IRQ safe device cannot be attached to a non-IRQ safe domain as it prevents powering off the PM domain and generic power domain driver will warn. Current driver marks all I2C devices as IRQ safe and VI I2C device does not require IRQ safe as it will not be used for atomic transfers. This patch has fix to make VI I2C as non-IRQ safe. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 15772964a05f..3be101824e80 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1750,7 +1750,15 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto unprepare_slow_clk; } - pm_runtime_irq_safe(&pdev->dev); + /* + * VI I2C is in VE power domain which is not always on and not + * an IRQ safe. So, IRQ safe device can't be attached to a non-IRQ + * safe domain as it prevents powering off the PM domain. + * Also, VI I2C device don't need to use runtime IRQ safe as it will + * not be used for atomic transfers. + */ + if (!i2c_dev->is_vi) + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { ret = tegra_i2c_runtime_resume(&pdev->dev); -- cgit From 7232f53e73839bec4a2747d0c01d29994f9f39b9 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:20 -0700 Subject: i2c: tegra: Remove NULL pointer check before clk_enable/disable/prepare/unprepare clk_enable, clk_disable, clk_prepare, and clk_unprepare APIs have implementation for checking clk pointer not NULL and clock consumers can safely call these APIs without NULL pointer check. So, this patch cleans up Tegra i2c driver to remove explicit checks before these APIs. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 64 +++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 3be101824e80..c91307b9e13d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -655,21 +655,17 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) if (ret) return ret; - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_enable(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, - "Enabling fast clk failed, err %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, + "Enabling fast clk failed, err %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_enable(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to enable slow clock: %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to enable slow clock: %d\n", ret); + return ret; } ret = clk_enable(i2c_dev->div_clk); @@ -688,12 +684,8 @@ static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); clk_disable(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_disable(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_disable(i2c_dev->fast_clk); + clk_disable(i2c_dev->slow_clk); + clk_disable(i2c_dev->fast_clk); return pinctrl_pm_select_idle_state(i2c_dev->dev); } @@ -1716,20 +1708,16 @@ static int tegra_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c_dev); - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_prepare(i2c_dev->fast_clk); - if (ret < 0) { - dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - return ret; - } + ret = clk_prepare(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_prepare(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to prepare slow clock: %d\n", ret); - goto unprepare_fast_clk; - } + ret = clk_prepare(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to prepare slow clock: %d\n", ret); + goto unprepare_fast_clk; } if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ && @@ -1843,12 +1831,10 @@ unprepare_div_clk: clk_unprepare(i2c_dev->div_clk); unprepare_slow_clk: - if (i2c_dev->is_vi) - clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->slow_clk); unprepare_fast_clk: - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->fast_clk); return ret; } @@ -1867,12 +1853,8 @@ static int tegra_i2c_remove(struct platform_device *pdev) tegra_i2c_runtime_suspend(&pdev->dev); clk_unprepare(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_unprepare(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->fast_clk); tegra_i2c_release_dma(i2c_dev); return 0; -- cgit From 42aa38b54e35216351e52bb03382a38edc6f1bb8 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:21 -0700 Subject: i2c: tegra: Fix the error path in tegra_i2c_runtime_resume tegra_i2c_runtime_resume does not disable prior enabled clocks properly. This patch fixes it. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c91307b9e13d..7b93c454b4c7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -665,18 +665,23 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) ret = clk_enable(i2c_dev->slow_clk); if (ret < 0) { dev_err(dev, "failed to enable slow clock: %d\n", ret); - return ret; + goto disable_fast_clk; } ret = clk_enable(i2c_dev->div_clk); if (ret < 0) { dev_err(i2c_dev->dev, "Enabling div clk failed, err %d\n", ret); - clk_disable(i2c_dev->fast_clk); - return ret; + goto disable_slow_clk; } return 0; + +disable_slow_clk: + clk_disable(i2c_dev->slow_clk); +disable_fast_clk: + clk_disable(i2c_dev->fast_clk); + return ret; } static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) -- cgit From 0d722620986722208c6bc2a67f5d68aa73b7cd5f Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:22 -0700 Subject: i2c: tegra: Fix runtime resume to re-init VI I2C VI I2C is on host1x bus and is part of VE power domain. During suspend/resume VE power domain goes through power off/on. So, controller reset followed by i2c re-initialization is required after the domain power up. This patch fixes it. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 7b93c454b4c7..1bf36669ca67 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -293,6 +293,8 @@ struct tegra_i2c_dev { bool is_curr_atomic_xfer; }; +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit); + static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { @@ -675,8 +677,22 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) goto disable_slow_clk; } + /* + * VI I2C device is attached to VE power domain which goes through + * power ON/OFF during PM runtime resume/suspend. So, controller + * should go through reset and need to re-initialize after power + * domain ON. + */ + if (i2c_dev->is_vi) { + ret = tegra_i2c_init(i2c_dev, true); + if (ret) + goto disable_div_clk; + } + return 0; +disable_div_clk: + clk_disable(i2c_dev->div_clk); disable_slow_clk: clk_disable(i2c_dev->slow_clk); disable_fast_clk: -- cgit From afca861bc6a3141c858d08279eb9afca76584fa6 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 27 Jul 2020 13:57:23 -0700 Subject: i2c: tegra: Avoid tegra_i2c_init_dma() for Tegra210 vi i2c VI I2C is on host1x bus so APB DMA can't be used for Tegra210 VI I2C and there are no tx and rx dma channels for VI I2C. So, avoid attempt of requesting DMA channels. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1bf36669ca67..00d3e4d7a01e 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -421,7 +421,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) dma_addr_t dma_phys; int err; - if (!i2c_dev->hw->has_apb_dma) + if (!i2c_dev->hw->has_apb_dma || i2c_dev->is_vi) return 0; if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { -- cgit From 4299f85a67480cdb43a3c4c1840a05259727e83c Mon Sep 17 00:00:00 2001 From: Lars Povlsen Date: Mon, 27 Jul 2020 10:42:08 +0200 Subject: dt-bindings: clock: sparx5: Add bindings include file The Sparx5 support 9 different clock outputs. This include file has defines for each supported clock ordinal. Signed-off-by: Lars Povlsen Reviewed-by: Stephen Boyd Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200727084211.6632-8-lars.povlsen@microchip.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/microchip,sparx5.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/dt-bindings/clock/microchip,sparx5.h diff --git a/include/dt-bindings/clock/microchip,sparx5.h b/include/dt-bindings/clock/microchip,sparx5.h new file mode 100644 index 000000000000..4b04dabacec2 --- /dev/null +++ b/include/dt-bindings/clock/microchip,sparx5.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019 Microchip Inc. + * + * Author: Lars Povlsen + */ + +#ifndef _DT_BINDINGS_CLK_SPARX5_H +#define _DT_BINDINGS_CLK_SPARX5_H + +#define CLK_ID_CORE 0 +#define CLK_ID_DDR 1 +#define CLK_ID_CPU2 2 +#define CLK_ID_ARM2 3 +#define CLK_ID_AUX1 4 +#define CLK_ID_AUX2 5 +#define CLK_ID_AUX3 6 +#define CLK_ID_AUX4 7 +#define CLK_ID_SYNCE 8 + +#define N_CLOCKS 9 + +#endif -- cgit From 53727eb6b3c210e826bb4c9d0aa89f65a5ae9342 Mon Sep 17 00:00:00 2001 From: Lars Povlsen Date: Mon, 27 Jul 2020 10:42:09 +0200 Subject: clk: sparx5: Add Sparx5 SoC DPLL clock driver This adds a device driver for the Sparx5 SoC DPLL clock Signed-off-by: Lars Povlsen Link: https://lore.kernel.org/r/20200727084211.6632-9-lars.povlsen@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/Makefile | 1 + drivers/clk/clk-sparx5.c | 295 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 drivers/clk/clk-sparx5.c diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index ca9af11d3391..da8fcf147eb1 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_COMMON_CLK_CDCE925) += clk-cdce925.o obj-$(CONFIG_ARCH_CLPS711X) += clk-clps711x.o obj-$(CONFIG_COMMON_CLK_CS2000_CP) += clk-cs2000-cp.o obj-$(CONFIG_ARCH_EFM32) += clk-efm32gg.o +obj-$(CONFIG_ARCH_SPARX5) += clk-sparx5.o obj-$(CONFIG_COMMON_CLK_FIXED_MMIO) += clk-fixed-mmio.o obj-$(CONFIG_COMMON_CLK_FSL_SAI) += clk-fsl-sai.o obj-$(CONFIG_COMMON_CLK_GEMINI) += clk-gemini.o diff --git a/drivers/clk/clk-sparx5.c b/drivers/clk/clk-sparx5.c new file mode 100644 index 000000000000..0fad0c1a0186 --- /dev/null +++ b/drivers/clk/clk-sparx5.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Microchip Sparx5 SoC Clock driver. + * + * Copyright (c) 2019 Microchip Inc. + * + * Author: Lars Povlsen + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define PLL_DIV GENMASK(7, 0) +#define PLL_PRE_DIV GENMASK(10, 8) +#define PLL_ROT_DIR BIT(11) +#define PLL_ROT_SEL GENMASK(13, 12) +#define PLL_ROT_ENA BIT(14) +#define PLL_CLK_ENA BIT(15) + +#define MAX_SEL 4 +#define MAX_PRE BIT(3) + +static const u8 sel_rates[MAX_SEL] = { 0, 2*8, 2*4, 2*2 }; + +static const char *clk_names[N_CLOCKS] = { + "core", "ddr", "cpu2", "arm2", + "aux1", "aux2", "aux3", "aux4", + "synce", +}; + +struct s5_hw_clk { + struct clk_hw hw; + void __iomem *reg; +}; + +struct s5_clk_data { + void __iomem *base; + struct s5_hw_clk s5_hw[N_CLOCKS]; +}; + +struct s5_pll_conf { + unsigned long freq; + u8 div; + bool rot_ena; + u8 rot_sel; + u8 rot_dir; + u8 pre_div; +}; + +#define to_s5_pll(hw) container_of(hw, struct s5_hw_clk, hw) + +static unsigned long s5_calc_freq(unsigned long parent_rate, + const struct s5_pll_conf *conf) +{ + unsigned long rate = parent_rate / conf->div; + + if (conf->rot_ena) { + int sign = conf->rot_dir ? -1 : 1; + int divt = sel_rates[conf->rot_sel] * (1 + conf->pre_div); + int divb = divt + sign; + + rate = mult_frac(rate, divt, divb); + rate = roundup(rate, 1000); + } + + return rate; +} + +static void s5_search_fractional(unsigned long rate, + unsigned long parent_rate, + int div, + struct s5_pll_conf *conf) +{ + struct s5_pll_conf best; + ulong cur_offset, best_offset = rate; + int d, i, j; + + memset(conf, 0, sizeof(*conf)); + conf->div = div; + conf->rot_ena = 1; /* Fractional rate */ + + for (d = 0; best_offset > 0 && d <= 1 ; d++) { + conf->rot_dir = !!d; + for (i = 0; best_offset > 0 && i < MAX_PRE; i++) { + conf->pre_div = i; + for (j = 1; best_offset > 0 && j < MAX_SEL; j++) { + conf->rot_sel = j; + conf->freq = s5_calc_freq(parent_rate, conf); + cur_offset = abs(rate - conf->freq); + if (cur_offset < best_offset) { + best_offset = cur_offset; + best = *conf; + } + } + } + } + + /* Best match */ + *conf = best; +} + +static unsigned long s5_calc_params(unsigned long rate, + unsigned long parent_rate, + struct s5_pll_conf *conf) +{ + if (parent_rate % rate) { + struct s5_pll_conf alt1, alt2; + int div; + + div = DIV_ROUND_CLOSEST_ULL(parent_rate, rate); + s5_search_fractional(rate, parent_rate, div, &alt1); + + /* Straight match? */ + if (alt1.freq == rate) { + *conf = alt1; + } else { + /* Try without rounding divider */ + div = parent_rate / rate; + if (div != alt1.div) { + s5_search_fractional(rate, parent_rate, div, + &alt2); + /* Select the better match */ + if (abs(rate - alt1.freq) < + abs(rate - alt2.freq)) + *conf = alt1; + else + *conf = alt2; + } + } + } else { + /* Straight fit */ + memset(conf, 0, sizeof(*conf)); + conf->div = parent_rate / rate; + } + + return conf->freq; +} + +static int s5_pll_enable(struct clk_hw *hw) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + u32 val = readl(pll->reg); + + val |= PLL_CLK_ENA; + writel(val, pll->reg); + + return 0; +} + +static void s5_pll_disable(struct clk_hw *hw) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + u32 val = readl(pll->reg); + + val &= ~PLL_CLK_ENA; + writel(val, pll->reg); +} + +static int s5_pll_set_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + struct s5_pll_conf conf; + unsigned long eff_rate; + u32 val; + + eff_rate = s5_calc_params(rate, parent_rate, &conf); + if (eff_rate != rate) + return -EOPNOTSUPP; + + val = readl(pll->reg) & PLL_CLK_ENA; + val |= FIELD_PREP(PLL_DIV, conf.div); + if (conf.rot_ena) { + val |= PLL_ROT_ENA; + val |= FIELD_PREP(PLL_ROT_SEL, conf.rot_sel); + val |= FIELD_PREP(PLL_PRE_DIV, conf.pre_div); + if (conf.rot_dir) + val |= PLL_ROT_DIR; + } + writel(val, pll->reg); + + return 0; +} + +static unsigned long s5_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + struct s5_pll_conf conf; + u32 val; + + val = readl(pll->reg); + + if (val & PLL_CLK_ENA) { + conf.div = FIELD_GET(PLL_DIV, val); + conf.pre_div = FIELD_GET(PLL_PRE_DIV, val); + conf.rot_ena = FIELD_GET(PLL_ROT_ENA, val); + conf.rot_dir = FIELD_GET(PLL_ROT_DIR, val); + conf.rot_sel = FIELD_GET(PLL_ROT_SEL, val); + + conf.freq = s5_calc_freq(parent_rate, &conf); + } else { + conf.freq = 0; + } + + return conf.freq; +} + +static long s5_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct s5_pll_conf conf; + + return s5_calc_params(rate, *parent_rate, &conf); +} + +static const struct clk_ops s5_pll_ops = { + .enable = s5_pll_enable, + .disable = s5_pll_disable, + .set_rate = s5_pll_set_rate, + .round_rate = s5_pll_round_rate, + .recalc_rate = s5_pll_recalc_rate, +}; + +static struct clk_hw *s5_clk_hw_get(struct of_phandle_args *clkspec, void *data) +{ + struct s5_clk_data *s5_clk = data; + unsigned int idx = clkspec->args[0]; + + if (idx >= N_CLOCKS) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return &s5_clk->s5_hw[idx].hw; +} + +static int s5_clk_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int i, ret; + struct s5_clk_data *s5_clk; + struct clk_parent_data pdata = { .index = 0 }; + struct clk_init_data init = { + .ops = &s5_pll_ops, + .num_parents = 1, + .parent_data = &pdata, + }; + + s5_clk = devm_kzalloc(dev, sizeof(*s5_clk), GFP_KERNEL); + if (!s5_clk) + return -ENOMEM; + + s5_clk->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(s5_clk->base)) + return PTR_ERR(s5_clk->base); + + for (i = 0; i < N_CLOCKS; i++) { + struct s5_hw_clk *s5_hw = &s5_clk->s5_hw[i]; + + init.name = clk_names[i]; + s5_hw->reg = s5_clk->base + (i * 4); + s5_hw->hw.init = &init; + ret = devm_clk_hw_register(dev, &s5_hw->hw); + if (ret) { + dev_err(dev, "failed to register %s clock\n", + init.name); + return ret; + } + } + + return devm_of_clk_add_hw_provider(dev, s5_clk_hw_get, s5_clk); +} + +static const struct of_device_id s5_clk_dt_ids[] = { + { .compatible = "microchip,sparx5-dpll", }, + { } +}; +MODULE_DEVICE_TABLE(of, s5_clk_dt_ids); + +static struct platform_driver s5_clk_driver = { + .probe = s5_clk_probe, + .driver = { + .name = "sparx5-clk", + .of_match_table = s5_clk_dt_ids, + }, +}; +builtin_platform_driver(s5_clk_driver); -- cgit From 4aeccdf0671125adee903b651fb134a4759ce161 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 29 Jul 2020 13:34:39 +0200 Subject: clk: mmp: avoid missing prototype warning The kernel test robot points out two harmless warnings in the mmp clk drivers: drivers/clk/mmp/clk-pxa168.c:68:13: warning: no previous prototype for 'pxa168_clk_init' [-Wmissing-prototypes] drivers/clk/mmp/clk-pxa910.c:66:13: warning: no previous prototype for 'pxa910_clk_init' [-Wmissing-prototypes] Fix these by including corresponding header file. Reported-by: kernel test robot Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200729113456.4072290-1-arnd@arndb.de Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-pxa168.c | 1 + drivers/clk/mmp/clk-pxa910.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c index 8e2551ab8462..b351039cac09 100644 --- a/drivers/clk/mmp/clk-pxa168.c +++ b/drivers/clk/mmp/clk-pxa168.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c index 7a7965141918..f254ceff3ea7 100644 --- a/drivers/clk/mmp/clk-pxa910.c +++ b/drivers/clk/mmp/clk-pxa910.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include -- cgit From be13d94b7d7f1a3e7290573faa16267941917106 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Jul 2020 12:12:58 +0100 Subject: drm: xlnx: fix spelling mistake "failes" -> "failed" There is a spelling mistake in a dev_dbg messages. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Laurent Pinchart Reviewed-by: Hyun Kwon Signed-off-by: Hyun Kwon Link: https://patchwork.freedesktop.org/patch/msgid/20200724111258.14762-1-colin.king@canonical.com --- drivers/gpu/drm/xlnx/zynqmp_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 821f7a71e182..0e1c818746eb 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1308,7 +1308,7 @@ zynqmp_dp_connector_detect(struct drm_connector *connector, bool force) ret = drm_dp_dpcd_read(&dp->aux, 0x0, dp->dpcd, sizeof(dp->dpcd)); if (ret < 0) { - dev_dbg(dp->dev, "DPCD read failes"); + dev_dbg(dp->dev, "DPCD read failed"); goto disconnected; } -- cgit From 2d889db7626d2b6d67f402e2478b11f9a49b44e3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 25 Jul 2020 06:34:29 +0000 Subject: drm: xlnx: Fix typo in parameter description Fix typo in parameter description. Fixes: d76271d22694 ("drm: xlnx: DRM/KMS driver for Xilinx ZynqMP DisplayPort Subsystem") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Laurent Pinchart Reviewed-by: Hyun Kwon Signed-off-by: Hyun Kwon Link: https://patchwork.freedesktop.org/patch/msgid/20200725063429.172139-1-weiyongjun1@huawei.com --- drivers/gpu/drm/xlnx/zynqmp_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 0e1c818746eb..b735072a702d 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(aux_timeout_ms, "DP aux timeout value in msec (default: 50)"); */ static uint zynqmp_dp_power_on_delay_ms = 4; module_param_named(power_on_delay_ms, zynqmp_dp_power_on_delay_ms, uint, 0444); -MODULE_PARM_DESC(aux_timeout_ms, "DP power on delay in msec (default: 4)"); +MODULE_PARM_DESC(power_on_delay_ms, "DP power on delay in msec (default: 4)"); /* Link configuration registers */ #define ZYNQMP_DP_LINK_BW_SET 0x0 -- cgit From 9443f2c88ad916cad7efcd40fabc3e8c33c98594 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 29 Jun 2020 13:47:49 +0100 Subject: pwm: bcm-iproc: Remove impossible comparison when validating duty cycle 'duty' here is an unsigned int, thus checking for <0 will always evaluate to false. Fixes the following W=1 warning: drivers/pwm/pwm-bcm-iproc.c:147:12: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] Cc: Ray Jui Cc: Scott Branden Cc: Yendapally Reddy Dhananjaya Reddy Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-pwm@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm-iproc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 1f829edd8ee7..46f0a45e9049 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -143,8 +143,7 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, value = rate * state->duty_cycle; duty = div64_u64(value, div); - if (period < IPROC_PWM_PERIOD_MIN || - duty < IPROC_PWM_DUTY_CYCLE_MIN) + if (period < IPROC_PWM_PERIOD_MIN) return -EINVAL; if (period <= IPROC_PWM_PERIOD_MAX && -- cgit From 75de72591aa8ffa3acdc14cc1d3708ce8ac09245 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 29 Jun 2020 13:47:50 +0100 Subject: pwm: bcm-kona: Remove impossible comparison when validating duty cycle 'dc' here is an unsigned long, thus checking for <0 will always evaluate to false. Fixes the following W=1 warning: drivers/pwm/pwm-bcm-kona.c:141:35: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] Cc: Florian Fainelli Cc: Ray Jui Cc: Scott Branden Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-pwm@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm-kona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index 81da91df2529..16c5898b934a 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -138,7 +138,7 @@ static int kona_pwmc_config(struct pwm_chip *chip, struct pwm_device *pwm, dc = div64_u64(val, div); /* If duty_ns or period_ns are not achievable then return */ - if (pc < PERIOD_COUNT_MIN || dc < DUTY_CYCLE_HIGH_MIN) + if (pc < PERIOD_COUNT_MIN) return -EINVAL; /* If pc and dc are in bounds, the calculation is done */ -- cgit From fc810e7c7918416668d8801593c3e108db2da3d4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 29 Jun 2020 13:47:51 +0100 Subject: pwm: mediatek: Provide missing kerneldoc description for 'soc' arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kerneldoc syntax is used, but not complete. Descriptions are required for all arguments. Fixes the following W=1 build warning: drivers/pwm/pwm-mediatek.c:57: warning: Function parameter or member 'soc' not described in 'pwm_mediatek_chip' Cc: Matthias Brugger Cc: John Crispin Cc: Zhi Mao Cc: linux-pwm@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Signed-off-by: Lee Jones Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index b94e0d09c300..ab001ce55178 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -46,6 +46,7 @@ struct pwm_mediatek_of_data { * @clk_main: the clock used by PWM core * @clk_pwms: the clock used by each PWM channel * @clk_freq: the fix clock frequency of legacy MIPS SoC + * @soc: pointer to chip's platform data */ struct pwm_mediatek_chip { struct pwm_chip chip; -- cgit From dfd9b6154d5df4160f2c196d54dcf9223a7df959 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 29 Jun 2020 13:47:52 +0100 Subject: pwm: omap-dmtimer: Repair pwm_omap_dmtimer_chip's broken kerneldoc header Argument descriptions must be prepended with a '@' to be understood by the kerneldoc tooling/parsers/validators. Fixes the following W=1 warning: drivers/pwm/pwm-omap-dmtimer.c:70: warning: Function parameter or member 'dm_timer_pdev' not described in 'pwm_omap_dmtimer_chip' Cc: Tony Lindgren Cc: Joachim Eastwood Cc: NeilBrown Cc: Grant Erickson Signed-off-by: Lee Jones Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 0d31833db2e2..75cea7f2aff5 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -58,7 +58,7 @@ * @mutex: Mutex to protect pwm apply state * @dm_timer: Pointer to omap dm timer. * @pdata: Pointer to omap dm timer ops. - * dm_timer_pdev: Pointer to omap dm timer platform device + * @dm_timer_pdev: Pointer to omap dm timer platform device */ struct pwm_omap_dmtimer_chip { struct pwm_chip chip; -- cgit From 216a094de2521099e4ae04c5d07d8603ef9b24f1 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 19:59:24 +0200 Subject: pwm: Replace HTTP links with HTTPS ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-omap-dmtimer.c | 2 +- drivers/pwm/pwm-tiecap.c | 2 +- drivers/pwm/pwm-tiehrpwm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 75cea7f2aff5..358db4ff9d4f 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -14,7 +14,7 @@ * with a timer counter that goes up. When it overflows it gets * reloaded with the load value and the pwm output goes up. * When counter matches with match register, the output goes down. - * Reference Manual: http://www.ti.com/lit/ug/spruh73q/spruh73q.pdf + * Reference Manual: https://www.ti.com/lit/ug/spruh73q/spruh73q.pdf * * Limitations: * - When PWM is stopped, timer counter gets stopped immediately. This diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index ab38c8203b79..683804c7d26c 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -2,7 +2,7 @@ /* * ECAP PWM driver * - * Copyright (C) 2012 Texas Instruments, Inc. - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments, Inc. - https://www.ti.com/ */ #include diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 7b4c770ce9d6..0846917ff2d2 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -2,7 +2,7 @@ /* * EHRPWM PWM driver * - * Copyright (C) 2012 Texas Instruments, Inc. - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments, Inc. - https://www.ti.com/ */ #include -- cgit From 6ced5ff0be8e94871ba846dfbddf69d21363f3d7 Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Fri, 17 Jul 2020 21:46:06 -0700 Subject: pwm: bcm-iproc: handle clk_get_rate() return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle clk_get_rate() returning 0 to avoid possible division by zero. Fixes: daa5abc41c80 ("pwm: Add support for Broadcom iProc PWM controller") Signed-off-by: Rayagonda Kokatanur Signed-off-by: Scott Branden Reviewed-by: Ray Jui Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm-iproc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 46f0a45e9049..79b1e58e946d 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -85,8 +85,6 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, u64 tmp, multi, rate; u32 value, prescale; - rate = clk_get_rate(ip->clk); - value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); if (value & BIT(IPROC_PWM_CTRL_EN_SHIFT(pwm->hwpwm))) @@ -99,6 +97,13 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, else state->polarity = PWM_POLARITY_INVERSED; + rate = clk_get_rate(ip->clk); + if (rate == 0) { + state->period = 0; + state->duty_cycle = 0; + return; + } + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); prescale = value >> IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); prescale &= IPROC_PWM_PRESCALE_MAX; -- cgit From c0001213d195d1bac83e0744c06ff06dd5a8ba53 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 28 Jul 2020 14:27:04 -0400 Subject: drm/ttm: fix offset in VMAs with a pg_offs in ttm_bo_vm_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMAs with a pg_offs that's offset from the start of the vma_node need to adjust the offset within the BO accordingly. This matches the offset calculation in ttm_bo_vm_fault_reserved. Signed-off-by: Felix Kuehling Tested-by: Laurent Morichetti Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/381169/ --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index fa03fab02076..33526c5df0e8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -505,8 +505,10 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { - unsigned long offset = (addr) - vma->vm_start; struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); int ret; if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages) -- cgit From bdbc0c7a070c0cbe3009cd271c6ec8d87d69cc7a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Thu, 30 Jul 2020 14:25:19 +0100 Subject: KVM: arm64: Ensure that all nVHE hyp code is in .hyp.text Some compilers may put a subset of generated functions into '.text.*' ELF sections and the linker may leverage this division to optimize ELF layout. Unfortunately, the recently introduced HYPCOPY command assumes that all executable code (with the exception of specialized sections such as '.hyp.idmap.text') is in the '.text' section. If this assumption is broken, code in '.text.*' will be merged into kernel proper '.text' instead of the '.hyp.text' that is mapped in EL2. To ensure that this cannot happen, insert an OBJDUMP assertion into HYPCOPY. The command dumps a list of ELF sections in the input object file and greps for '.text.'. If found, compilation fails. Tested with both binutils' and LLVM's objdump (the output format is different). GCC offers '-fno-reorder-functions' to disable this behaviour. Select the flag if it is available. From inspection of GCC source (latest Git in July 2020), this flag does force all code into '.text'. By default, GCC uses profile data, heuristics and attributes to select a subsection. LLVM/Clang currently does not have a similar optimization pass. It can place static constructors into '.text.startup' and it's optimizer can be provided with profile data to reorder hot/cold functions. Neither of these is applicable to nVHE hyp code. If this changes in the future, the OBJDUMP assertion should alert users to the problem. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200730132519.48787-1-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/Makefile | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0b34414557d6..aef76487edc2 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,10 +20,30 @@ $(obj)/%.hyp.tmp.o: $(src)/%.S FORCE $(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE $(call if_changed,hypcopy) +# Disable reordering functions by GCC (enabled at -O2). +# This pass puts functions into '.text.*' sections to aid the linker +# in optimizing ELF layout. See HYPCOPY comment below for more info. +ccflags-y += $(call cc-option,-fno-reorder-functions) + +# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names +# and relevant ELF section names to avoid clashes with VHE code/data. +# +# Hyp code is assumed to be in the '.text' section of the input object +# files (with the exception of specialized sections such as +# '.hyp.idmap.text'). This assumption may be broken by a compiler that +# divides code into sections like '.text.unlikely' so as to optimize +# ELF layout. HYPCOPY checks that no such sections exist in the input +# using `objdump`, otherwise they would be linked together with other +# kernel code and not memory-mapped correctly at runtime. quiet_cmd_hypcopy = HYPCOPY $@ - cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ - --rename-section=.text=.hyp.text \ - $< $@ + cmd_hypcopy = \ + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ # Remove ftrace and Shadow Call Stack CFLAGS. # This is equivalent to the 'notrace' and '__noscs' annotations. -- cgit From 1ccf2fe35c30f79102ad129c5aa71059daaaed7f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 30 Jul 2020 11:44:41 +0200 Subject: KVM: arm: Add trace name for ARM_NISV Commit c726200dd106d ("KVM: arm/arm64: Allow reporting non-ISV data aborts to userspace") introduced a mechanism to deflect MMIO traffic the kernel can not handle to user space. For that, it introduced a new exit reason. However, it did not update the trace point array that gives human readable names to these exit reasons inside the trace log. Let's fix that up after the fact, so that trace logs are pretty even when we get user space MMIO traps on ARM. Fixes: c726200dd106d ("KVM: arm/arm64: Allow reporting non-ISV data aborts to userspace") Signed-off-by: Alexander Graf Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200730094441.18231-1-graf@amazon.com --- include/trace/events/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 2c735a3e6613..9417a34aad08 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -17,7 +17,7 @@ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\ ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \ - ERSN(HYPERV) + ERSN(HYPERV), ERSN(ARM_NISV) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), -- cgit From c9a636f29b5f236441ff059cef0b2fe734c05afd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Jul 2020 11:28:18 +0100 Subject: KVM: arm64: Rename kvm_vcpu_dabt_isextabt() kvm_vcpu_dabt_isextabt() is not specific to data aborts and, unlike kvm_vcpu_dabt_issext(), has nothing to do with sign extension. Rename it to 'kvm_vcpu_abt_issea()'. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20200729102821.23392-2-will@kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 2 +- arch/arm64/kvm/hyp/switch.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index c9ba0df47f7d..e0536946abed 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -366,7 +366,7 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } -static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 5164074c1ae1..36a1f0a9ce2e 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -594,7 +594,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_dabt_iss1tw(vcpu); if (valid) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 36506112480e..8004f1f68946 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2074,7 +2074,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) is_iabt = kvm_vcpu_trap_is_iabt(vcpu); /* Synchronous External Abort? */ - if (kvm_vcpu_dabt_isextabt(vcpu)) { + if (kvm_vcpu_abt_issea(vcpu)) { /* * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. -- cgit From 84b951a803a5464b0bff2fb1366e96f07f75b066 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Jul 2020 11:28:19 +0100 Subject: KVM: arm64: Handle data and instruction external aborts the same way If the guest generates a synchronous external abort which is not handled by the host, we inject it back into the guest as a virtual SError, but only if the original fault was reported on the data side. Instruction faults are reported as "Unsupported FSC", causing the vCPU run loop to bail with -EFAULT. Although synchronous external aborts from a guest are pretty unusual, treat them the same regardless of whether they are taken as data or instruction aborts by EL2. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20200729102821.23392-3-will@kernel.org --- arch/arm64/kvm/mmu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8004f1f68946..14c6a9df5c9f 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2079,13 +2079,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. */ - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - return 1; - - if (unlikely(!is_iabt)) { + if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) kvm_inject_vabt(vcpu); - return 1; - } + + return 1; } trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), -- cgit From 54dc0d2404dd7aa0dd4e4f388a65622b68c6eaff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Jul 2020 11:28:20 +0100 Subject: KVM: arm64: Don't skip cache maintenance for read-only memslots If a guest performs cache maintenance on a read-only memslot, we should inform userspace rather than skip the instruction altogether. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20200729102821.23392-4-will@kernel.org --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 14c6a9df5c9f..85b0ec9dd9ef 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2121,7 +2121,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) * So let's assume that the guest is just being * cautious, and skip the instruction. */ - if (kvm_vcpu_dabt_is_cm(vcpu)) { + if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); ret = 1; goto out_unlock; -- cgit From 022c8328dc8021248047b373b9f67790641b8f2d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Jul 2020 11:28:21 +0100 Subject: KVM: arm64: Move S1PTW S2 fault logic out of io_mem_abort() To allow for re-injection of stage-2 faults on stage-1 page-table walks due to either a missing or read-only memslot, move the triage logic out of io_mem_abort() and into kvm_handle_guest_abort(), where these aborts can be handled before anything else. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Marc Zyngier Cc: Quentin Perret Link: https://lore.kernel.org/r/20200729102821.23392-5-will@kernel.org --- arch/arm64/kvm/mmio.c | 6 ------ arch/arm64/kvm/mmu.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 4e0366759726..58de2ae4f6bb 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -145,12 +145,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, return -ENOSYS; } - /* Page table accesses IO mem: tell guest to fix its TTBR */ - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - /* * Prepare MMIO operation. First decode the syndrome data we get * from the CPU. Then try if some in-kernel emulation feels diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 85b0ec9dd9ef..dc8464669efd 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2105,12 +2105,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + /* + * The guest has put either its instructions or its page-tables + * somewhere it shouldn't have. Userspace won't be able to do + * anything about this (there's no syndrome for a start), so + * re-inject the abort back into the guest. + */ if (is_iabt) { - /* Prefetch Abort on I/O address */ ret = -ENOEXEC; goto out; } + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + goto out_unlock; + } + /* * Check for a cache maintenance operation. Since we * ended-up here, we know it is outside of any memory -- cgit From 048c397aa88b8c6ebbcce8b5c480ae53773f14b9 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 28 Jul 2020 20:17:22 +0000 Subject: NFSv4.2: xattr cache: get rid of cache discard work queue Caches should be small enough to discard them inline, so do that instead of using a work queue. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xattr.c | 37 +++++-------------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 23fdab977a2a..86777996cfec 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -75,7 +75,6 @@ struct nfs4_xattr_cache { spinlock_t listxattr_lock; struct inode *inode; struct nfs4_xattr_entry *listxattr; - struct work_struct work; }; struct nfs4_xattr_entry { @@ -101,8 +100,6 @@ static struct list_lru nfs4_xattr_large_entry_lru; static struct kmem_cache *nfs4_xattr_cache_cachep; -static struct workqueue_struct *nfs4_xattr_cache_wq; - /* * Hashing helper functions. */ @@ -365,9 +362,8 @@ nfs4_xattr_cache_unlink(struct inode *inode) } /* - * Discard a cache. Usually called by a worker, since walking all - * the entries can take up some cycles that we don't want to waste - * in the I/O path. Can also be called from the shrinker callback. + * Discard a cache. Called by get_cache() if there was an old, + * invalid cache. Can also be called from a shrinker callback. * * The cache is dead, it has already been unlinked from its inode, * and no longer appears on the cache LRU list. @@ -414,21 +410,6 @@ nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache) kref_put(&cache->ref, nfs4_xattr_free_cache_cb); } -static void -nfs4_xattr_discard_cache_worker(struct work_struct *work) -{ - struct nfs4_xattr_cache *cache = container_of(work, - struct nfs4_xattr_cache, work); - - nfs4_xattr_discard_cache(cache); -} - -static void -nfs4_xattr_reap_cache(struct nfs4_xattr_cache *cache) -{ - queue_work(nfs4_xattr_cache_wq, &cache->work); -} - /* * Get a referenced copy of the cache structure. Avoid doing allocs * while holding i_lock. Which means that we do some optimistic allocation, @@ -513,10 +494,10 @@ nfs4_xattr_get_cache(struct inode *inode, int add) out: /* - * Discarding an old cache is done via a workqueue. + * Discard the now orphaned old cache. */ if (oldcache != NULL) - nfs4_xattr_reap_cache(oldcache); + nfs4_xattr_discard_cache(oldcache); return cache; } @@ -1008,7 +989,6 @@ static void nfs4_xattr_cache_init_once(void *p) atomic_long_set(&cache->nent, 0); nfs4_xattr_hash_init(cache); cache->listxattr = NULL; - INIT_WORK(&cache->work, nfs4_xattr_discard_cache_worker); INIT_LIST_HEAD(&cache->lru); INIT_LIST_HEAD(&cache->dispose); } @@ -1039,13 +1019,9 @@ int __init nfs4_xattr_cache_init(void) if (ret) goto out2; - nfs4_xattr_cache_wq = alloc_workqueue("nfs4_xattr", WQ_MEM_RECLAIM, 0); - if (nfs4_xattr_cache_wq == NULL) - goto out1; - ret = register_shrinker(&nfs4_xattr_cache_shrinker); if (ret) - goto out0; + goto out1; ret = register_shrinker(&nfs4_xattr_entry_shrinker); if (ret) @@ -1058,8 +1034,6 @@ int __init nfs4_xattr_cache_init(void) unregister_shrinker(&nfs4_xattr_entry_shrinker); out: unregister_shrinker(&nfs4_xattr_cache_shrinker); -out0: - destroy_workqueue(nfs4_xattr_cache_wq); out1: list_lru_destroy(&nfs4_xattr_cache_lru); out2: @@ -1079,5 +1053,4 @@ void nfs4_xattr_cache_exit(void) list_lru_destroy(&nfs4_xattr_entry_lru); list_lru_destroy(&nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); - destroy_workqueue(nfs4_xattr_cache_wq); } -- cgit From 2ed0b7578170c3bab10cde09d4440897b305e40c Mon Sep 17 00:00:00 2001 From: Li Heng Date: Wed, 29 Jul 2020 16:59:00 +0800 Subject: 9p: Remove unneeded cast from memory allocation Remove kmem_cache_alloc return value cast. Coccinelle emits the following warning: ./fs/9p/vfs_inode.c:226:12-29: WARNING: casting value returned by memory allocation function to (struct v9fs_inode *) is useless. Link: http://lkml.kernel.org/r/1596013140-49744-1-git-send-email-liheng40@huawei.com Signed-off-by: Li Heng [Dominique: commit message wording] Signed-off-by: Dominique Martinet --- fs/9p/vfs_inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0fd5bf29880e..ae0c38ad1fcb 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -223,8 +223,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat) struct inode *v9fs_alloc_inode(struct super_block *sb) { struct v9fs_inode *v9inode; - v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache, - GFP_KERNEL); + v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL); if (!v9inode) return NULL; #ifdef CONFIG_9P_FSCACHE -- cgit From a9e10b169e65f0f7061233ebe843a4b4f488dbae Mon Sep 17 00:00:00 2001 From: Steve Cohen Date: Wed, 29 Jul 2020 01:35:52 -0400 Subject: drm: re-add deleted doc for drm_gem_open_ioctl Add back the removed documentation for drm_gem_open_ioctl. This patch is submitted in response to [1]. [1] https://lore.kernel.org/linux-arm-msm/20200728085244.GY6419@phenom.ffwll.local/ Signed-off-by: Steve Cohen Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1596000952-27621-1-git-send-email-cohens@codeaurora.org --- drivers/gpu/drm/drm_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ee2058ad482c..fe9412219b1e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -871,6 +871,9 @@ err: * @file_priv: drm file-private structure * * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. */ int drm_gem_open_ioctl(struct drm_device *dev, void *data, -- cgit From c7c9e914f9a0478fba4dc6f227cfd69cf84a4063 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 26 Jul 2020 18:16:06 +0200 Subject: i2c: rcar: avoid race when unregistering slave MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the lockless design of the driver, it is theoretically possible to access a NULL pointer, if a slave interrupt was running while we were unregistering the slave. To make this rock solid, disable the interrupt for a short time while we are clearing the interrupt_enable register. This patch is purely based on code inspection. The OOPS is super-hard to trigger because clearing SAR (the address) makes interrupts even more unlikely to happen as well. While here, reinit SCR to SDBS because this bit should always be set according to documentation. There is no effect, though, because the interface is disabled. Fixes: 7b814d852af6 ("i2c: rcar: avoid race when unregistering slave client") Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 76c615be5aca..9e883474db8c 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -866,12 +866,14 @@ static int rcar_unreg_slave(struct i2c_client *slave) WARN_ON(!priv->slave); - /* disable irqs and ensure none is running before clearing ptr */ + /* ensure no irq is running before clearing ptr */ + disable_irq(priv->irq); rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSCR, 0); + rcar_i2c_write(priv, ICSSR, 0); + enable_irq(priv->irq); + rcar_i2c_write(priv, ICSCR, SDBS); rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ - synchronize_irq(priv->irq); priv->slave = NULL; pm_runtime_put(rcar_i2c_priv_to_dev(priv)); -- cgit From 9f4db31ea09e3c1e25b60b1d93e549fd38360197 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 31 Jul 2020 13:30:10 -0700 Subject: dt-bindings: clock: Fix YAML schemas for LPASS clocks on SC7180 The YAML schemas that landed forgot one clock: "bi_tcxo". Presumably the bindings were developed against the v4 version of the driver and when the ".name" was removed in v5 of the driver things broke. While touching this, add the needed includes in each example. I believe both examples are supposed to be independent of each other. Let's fix the bindings. Fixes: 381cc6f97cda ("dt-bindings: clock: Add YAML schemas for LPASS clocks on SC7180") Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200731133006.1.Iee81b115f5be50d6d69500fe1bda11bba6e16143@changeid Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml index a838250b33e7..c54172fbf29f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml @@ -25,10 +25,12 @@ properties: clocks: items: - description: gcc_lpass_sway clock from GCC + - description: Board XO source clock-names: items: - const: iface + - const: bi_tcxo power-domains: maxItems: 1 @@ -77,24 +79,28 @@ additionalProperties: false examples: - | + #include #include #include clock-controller@63000000 { compatible = "qcom,sc7180-lpasshm"; reg = <0x63000000 0x28>; - clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>; - clock-names = "iface"; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "bi_tcxo"; #clock-cells = <1>; #power-domain-cells = <1>; }; - | + #include + #include + #include clock-controller@62d00000 { compatible = "qcom,sc7180-lpasscorecc"; reg = <0x62d00000 0x50000>, <0x62780000 0x30000>; reg-names = "lpass_core_cc", "lpass_audio_cc"; - clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>; - clock-names = "iface"; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "bi_tcxo"; power-domains = <&lpass_hm LPASS_CORE_HM_GDSCR>; #clock-cells = <1>; #power-domain-cells = <1>; -- cgit From 70c8b4b8ab3714fb2d075be22aec05ae9c9a6a09 Mon Sep 17 00:00:00 2001 From: Hyun Kwon Date: Wed, 29 Jul 2020 16:30:45 -0700 Subject: drm: xlnx: zynqmp: Use switch - case for link rate downshift Use switch - case to downshift from the current link rate. It's a small loop now, so fine to be replaced with switch - case. With a loop, it is confusing and hard to follow as reported below. The patch d76271d22694: "drm: xlnx: DRM/KMS driver for Xilinx ZynqMP DisplayPort Subsystem" from Jul 7, 2018, leads to the following static checker warning: drivers/gpu/drm/xlnx/zynqmp_dp.c:594 zynqmp_dp_mode_configure() error: iterator underflow 'bws' (-1)-2 Reported-by: Dan Carpenter Signed-off-by: Hyun Kwon Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/1596065445-4630-1-git-send-email-hyun.kwon@xilinx.com --- drivers/gpu/drm/xlnx/zynqmp_dp.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index b735072a702d..99158ee67d02 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -567,34 +567,37 @@ static int zynqmp_dp_mode_configure(struct zynqmp_dp *dp, int pclock, u8 current_bw) { int max_rate = dp->link_config.max_rate; - u8 bws[3] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7, DP_LINK_BW_5_4 }; + u8 bw_code; u8 max_lanes = dp->link_config.max_lanes; u8 max_link_rate_code = drm_dp_link_rate_to_bw_code(max_rate); u8 bpp = dp->config.bpp; u8 lane_cnt; - s8 i; - if (current_bw == DP_LINK_BW_1_62) { + /* Downshift from current bandwidth */ + switch (current_bw) { + case DP_LINK_BW_5_4: + bw_code = DP_LINK_BW_2_7; + break; + case DP_LINK_BW_2_7: + bw_code = DP_LINK_BW_1_62; + break; + case DP_LINK_BW_1_62: dev_err(dp->dev, "can't downshift. already lowest link rate\n"); return -EINVAL; - } - - for (i = ARRAY_SIZE(bws) - 1; i >= 0; i--) { - if (current_bw && bws[i] >= current_bw) - continue; - - if (bws[i] <= max_link_rate_code) - break; + default: + /* If not given, start with max supported */ + bw_code = max_link_rate_code; + break; } for (lane_cnt = 1; lane_cnt <= max_lanes; lane_cnt <<= 1) { int bw; u32 rate; - bw = drm_dp_bw_code_to_link_rate(bws[i]); + bw = drm_dp_bw_code_to_link_rate(bw_code); rate = zynqmp_dp_max_rate(bw, lane_cnt, bpp); if (pclock <= rate) { - dp->mode.bw_code = bws[i]; + dp->mode.bw_code = bw_code; dp->mode.lane_cnt = lane_cnt; dp->mode.pclock = pclock; return dp->mode.bw_code; -- cgit From f369bc3f9096f5d355e8b80540bc30ac9a602912 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 1 Aug 2020 08:17:13 +0200 Subject: vgaarb: mark vga_tryget static This symbols isn't used anywhere outside of vgaarb.c. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200801061713.307434-1-hch@lst.de --- drivers/gpu/vga/vgaarb.c | 3 +-- include/linux/vgaarb.h | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index f2f3ef8af271..5180c5687ee5 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -529,7 +529,7 @@ EXPORT_SYMBOL(vga_get); * * 0 on success, negative error code on failure. */ -int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) +static int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; unsigned long flags; @@ -554,7 +554,6 @@ bail: spin_unlock_irqrestore(&vga_lock, flags); return rc; } -EXPORT_SYMBOL(vga_tryget); /** * vga_put - release lock on legacy VGA resources diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 553b34c8b5f7..977caf96c8d2 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -109,12 +109,6 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, return vga_get(pdev, rsrc, 0); } -#if defined(CONFIG_VGA_ARB) -extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); -#else -static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } -#endif - #if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #else -- cgit From 7ef5264de773279b9f23b6cc8afb5addb30e970b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:20 +0200 Subject: modules: mark ref_module static ref_module isn't used anywhere outside of module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 1 - kernel/module.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 2e6670860d27..f1fdbeef2153 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -657,7 +657,6 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ -int ref_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ diff --git a/kernel/module.c b/kernel/module.c index e8a198588f26..baae0e83d630 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -869,7 +869,7 @@ static int add_module_usage(struct module *a, struct module *b) } /* Module a uses b: caller needs module_mutex() */ -int ref_module(struct module *a, struct module *b) +static int ref_module(struct module *a, struct module *b) { int err; @@ -888,7 +888,6 @@ int ref_module(struct module *a, struct module *b) } return 0; } -EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) @@ -1169,11 +1168,10 @@ static inline void module_unload_free(struct module *mod) { } -int ref_module(struct module *a, struct module *b) +static int ref_module(struct module *a, struct module *b) { return strong_try_module_get(b); } -EXPORT_SYMBOL_GPL(ref_module); static inline int module_unload_init(struct module *mod) { -- cgit From 773110470e2fa3839523384ae014f8a723c4d178 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:21 +0200 Subject: modules: mark find_symbol static find_symbol is only used in module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 11 ----------- kernel/module.c | 3 +-- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index f1fdbeef2153..90bdc362be36 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -590,17 +590,6 @@ struct symsearch { bool unused; }; -/* - * Search for an exported symbol by name. - * - * Must be called with module_mutex held or preemption disabled. - */ -const struct kernel_symbol *find_symbol(const char *name, - struct module **owner, - const s32 **crc, - bool gplok, - bool warn); - /* * Walk the exported symbol table * diff --git a/kernel/module.c b/kernel/module.c index baae0e83d630..0f95fb4b3e37 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -585,7 +585,7 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, /* Find an exported symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. */ -const struct kernel_symbol *find_symbol(const char *name, +static const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const s32 **crc, bool gplok, @@ -608,7 +608,6 @@ const struct kernel_symbol *find_symbol(const char *name, pr_debug("Failed to find symbol %s\n", name); return NULL; } -EXPORT_SYMBOL_GPL(find_symbol); /* * Search for module by name: must hold module_mutex (or preempt disabled -- cgit From a54e04914c211b5678602a46b3ede5d82ec1327d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:22 +0200 Subject: modules: mark each_symbol_section static each_symbol_section is only used inside of module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 9 --------- kernel/module.c | 3 +-- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 90bdc362be36..b79219eed83c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -590,15 +590,6 @@ struct symsearch { bool unused; }; -/* - * Walk the exported symbol table - * - * Must be called with module_mutex held or preemption disabled. - */ -bool each_symbol_section(bool (*fn)(const struct symsearch *arr, - struct module *owner, - void *data), void *data); - /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, diff --git a/kernel/module.c b/kernel/module.c index 0f95fb4b3e37..c2a099a27b68 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -422,7 +422,7 @@ static bool each_symbol_in_section(const struct symsearch *arr, } /* Returns true as soon as fn returns true, otherwise false. */ -bool each_symbol_section(bool (*fn)(const struct symsearch *arr, +static bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data) @@ -484,7 +484,6 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, } return false; } -EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ -- cgit From 3fe1e56d0e68b623dd62d8d38265d2a052e7e185 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:23 +0200 Subject: modules: unexport __module_text_address __module_text_address is only used by built-in code. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- kernel/module.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index c2a099a27b68..6ee1739e3150 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4503,7 +4503,6 @@ struct module *__module_text_address(unsigned long addr) } return mod; } -EXPORT_SYMBOL_GPL(__module_text_address); /* Don't grab lock, we're oopsing. */ void print_modules(void) -- cgit From 34e64705ad415ed7a816e60ef62b42fe6d1729d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:24 +0200 Subject: modules: unexport __module_address __module_address is only used by built-in code. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- kernel/module.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index 6ee1739e3150..e85d06158fbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4464,7 +4464,6 @@ struct module *__module_address(unsigned long addr) } return mod; } -EXPORT_SYMBOL_GPL(__module_address); /* * is_module_text_address - is this address inside module code? -- cgit From cd8732cdcc37d7077c4fa2c966b748c0662b607e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:25 +0200 Subject: modules: rename the licence field in struct symsearch to license Use the same spelling variant as the rest of the file. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- kernel/module.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index b79219eed83c..be04ba2f881d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,7 +586,7 @@ struct symsearch { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, - } licence; + } license; bool unused; }; diff --git a/kernel/module.c b/kernel/module.c index e85d06158fbc..62d817a0dca8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -504,9 +504,9 @@ static bool check_exported_symbol(const struct symsearch *syms, struct find_symbol_arg *fsa = data; if (!fsa->gplok) { - if (syms->licence == GPL_ONLY) + if (syms->license == GPL_ONLY) return false; - if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) { + if (syms->license == WILL_BE_GPL_ONLY && fsa->warn) { pr_warn("Symbol %s is being used by a non-GPL module, " "which will not be allowed in the future\n", fsa->name); -- cgit From ef1dac6021cc8ec5de02ce31722bf26ac4ed5523 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:26 +0200 Subject: modules: return licensing information from find_symbol Report the GPLONLY status through a new argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- kernel/module.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index be04ba2f881d..30b0f5fcdb3c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -582,7 +582,7 @@ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; - enum { + enum mod_license { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, diff --git a/kernel/module.c b/kernel/module.c index 62d817a0dca8..656f5ff27088 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -495,6 +495,7 @@ struct find_symbol_arg { struct module *owner; const s32 *crc; const struct kernel_symbol *sym; + enum mod_license license; }; static bool check_exported_symbol(const struct symsearch *syms, @@ -528,6 +529,7 @@ static bool check_exported_symbol(const struct symsearch *syms, fsa->owner = owner; fsa->crc = symversion(syms->crcs, symnum); fsa->sym = &syms->start[symnum]; + fsa->license = syms->license; return true; } @@ -587,6 +589,7 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, static const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const s32 **crc, + enum mod_license *license, bool gplok, bool warn) { @@ -601,6 +604,8 @@ static const struct kernel_symbol *find_symbol(const char *name, *owner = fsa.owner; if (crc) *crc = fsa.crc; + if (license) + *license = fsa.license; return fsa.sym; } @@ -1074,7 +1079,7 @@ void __symbol_put(const char *symbol) struct module *owner; preempt_disable(); - if (!find_symbol(symbol, &owner, NULL, true, false)) + if (!find_symbol(symbol, &owner, NULL, NULL, true, false)) BUG(); module_put(owner); preempt_enable(); @@ -1352,7 +1357,7 @@ static inline int check_modstruct_version(const struct load_info *info, * locking is necessary -- use preempt_disable() to placate lockdep. */ preempt_disable(); - if (!find_symbol("module_layout", NULL, &crc, true, false)) { + if (!find_symbol("module_layout", NULL, &crc, NULL, true, false)) { preempt_enable(); BUG(); } @@ -1436,6 +1441,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, struct module *owner; const struct kernel_symbol *sym; const s32 *crc; + enum mod_license license; int err; /* @@ -1445,7 +1451,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, */ sched_annotate_sleep(); mutex_lock(&module_mutex); - sym = find_symbol(name, &owner, &crc, + sym = find_symbol(name, &owner, &crc, &license, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); if (!sym) goto unlock; @@ -2213,7 +2219,7 @@ void *__symbol_get(const char *symbol) const struct kernel_symbol *sym; preempt_disable(); - sym = find_symbol(symbol, &owner, NULL, true, true); + sym = find_symbol(symbol, &owner, NULL, NULL, true, true); if (sym && strong_try_module_get(owner)) sym = NULL; preempt_enable(); @@ -2249,7 +2255,7 @@ static int verify_exported_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { if (find_symbol(kernel_symbol_name(s), &owner, NULL, - true, false)) { + NULL, true, false)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", mod->name, kernel_symbol_name(s), -- cgit From 67dd23f9e6fbaf163431912ef5599c5e0693476c Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Sat, 1 Aug 2020 07:10:38 -0400 Subject: nfs: ensure correct writeback errors are returned on close() nfs_wb_all() calls filemap_write_and_wait(), which uses filemap_check_errors() to determine the error to return. filemap_check_errors() only looks at the mapping->flags and will therefore only return either -ENOSPC or -EIO. To ensure that the correct error is returned on close(), nfs{,4}_file_flush() should call filemap_check_wb_err() which looks at the errseq value in mapping->wb_err without consuming it. Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one") Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 5 ++++- fs/nfs/nfs4file.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ccd6c1637b27..bb244fc39d4b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -141,6 +141,7 @@ static int nfs_file_flush(struct file *file, fl_owner_t id) { struct inode *inode = file_inode(file); + errseq_t since; dprintk("NFS: flush(%pD2)\n", file); @@ -149,7 +150,9 @@ nfs_file_flush(struct file *file, fl_owner_t id) return 0; /* Flush writes to the server and return any errors */ - return nfs_wb_all(inode); + since = filemap_sample_wb_err(file->f_mapping); + nfs_wb_all(inode); + return filemap_check_wb_err(file->f_mapping, since); } ssize_t diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8e5d6223ddd3..a33970765467 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -110,6 +110,7 @@ static int nfs4_file_flush(struct file *file, fl_owner_t id) { struct inode *inode = file_inode(file); + errseq_t since; dprintk("NFS: flush(%pD2)\n", file); @@ -125,7 +126,9 @@ nfs4_file_flush(struct file *file, fl_owner_t id) return filemap_fdatawrite(file->f_mapping); /* Flush writes to the server and return any errors */ - return nfs_wb_all(inode); + since = filemap_sample_wb_err(file->f_mapping); + nfs_wb_all(inode); + return filemap_check_wb_err(file->f_mapping, since); } #ifdef CONFIG_NFS_V4_2 -- cgit From 5133ba8f15911e98567cdb6b767be8080a636b0b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 10 Jun 2020 14:41:30 +0200 Subject: libceph: use target_copy() in send_linger() Instead of copying just oloc, oid and flags, copy the entire linger target. This is more for consistency than anything else, as send_linger() -> submit_request() -> __submit_request() sends the request regardless of what calc_target() says (i.e. both on CALC_TARGET_NO_ACTION and CALC_TARGET_NEED_RESEND). Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/osd_client.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2db8b44e70c2..db6abb5a5511 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3076,9 +3076,7 @@ static void send_linger(struct ceph_osd_linger_request *lreq) cancel_linger_request(req); request_reinit(req); - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - req->r_flags = lreq->t.flags; + target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; mutex_lock(&lreq->lock); -- cgit From 6e6f0f0116079d6be42080064fe7079283a507ed Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 16 Jun 2020 09:58:49 +0200 Subject: libceph: dump class and method names on method calls Signed-off-by: Ilya Dryomov --- net/ceph/debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 409d505ff320..2110439f8a24 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -223,6 +223,9 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req) if (op->op == CEPH_OSD_OP_WATCH) seq_printf(s, "-%s", ceph_osd_watch_op_name(op->watch.op)); + else if (op->op == CEPH_OSD_OP_CALL) + seq_printf(s, "-%s/%s", op->cls.class_name, + op->cls.method_name); } seq_putc(s, '\n'); -- cgit From 3e699bd865527004773012da38febdf444fd5fa8 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Jun 2020 03:52:15 -0400 Subject: ceph: add check_session_state() helper and make it global And remove the unsed mdsc parameter to simplify the code. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++-------------------- fs/ceph/mds_client.h | 3 +++ 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a50497142e59..58c54d41aa40 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1785,8 +1785,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc, /* * send a session close request */ -static int request_close_session(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +static int request_close_session(struct ceph_mds_session *session) { struct ceph_msg *msg; @@ -1809,7 +1808,7 @@ static int __close_session(struct ceph_mds_client *mdsc, if (session->s_state >= CEPH_MDS_SESSION_CLOSING) return 0; session->s_state = CEPH_MDS_SESSION_CLOSING; - return request_close_session(mdsc, session); + return request_close_session(session); } static bool drop_negative_children(struct dentry *dentry) @@ -4263,6 +4262,29 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) ceph_force_reconnect(fsc->sb); } +bool check_session_state(struct ceph_mds_session *s) +{ + if (s->s_state == CEPH_MDS_SESSION_CLOSING) { + dout("resending session close request for mds%d\n", + s->s_mds); + request_close_session(s); + return false; + } + if (s->s_ttl && time_after(jiffies, s->s_ttl)) { + if (s->s_state == CEPH_MDS_SESSION_OPEN) { + s->s_state = CEPH_MDS_SESSION_HUNG; + pr_info("mds%d hung\n", s->s_mds); + } + } + if (s->s_state == CEPH_MDS_SESSION_NEW || + s->s_state == CEPH_MDS_SESSION_RESTARTING || + s->s_state == CEPH_MDS_SESSION_REJECTED) + /* this mds is failed or recovering, just wait */ + return false; + + return true; +} + /* * delayed work -- periodically trim expired leases, renew caps with mds */ @@ -4294,23 +4316,8 @@ static void delayed_work(struct work_struct *work) struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); if (!s) continue; - if (s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout("resending session close request for mds%d\n", - s->s_mds); - request_close_session(mdsc, s); - ceph_put_mds_session(s); - continue; - } - if (s->s_ttl && time_after(jiffies, s->s_ttl)) { - if (s->s_state == CEPH_MDS_SESSION_OPEN) { - s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); - } - } - if (s->s_state == CEPH_MDS_SESSION_NEW || - s->s_state == CEPH_MDS_SESSION_RESTARTING || - s->s_state == CEPH_MDS_SESSION_REJECTED) { - /* this mds is failed or recovering, just wait */ + + if (!check_session_state(s)) { ceph_put_mds_session(s); continue; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5e0c4073a6be..6147ff0a1cdf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -18,6 +18,7 @@ #include #include "metric.h" +#include "super.h" /* The first 8 bits are reserved for old ceph releases */ enum ceph_feature_type { @@ -476,6 +477,8 @@ struct ceph_mds_client { extern const char *ceph_mds_op_name(int op); +extern bool check_session_state(struct ceph_mds_session *s); + extern struct ceph_mds_session * __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); -- cgit From 4f1d756def68588b88068af1d5a4a3b6dc7e6e2a Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Jun 2020 03:52:16 -0400 Subject: ceph: add global total_caps to count the mdsc's total caps number This will help to reduce using the global mdsc->mutex lock in many places. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 ++ fs/ceph/debugfs.c | 14 ++------------ fs/ceph/mds_client.c | 1 + fs/ceph/metric.c | 1 + fs/ceph/metric.h | 1 + 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 972c13aa4225..5f4894063a73 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -668,6 +668,7 @@ void ceph_add_cap(struct inode *inode, spin_lock(&session->s_cap_lock); list_add_tail(&cap->session_caps, &session->s_caps); session->s_nr_caps++; + atomic64_inc(&mdsc->metric.total_caps); spin_unlock(&session->s_cap_lock); } else { spin_lock(&session->s_cap_lock); @@ -1161,6 +1162,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } else { list_del_init(&cap->session_caps); session->s_nr_caps--; + atomic64_dec(&mdsc->metric.total_caps); cap->session = NULL; removed = 1; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 070ed8481340..3030f5585085 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -145,7 +145,7 @@ static int metric_show(struct seq_file *s, void *p) struct ceph_fs_client *fsc = s->private; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_client_metric *m = &mdsc->metric; - int i, nr_caps = 0; + int nr_caps = 0; s64 total, sum, avg, min, max, sq; seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); @@ -190,17 +190,7 @@ static int metric_show(struct seq_file *s, void *p) percpu_counter_sum(&m->d_lease_mis), percpu_counter_sum(&m->d_lease_hit)); - mutex_lock(&mdsc->mutex); - for (i = 0; i < mdsc->max_sessions; i++) { - struct ceph_mds_session *s; - - s = __ceph_lookup_mds_session(mdsc, i); - if (!s) - continue; - nr_caps += s->s_nr_caps; - ceph_put_mds_session(s); - } - mutex_unlock(&mdsc->mutex); + nr_caps = atomic64_read(&m->total_caps); seq_printf(s, "%-14s%-16d%-16lld%lld\n", "caps", nr_caps, percpu_counter_sum(&m->i_caps_mis), percpu_counter_sum(&m->i_caps_hit)); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 58c54d41aa40..f3c71230df93 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1485,6 +1485,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, cap->session = NULL; list_del_init(&cap->session_caps); session->s_nr_caps--; + atomic64_dec(&session->s_mdsc->metric.total_caps); if (cap->queue_release) __ceph_queue_cap_release(session, cap); else diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 9217f35bc2b9..269eacbd2a15 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -22,6 +22,7 @@ int ceph_metric_init(struct ceph_client_metric *m) if (ret) goto err_d_lease_mis; + atomic64_set(&m->total_caps, 0); ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); if (ret) goto err_i_caps_hit; diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index ccd81285a450..23a3373d5a3d 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -12,6 +12,7 @@ struct ceph_client_metric { struct percpu_counter d_lease_hit; struct percpu_counter d_lease_mis; + atomic64_t total_caps; struct percpu_counter i_caps_hit; struct percpu_counter i_caps_mis; -- cgit From b682c6d41bc23353b5d80e02ca4961ac67624f4c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Jun 2020 03:52:18 -0400 Subject: ceph: switch to WARN_ON_ONCE in encode_supported_features() ...and let the errnos bubble up to the callers. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f3c71230df93..d5e523cc40e6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1168,7 +1168,7 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; #define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) -static void encode_supported_features(void **p, void *end) +static int encode_supported_features(void **p, void *end) { static const size_t count = ARRAY_SIZE(feature_bits); @@ -1176,16 +1176,22 @@ static void encode_supported_features(void **p, void *end) size_t i; size_t size = FEATURE_BYTES(count); - BUG_ON(*p + 4 + size > end); + if (WARN_ON_ONCE(*p + 4 + size > end)) + return -ERANGE; + ceph_encode_32(p, size); memset(*p, 0, size); for (i = 0; i < count; i++) ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8); *p += size; } else { - BUG_ON(*p + 4 > end); + if (WARN_ON_ONCE(*p + 4 > end)) + return -ERANGE; + ceph_encode_32(p, 0); } + + return 0; } /* @@ -1203,6 +1209,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; size_t size, count; void *p, *end; + int ret; const char* metadata[][2] = { {"hostname", mdsc->nodename}, @@ -1232,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 GFP_NOFS, false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); - return NULL; + return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -1269,7 +1276,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 p += val_len; } - encode_supported_features(&p, end); + ret = encode_supported_features(&p, end); + if (ret) { + pr_err("encode_supported_features failed!\n"); + ceph_msg_put(msg); + return ERR_PTR(ret); + } + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -1297,8 +1310,8 @@ static int __open_session(struct ceph_mds_client *mdsc, /* send connect message */ msg = create_session_open_msg(mdsc, session->s_seq); - if (!msg) - return -ENOMEM; + if (IS_ERR(msg)) + return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); return 0; } @@ -1312,6 +1325,7 @@ static struct ceph_mds_session * __open_export_target_session(struct ceph_mds_client *mdsc, int target) { struct ceph_mds_session *session; + int ret; session = __ceph_lookup_mds_session(mdsc, target); if (!session) { @@ -1320,8 +1334,11 @@ __open_export_target_session(struct ceph_mds_client *mdsc, int target) return session; } if (session->s_state == CEPH_MDS_SESSION_NEW || - session->s_state == CEPH_MDS_SESSION_CLOSING) - __open_session(mdsc, session); + session->s_state == CEPH_MDS_SESSION_CLOSING) { + ret = __open_session(mdsc, session); + if (ret) + return ERR_PTR(ret); + } return session; } @@ -2520,7 +2537,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ceph_encode_copy(&p, &ts, sizeof(ts)); } - BUG_ON(p > end); + if (WARN_ON_ONCE(p > end)) { + ceph_msg_put(msg); + msg = ERR_PTR(-ERANGE); + goto out_free2; + } + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -2756,7 +2778,9 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (session->s_state == CEPH_MDS_SESSION_NEW || session->s_state == CEPH_MDS_SESSION_CLOSING) { - __open_session(mdsc, session); + err = __open_session(mdsc, session); + if (err) + goto out_session; /* retry the same mds later */ if (random) req->r_resend_mds = mds; -- cgit From fa9967734227b44acb1b6918033f9122dc7825b9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 1 Jul 2020 01:52:48 -0400 Subject: ceph: fix potential mdsc use-after-free crash Make sure the delayed work stopped before releasing the resources. cancel_delayed_work_sync() will only guarantee that the work finishes executing if the work is already in the ->worklist. That means after the cancel_delayed_work_sync() returns, it will leave the work requeued if it was rearmed at the end. That can lead to a use after free once the work struct is freed. Fix it by flushing the delayed work instead of trying to cancel it, and ensure that the work doesn't rearm if the mdsc is stopping. URL: https://tracker.ceph.com/issues/46293 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d5e523cc40e6..9a09d12569bd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4330,6 +4330,9 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); + if (mdsc->stopping) + return; + mutex_lock(&mdsc->mutex); renew_interval = mdsc->mdsmap->m_session_timeout >> 2; renew_caps = time_after_eq(jiffies, HZ*renew_interval + @@ -4689,7 +4692,16 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); - cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ + /* + * Make sure the delayed work stopped before releasing + * the resources. + * + * Because the cancel_delayed_work_sync() will only + * guarantee that the work finishes executing. But the + * delayed work will re-arm itself again after that. + */ + flush_delayed_work(&mdsc->delayed_work); + if (mdsc->mdsmap) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); -- cgit From 585d72f33e7083972030fac7792ea3050a4a8dff Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 30 Jun 2020 15:36:21 -0400 Subject: ceph: clean up and optimize ceph_check_delayed_caps() Make this loop look a bit more sane. Also optimize away the spinlock release/reacquire if we can't get an inode reference. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5f4894063a73..55ccccf77cea 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4189,10 +4189,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) struct ceph_inode_info *ci; dout("check_delayed_caps\n"); - while (1) { - spin_lock(&mdsc->cap_delay_lock); - if (list_empty(&mdsc->cap_delay_list)) - break; + spin_lock(&mdsc->cap_delay_lock); + while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); @@ -4202,13 +4200,13 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) list_del_init(&ci->i_cap_delay_list); inode = igrab(&ci->vfs_inode); - spin_unlock(&mdsc->cap_delay_lock); - if (inode) { + spin_unlock(&mdsc->cap_delay_lock); dout("check_delayed_caps on %p\n", inode); ceph_check_caps(ci, 0, NULL); /* avoid calling iput_final() in tick thread */ ceph_async_iput(inode); + spin_lock(&mdsc->cap_delay_lock); } } spin_unlock(&mdsc->cap_delay_lock); -- cgit From d1d9655052606fd9078e896668ec90191372d513 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 6 Jul 2020 08:51:35 -0400 Subject: ceph: do not access the kiocb after aio requests In aio case, if the completion comes very fast just before the ceph_read_iter() returns to fs/aio.c, the kiocb will be freed in the completion callback, then if ceph_read_iter() access again we will potentially hit the use-after-free bug. [ jlayton: initialize direct_lock early, and use it everywhere ] URL: https://tracker.ceph.com/issues/45649 Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 160644ddaeed..d51c3f2fdca0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1538,6 +1538,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct page *pinned_page = NULL; + bool direct_lock = iocb->ki_flags & IOCB_DIRECT; ssize_t ret; int want, got = 0; int retry_op = 0, read = 0; @@ -1546,7 +1547,7 @@ again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_start_io_direct(inode); else ceph_start_io_read(inode); @@ -1603,7 +1604,7 @@ again: } ceph_put_cap_refs(ci, got); - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_end_io_direct(inode); else ceph_end_io_read(inode); -- cgit From 042f649810f61c4a834f3d6d866c567f7f6b3f8c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 1 Jul 2020 11:54:43 -0400 Subject: libceph: just have osd_req_op_init() return a pointer The caller can just ignore the return. No need for this wrapper that just casts the other function to void. [ idryomov: argument alignment ] Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 39 ++++++++++++++++----------------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c60b59e9291b..83fa08a06507 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -404,7 +404,7 @@ void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); &__oreq->r_ops[__whch].typ.fld; \ }) -extern void osd_req_op_init(struct ceph_osd_request *osd_req, +struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index db6abb5a5511..e4fbcad6e7d8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -525,7 +525,7 @@ EXPORT_SYMBOL(ceph_osdc_put_request); static void request_init(struct ceph_osd_request *req) { - /* req only, each op is zeroed in _osd_req_op_init() */ + /* req only, each op is zeroed in osd_req_op_init() */ memset(req, 0, sizeof(*req)); kref_init(&req->r_kref); @@ -746,8 +746,8 @@ EXPORT_SYMBOL(ceph_osdc_alloc_messages); * other information associated with them. It also serves as a * common init routine for all the other init functions, below. */ -static struct ceph_osd_req_op * -_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, +struct ceph_osd_req_op * +osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags) { struct ceph_osd_req_op *op; @@ -762,12 +762,6 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, return op; } - -void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, u32 flags) -{ - (void)_osd_req_op_init(osd_req, which, opcode, flags); -} EXPORT_SYMBOL(osd_req_op_init); void osd_req_op_extent_init(struct ceph_osd_request *osd_req, @@ -775,8 +769,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, + opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && @@ -822,7 +816,7 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, BUG_ON(which + 1 >= osd_req->r_num_ops); prev_op = &osd_req->r_ops[which]; - op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); + op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); /* dup previous one */ op->indata_len = prev_op->indata_len; op->outdata_len = prev_op->outdata_len; @@ -845,7 +839,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, size_t size; int ret; - op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) @@ -883,8 +877,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; int ret; @@ -928,7 +922,7 @@ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, { struct ceph_osd_req_op *op; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; op->watch.gen = 0; @@ -943,10 +937,9 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_write_size, u32 flags) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - CEPH_OSD_OP_SETALLOCHINT, - 0); + struct ceph_osd_req_op *op; + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; op->alloc_hint.flags = flags; @@ -4799,7 +4792,7 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, struct ceph_pagelist *pl; int ret; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) @@ -4868,7 +4861,7 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, struct ceph_pagelist *pl; int ret; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); op->notify.cookie = cookie; pl = ceph_pagelist_alloc(GFP_NOIO); @@ -5332,8 +5325,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, if (IS_ERR(pages)) return PTR_ERR(pages); - op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, - dst_fadvise_flags); + op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, + dst_fadvise_flags); op->copy_from.snapid = src_snapid; op->copy_from.src_version = src_version; op->copy_from.flags = copy_from_flags; -- cgit From c00e4522adff010141ce86839bb4fe494c853077 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Wed, 8 Jul 2020 07:03:22 +0000 Subject: ceph: remove unnecessary cast in kfree() Remove unnecassary casts in the argument to kfree. Signed-off-by: Xu Wang Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 71ee34d160c3..3a733ac33d9b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -497,10 +497,10 @@ static int __set_xattr(struct ceph_inode_info *ci, kfree(*newxattr); *newxattr = NULL; if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); if (update_xattr) { - kfree((void *)name); + kfree(name); name = xattr->name; } ci->i_xattrs.names_size -= xattr->name_len; @@ -566,9 +566,9 @@ static void __free_xattr(struct ceph_inode_xattr *xattr) BUG_ON(!xattr); if (xattr->should_free_name) - kfree((void *)xattr->name); + kfree(xattr->name); if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); kfree(xattr); } @@ -582,9 +582,9 @@ static int __remove_xattr(struct ceph_inode_info *ci, rb_erase(&xattr->node, &ci->i_xattrs.index); if (xattr->should_free_name) - kfree((void *)xattr->name); + kfree(xattr->name); if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); ci->i_xattrs.names_size -= xattr->name_len; ci->i_xattrs.vals_size -= xattr->val_len; -- cgit From 94f17c00d6687993101372f996cf6690ec9adf83 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 08:53:28 +0200 Subject: libceph: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. [ idryomov: Do the same for the CRUSH paper and replace ceph.newdream.net with ceph.io. ] Signed-off-by: Alexander A. Klimov Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/Kconfig | 2 +- include/linux/crush/crush.h | 2 +- net/ceph/Kconfig | 2 +- net/ceph/ceph_hash.c | 2 +- net/ceph/crush/hash.c | 2 +- net/ceph/crush/mapper.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index cf235f6eacf9..471e40156065 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -13,7 +13,7 @@ config CEPH_FS scalable file system designed to provide high performance, reliable access to petabytes of storage. - More information at http://ceph.newdream.net/. + More information at https://ceph.io/. If unsure, say N. diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 33c16f2de7f6..2f811baf78d2 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -17,7 +17,7 @@ * The algorithm was originally described in detail in this paper * (although the algorithm has evolved somewhat since then): * - * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf + * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf * * LGPL2 */ diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index d7bec7adc267..f36f9a3a4e20 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -13,7 +13,7 @@ config CEPH_LIB common functionality to both the Ceph filesystem and to the rados block device (rbd). - More information at http://ceph.newdream.net/. + More information at https://ceph.io/. If unsure, say N. diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 9a5850f264ed..81e1e006c540 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -4,7 +4,7 @@ /* * Robert Jenkin's hash function. - * http://burtleburtle.net/bob/hash/evahash.html + * https://burtleburtle.net/bob/hash/evahash.html * This is in the public domain. */ #define mix(a, b, c) \ diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index e5cc603cdb17..fe79f6d2d0db 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -7,7 +7,7 @@ /* * Robert Jenkins' function for mixing 32-bit values - * http://burtleburtle.net/bob/hash/evahash.html + * https://burtleburtle.net/bob/hash/evahash.html * a, b = random bits, c = input and output */ #define crush_hashmix(a, b, c) do { \ diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 3f323ed9df52..07e5614eb3f1 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -298,7 +298,7 @@ static __u64 crush_ln(unsigned int xin) * * for reference, see: * - * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables + * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables * */ -- cgit From aaf5a476201bf93bdab75d6922340516ee63f7e2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 17 Jul 2020 09:25:13 -0400 Subject: ceph: check the sesion state and return false in case it is closed If the session is already in closed state, we should skip it. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a09d12569bd..ef8a1179171b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4303,6 +4303,7 @@ bool check_session_state(struct ceph_mds_session *s) } if (s->s_state == CEPH_MDS_SESSION_NEW || s->s_state == CEPH_MDS_SESSION_RESTARTING || + s->s_state == CEPH_MDS_SESSION_CLOSED || s->s_state == CEPH_MDS_SESSION_REJECTED) /* this mds is failed or recovering, just wait */ return false; -- cgit From 18f473b384a64cef69f166a3e2b73d3d2eca82c6 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 16 Jul 2020 10:05:57 -0400 Subject: ceph: periodically send perf metrics to MDSes This will send the caps/read/write/metadata metrics to any available MDS once per second, which will be the same as the userland client. It will skip the MDS sessions which don't support the metric collection, as the MDSs will close socket connections when they get an unknown type message. We can disable the metric sending via the disable_send_metrics module parameter. [ jlayton: fix up endianness bug in ceph_mdsc_send_metrics() ] URL: https://tracker.ceph.com/issues/43215 Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 + fs/ceph/mds_client.h | 4 +- fs/ceph/metric.c | 148 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/metric.h | 77 ++++++++++++++++++++++ fs/ceph/super.c | 42 ++++++++++++ fs/ceph/super.h | 2 + include/linux/ceph/ceph_fs.h | 1 + 7 files changed, 276 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ef8a1179171b..d6cd2e4f0bc8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3334,6 +3334,8 @@ static void handle_session(struct ceph_mds_session *session, session->s_state = CEPH_MDS_SESSION_OPEN; session->s_features = features; renewed_caps(mdsc, session, 0); + if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) + metric_schedule_delayed(&mdsc->metric); wake = 1; if (mdsc->stopping) __close_session(mdsc, session); @@ -4725,6 +4727,7 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) ceph_metric_destroy(&mdsc->metric); + flush_delayed_work(&mdsc->metric.delayed_work); fsc->mdsc = NULL; kfree(mdsc); dout("mdsc_destroy %p done\n", mdsc); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 6147ff0a1cdf..bc9e95937d7c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -28,8 +28,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_LAZY_CAP_WANTED, CEPHFS_FEATURE_MULTI_RECONNECT, CEPHFS_FEATURE_DELEG_INO, + CEPHFS_FEATURE_METRIC_COLLECT, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT, }; /* @@ -43,6 +44,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_LAZY_CAP_WANTED, \ CEPHFS_FEATURE_MULTI_RECONNECT, \ CEPHFS_FEATURE_DELEG_INO, \ + CEPHFS_FEATURE_METRIC_COLLECT, \ \ CEPHFS_FEATURE_MAX, \ } diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 269eacbd2a15..2466b261fba2 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -1,10 +1,150 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include #include "metric.h" +#include "mds_client.h" + +static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, + struct ceph_mds_session *s) +{ + struct ceph_metric_head *head; + struct ceph_metric_cap *cap; + struct ceph_metric_read_latency *read; + struct ceph_metric_write_latency *write; + struct ceph_metric_metadata_latency *meta; + struct ceph_client_metric *m = &mdsc->metric; + u64 nr_caps = atomic64_read(&m->total_caps); + struct ceph_msg *msg; + struct timespec64 ts; + s64 sum; + s32 items = 0; + s32 len; + + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + + sizeof(*meta); + + msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); + if (!msg) { + pr_err("send metrics to mds%d, failed to allocate message\n", + s->s_mds); + return false; + } + + head = msg->front.iov_base; + + /* encode the cap metric */ + cap = (struct ceph_metric_cap *)(head + 1); + cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); + cap->ver = 1; + cap->compat = 1; + cap->data_len = cpu_to_le32(sizeof(*cap) - 10); + cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); + cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); + cap->total = cpu_to_le64(nr_caps); + items++; + + /* encode the read latency metric */ + read = (struct ceph_metric_read_latency *)(cap + 1); + read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); + read->ver = 1; + read->compat = 1; + read->data_len = cpu_to_le32(sizeof(*read) - 10); + sum = m->read_latency_sum; + jiffies_to_timespec64(sum, &ts); + read->sec = cpu_to_le32(ts.tv_sec); + read->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the write latency metric */ + write = (struct ceph_metric_write_latency *)(read + 1); + write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); + write->ver = 1; + write->compat = 1; + write->data_len = cpu_to_le32(sizeof(*write) - 10); + sum = m->write_latency_sum; + jiffies_to_timespec64(sum, &ts); + write->sec = cpu_to_le32(ts.tv_sec); + write->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the metadata latency metric */ + meta = (struct ceph_metric_metadata_latency *)(write + 1); + meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); + meta->ver = 1; + meta->compat = 1; + meta->data_len = cpu_to_le32(sizeof(*meta) - 10); + sum = m->metadata_latency_sum; + jiffies_to_timespec64(sum, &ts); + meta->sec = cpu_to_le32(ts.tv_sec); + meta->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + put_unaligned_le32(items, &head->num); + msg->front.iov_len = len; + msg->hdr.version = cpu_to_le16(1); + msg->hdr.compat_version = cpu_to_le16(1); + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("client%llu send metrics to mds%d\n", + ceph_client_gid(mdsc->fsc->client), s->s_mds); + ceph_con_send(&s->s_con, msg); + + return true; +} + + +static void metric_get_session(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_session *s; + int i; + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + s = __ceph_lookup_mds_session(mdsc, i); + if (!s) + continue; + + /* + * Skip it if MDS doesn't support the metric collection, + * or the MDS will close the session's socket connection + * directly when it get this message. + */ + if (check_session_state(s) && + test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { + mdsc->metric.session = s; + break; + } + + ceph_put_mds_session(s); + } + mutex_unlock(&mdsc->mutex); +} + +static void metric_delayed_work(struct work_struct *work) +{ + struct ceph_client_metric *m = + container_of(work, struct ceph_client_metric, delayed_work.work); + struct ceph_mds_client *mdsc = + container_of(m, struct ceph_mds_client, metric); + + if (mdsc->stopping) + return; + + if (!m->session || !check_session_state(m->session)) { + if (m->session) { + ceph_put_mds_session(m->session); + m->session = NULL; + } + metric_get_session(mdsc); + } + if (m->session) { + ceph_mdsc_send_metrics(mdsc, m->session); + metric_schedule_delayed(m); + } +} int ceph_metric_init(struct ceph_client_metric *m) { @@ -52,6 +192,9 @@ int ceph_metric_init(struct ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum = 0; + m->session = NULL; + INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); + return 0; err_i_caps_mis: @@ -73,6 +216,11 @@ void ceph_metric_destroy(struct ceph_client_metric *m) percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_hit); + + cancel_delayed_work_sync(&m->delayed_work); + + if (m->session) + ceph_put_mds_session(m->session); } static inline void __update_latency(ktime_t *totalp, ktime_t *lsump, diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index 23a3373d5a3d..fe5d07d2e63a 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -6,6 +6,71 @@ #include #include +extern bool disable_send_metrics; + +enum ceph_metric_type { + CLIENT_METRIC_TYPE_CAP_INFO, + CLIENT_METRIC_TYPE_READ_LATENCY, + CLIENT_METRIC_TYPE_WRITE_LATENCY, + CLIENT_METRIC_TYPE_METADATA_LATENCY, + CLIENT_METRIC_TYPE_DENTRY_LEASE, + + CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, +}; + +/* metric caps header */ +struct ceph_metric_cap { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(hit + mis + total) */ + __le64 hit; + __le64 mis; + __le64 total; +} __packed; + +/* metric read latency header */ +struct ceph_metric_read_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric write latency header */ +struct ceph_metric_write_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric metadata latency header */ +struct ceph_metric_metadata_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +struct ceph_metric_head { + __le32 num; /* the number of metrics that will be sent */ +} __packed; + /* This is the global metrics */ struct ceph_client_metric { atomic64_t total_dentries; @@ -36,8 +101,20 @@ struct ceph_client_metric { ktime_t metadata_latency_sq_sum; ktime_t metadata_latency_min; ktime_t metadata_latency_max; + + struct ceph_mds_session *session; + struct delayed_work delayed_work; /* delayed work */ }; +static inline void metric_schedule_delayed(struct ceph_client_metric *m) +{ + if (disable_send_metrics) + return; + + /* per second */ + schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ)); +} + extern int ceph_metric_init(struct ceph_client_metric *m); extern void ceph_metric_destroy(struct ceph_client_metric *m); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c9784eb1159a..933f5df5da7d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -27,6 +27,9 @@ #include #include +static DEFINE_SPINLOCK(ceph_fsc_lock); +static LIST_HEAD(ceph_fsc_list); + /* * Ceph superblock operations * @@ -691,6 +694,10 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc->wb_pagevec_pool) goto fail_cap_wq; + spin_lock(&ceph_fsc_lock); + list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); + spin_unlock(&ceph_fsc_lock); + return fsc; fail_cap_wq: @@ -717,6 +724,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); + spin_lock(&ceph_fsc_lock); + list_del(&fsc->metric_wakeup); + spin_unlock(&ceph_fsc_lock); + ceph_mdsc_destroy(fsc); destroy_workqueue(fsc->inode_wq); destroy_workqueue(fsc->cap_wq); @@ -1282,6 +1293,37 @@ static void __exit exit_ceph(void) destroy_caches(); } +static int param_set_metrics(const char *val, const struct kernel_param *kp) +{ + struct ceph_fs_client *fsc; + int ret; + + ret = param_set_bool(val, kp); + if (ret) { + pr_err("Failed to parse sending metrics switch value '%s'\n", + val); + return ret; + } else if (!disable_send_metrics) { + // wake up all the mds clients + spin_lock(&ceph_fsc_lock); + list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { + metric_schedule_delayed(&fsc->mdsc->metric); + } + spin_unlock(&ceph_fsc_lock); + } + + return 0; +} + +static const struct kernel_param_ops param_ops_metrics = { + .set = param_set_metrics, + .get = param_get_bool, +}; + +bool disable_send_metrics = false; +module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); +MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 5a6cdd39bc10..2dcb6a90c636 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -101,6 +101,8 @@ struct ceph_mount_options { struct ceph_fs_client { struct super_block *sb; + struct list_head metric_wakeup; + struct ceph_mount_options *mount_options; struct ceph_client *client; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ebf5ba62b772..455e9b9e2adf 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -130,6 +130,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_REQUEST 24 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 #define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_METRICS 29 #define CEPH_MSG_CLIENT_CAPS 0x310 #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 -- cgit From 3b4168dd8b1d3e0bb129cf41e6bb50e217fe7781 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 16 Jul 2020 10:05:58 -0400 Subject: ceph: send client provided metric flags in client metadata Send metric flags to the MDS, indicating what metrics the client supports. Currently that consists of cap statistics, and read, write and metadata latencies. URL: https://tracker.ceph.com/issues/43435 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/ceph/metric.h | 13 ++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d6cd2e4f0bc8..af7221d1c610 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1194,6 +1194,48 @@ static int encode_supported_features(void **p, void *end) return 0; } +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) +static int encode_metric_spec(void **p, void *end) +{ + static const size_t count = ARRAY_SIZE(metric_bits); + + /* header */ + if (WARN_ON_ONCE(*p + 2 > end)) + return -ERANGE; + + ceph_encode_8(p, 1); /* version */ + ceph_encode_8(p, 1); /* compat */ + + if (count > 0) { + size_t i; + size_t size = METRIC_BYTES(count); + + if (WARN_ON_ONCE(*p + 4 + 4 + size > end)) + return -ERANGE; + + /* metric spec info length */ + ceph_encode_32(p, 4 + size); + + /* metric spec */ + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); + *p += size; + } else { + if (WARN_ON_ONCE(*p + 4 + 4 > end)) + return -ERANGE; + + /* metric spec info length */ + ceph_encode_32(p, 4); + /* metric spec */ + ceph_encode_32(p, 0); + } + + return 0; +} + /* * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. @@ -1234,6 +1276,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 size = FEATURE_BYTES(count); extra_bytes += 4 + size; + /* metric spec */ + size = 0; + count = ARRAY_SIZE(metric_bits); + if (count > 0) + size = METRIC_BYTES(count); + extra_bytes += 2 + 4 + 4 + size; + /* Allocate the message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); @@ -1252,9 +1301,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map * - * ClientSession messages with metadata are v3 + * ClientSession messages with metadata are v4 */ - msg->hdr.version = cpu_to_le16(3); + msg->hdr.version = cpu_to_le16(4); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ @@ -1283,6 +1332,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 return ERR_PTR(ret); } + ret = encode_metric_spec(&p, end); + if (ret) { + pr_err("encode_metric_spec failed!\n"); + ceph_msg_put(msg); + return ERR_PTR(ret); + } + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index fe5d07d2e63a..1d0959d669d7 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -18,6 +18,19 @@ enum ceph_metric_type { CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, }; +/* + * This will always have the highest metric bit value + * as the last element of the array. + */ +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ + CLIENT_METRIC_TYPE_CAP_INFO, \ + CLIENT_METRIC_TYPE_READ_LATENCY, \ + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ + \ + CLIENT_METRIC_TYPE_MAX, \ +} + /* metric caps header */ struct ceph_metric_cap { __le32 type; /* ceph metric type */ -- cgit From f1f565a26976612121f97464f9245307422d0ce8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:36:04 -0700 Subject: ceph: delete repeated words in fs/ceph/ Drop duplicated words "down" and "the" in fs/ceph/. [ idryomov: merge into a single patch ] Signed-off-by: Randy Dunlap Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 2 +- fs/ceph/super.h | 2 +- include/linux/ceph/ceph_features.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 933f5df5da7d..585aecea5cad 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -839,7 +839,7 @@ static void destroy_caches(void) } /* - * ceph_umount_begin - initiate forced umount. Tear down down the + * ceph_umount_begin - initiate forced umount. Tear down the * mount, skipping steps that may hang while waiting for server(s). */ static void ceph_umount_begin(struct super_block *sb) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2dcb6a90c636..9001a896ae8c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -355,7 +355,7 @@ struct ceph_inode_info { unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ /* - * Link to the the auth cap's session's s_cap_dirty list. s_cap_dirty + * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty * is protected by the mdsc->cap_dirty_lock, but each individual item * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty * requires the mdsc->cap_dirty_lock. List presence for an item can diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 39e6f4c57580..fcd84e8d88f4 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -58,7 +58,7 @@ * because 10.2.z (jewel) did not care if its peers advertised this * feature bit. * - * - In the second phase we stop advertising the the bit and call it + * - In the second phase we stop advertising the bit and call it * RETIRED. This can normally be done in the *next* major release * following the one in which we marked the feature DEPRECATED. In * the above example, for 12.0.z (luminous) we can say: -- cgit From 8e298deb8d8c449d87acceab8c8bfef71b67b08d Mon Sep 17 00:00:00 2001 From: Jia Yang Date: Thu, 23 Jul 2020 10:25:52 +0800 Subject: ceph: remove unused variables in ceph_mdsmap_decode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build warnings: fs/ceph/mdsmap.c: In function ‘ceph_mdsmap_decode’: fs/ceph/mdsmap.c:192:7: warning: variable ‘info_cv’ set but not used [-Wunused-but-set-variable] fs/ceph/mdsmap.c:177:7: warning: variable ‘state_seq’ set but not used [-Wunused-but-set-variable] fs/ceph/mdsmap.c:123:15: warning: variable ‘mdsmap_cv’ set but not used [-Wunused-but-set-variable] Note that p is increased in ceph_decode_*. Signed-off-by: Jia Yang Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/mdsmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 889627817e52..e4aba6c6d3b5 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -120,7 +120,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) const void *start = *p; int i, j, n; int err; - u8 mdsmap_v, mdsmap_cv; + u8 mdsmap_v; u16 mdsmap_ev; m = kzalloc(sizeof(*m), GFP_NOFS); @@ -129,7 +129,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_decode_need(p, end, 1 + 1, bad); mdsmap_v = ceph_decode_8(p); - mdsmap_cv = ceph_decode_8(p); + *p += sizeof(u8); /* mdsmap_cv */ if (mdsmap_v >= 4) { u32 mdsmap_len; ceph_decode_32_safe(p, end, mdsmap_len, bad); @@ -174,7 +174,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) u64 global_id; u32 namelen; s32 mds, inc, state; - u64 state_seq; u8 info_v; void *info_end = NULL; struct ceph_entity_addr addr; @@ -189,9 +188,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) info_v= ceph_decode_8(p); if (info_v >= 4) { u32 info_len; - u8 info_cv; ceph_decode_need(p, end, 1 + sizeof(u32), bad); - info_cv = ceph_decode_8(p); + *p += sizeof(u8); /* info_cv */ info_len = ceph_decode_32(p); info_end = *p + info_len; if (info_end > end) @@ -210,7 +208,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) mds = ceph_decode_32(p); inc = ceph_decode_32(p); state = ceph_decode_32(p); - state_seq = ceph_decode_64(p); + *p += sizeof(u64); /* state_seq */ err = ceph_decode_entity_addr(p, end, &addr); if (err) goto corrupt; -- cgit From a7caa88f8b72c136f9a401f498471b8a8e35370d Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 23 Jul 2020 15:32:25 +0800 Subject: ceph: fix use-after-free for fsc->mdsc If the ceph_mdsc_init() fails, it will free the mdsc already. Reported-by: syzbot+b57f46d8d6ea51960b8c@syzkaller.appspotmail.com Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index af7221d1c610..590822fab767 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4453,7 +4453,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) goto err_mdsc; } - fsc->mdsc = mdsc; init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -4508,6 +4507,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) strscpy(mdsc->nodename, utsname()->nodename, sizeof(mdsc->nodename)); + + fsc->mdsc = mdsc; return 0; err_mdsmap: -- cgit From 2c81ef286c42c5bfc0d0a60219b781791d4bd55c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jul 2020 16:22:40 +0100 Subject: ceph: remove redundant initialization of variable mds The variable mds is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3030f5585085..97539b497e4c 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -262,7 +262,7 @@ static int mds_sessions_show(struct seq_file *s, void *ptr) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_auth_client *ac = fsc->client->monc.auth; struct ceph_options *opt = fsc->client->options; - int mds = -1; + int mds; mutex_lock(&mdsc->mutex); -- cgit From bd8548d0dcdab514e08e35a3451667486d879dae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Aug 2020 10:48:35 +0200 Subject: clk: hsdk: Fix bad dependency on IOMEM CONFIG_IOMEM does not exist. The correct symbol to depend on is CONFIG_HAS_IOMEM. Fixes: 1e7468bd9d30a21e ("clk: Specify IOMEM dependency for HSDK pll driver") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200803084835.21838-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 690a2587e0c5..4026fac9fac3 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -50,7 +50,7 @@ source "drivers/clk/versatile/Kconfig" config CLK_HSDK bool "PLL Driver for HSDK platform" depends on OF || COMPILE_TEST - depends on IOMEM + depends on HAS_IOMEM help This driver supports the HSDK core, system, ddr, tunnel and hdmi PLLs control. -- cgit From 6cfde88418fe95240e32d2955b51988360ee0942 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 1 Aug 2020 10:53:42 +0200 Subject: clk: drop unused function __clk_get_flags The function __clk_get_flags has not been used since the April 2019 commit a348f05361c9 ("ARM: omap2+: hwmod: drop CLK_IS_BASIC flag usage"). Other uses were removed in June 2015, eg by commit 98d8a60eccee ("clk: Convert __clk_get_flags() to clk_hw_get_flags()"), which shows how clk_hw_get_flags can easily be used instead. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/1596272022-14173-1-git-send-email-Julia.Lawall@inria.fr Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 6 ------ include/linux/clk-provider.h | 1 - 2 files changed, 7 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 3f588ed06ce3..d71456aad11d 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -500,12 +500,6 @@ static unsigned long clk_core_get_accuracy_no_lock(struct clk_core *core) return core->accuracy; } -unsigned long __clk_get_flags(struct clk *clk) -{ - return !clk ? 0 : clk->core->flags; -} -EXPORT_SYMBOL_GPL(__clk_get_flags); - unsigned long clk_hw_get_flags(const struct clk_hw *hw) { return hw->core->flags; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bd1ee9039558..93a78a5256d1 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1096,7 +1096,6 @@ int clk_hw_get_parent_index(struct clk_hw *hw); int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *new_parent); unsigned int __clk_get_enable_count(struct clk *clk); unsigned long clk_hw_get_rate(const struct clk_hw *hw); -unsigned long __clk_get_flags(struct clk *clk); unsigned long clk_hw_get_flags(const struct clk_hw *hw); #define clk_hw_can_set_rate_parent(hw) \ (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) -- cgit From f34e4651ce66a754f41203284acf09b28b9dd955 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 30 Jul 2020 20:26:19 +0200 Subject: clk: bcm2835: Do not use prediv with bcm2711's PLLs Contrary to previous SoCs, bcm2711 doesn't have a prescaler in the PLL feedback loop. Bypass it by zeroing fb_prediv_mask when running on bcm2711. Note that, since the prediv configuration bits were re-purposed, this was triggering miscalculations on all clocks hanging from the VPU clock, notably the aux UART, making its output unintelligible. Fixes: 42de9ad400af ("clk: bcm2835: Add BCM2711_CLOCK_EMMC2 support") Reported-by: Nathan Chancellor Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20200730182619.23246-1-nsaenzjulienne@suse.de Tested-by: Nathan Chancellor Reviewed-by: Florian Fainelli Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 027eba31f793..3439bc65bb4e 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -314,6 +314,7 @@ struct bcm2835_cprman { struct device *dev; void __iomem *regs; spinlock_t regs_lock; /* spinlock for all clocks */ + unsigned int soc; /* * Real names of cprman clock parents looked up through @@ -526,6 +527,20 @@ static int bcm2835_pll_is_on(struct clk_hw *hw) A2W_PLL_CTRL_PRST_DISABLE; } +static u32 bcm2835_pll_get_prediv_mask(struct bcm2835_cprman *cprman, + const struct bcm2835_pll_data *data) +{ + /* + * On BCM2711 there isn't a pre-divisor available in the PLL feedback + * loop. Bits 13:14 of ANA1 (PLLA,PLLB,PLLC,PLLD) have been re-purposed + * for to for VCO RANGE bits. + */ + if (cprman->soc & SOC_BCM2711) + return 0; + + return data->ana->fb_prediv_mask; +} + static void bcm2835_pll_choose_ndiv_and_fdiv(unsigned long rate, unsigned long parent_rate, u32 *ndiv, u32 *fdiv) @@ -583,7 +598,7 @@ static unsigned long bcm2835_pll_get_rate(struct clk_hw *hw, ndiv = (a2wctrl & A2W_PLL_CTRL_NDIV_MASK) >> A2W_PLL_CTRL_NDIV_SHIFT; pdiv = (a2wctrl & A2W_PLL_CTRL_PDIV_MASK) >> A2W_PLL_CTRL_PDIV_SHIFT; using_prediv = cprman_read(cprman, data->ana_reg_base + 4) & - data->ana->fb_prediv_mask; + bcm2835_pll_get_prediv_mask(cprman, data); if (using_prediv) { ndiv *= 2; @@ -666,6 +681,7 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, struct bcm2835_pll *pll = container_of(hw, struct bcm2835_pll, hw); struct bcm2835_cprman *cprman = pll->cprman; const struct bcm2835_pll_data *data = pll->data; + u32 prediv_mask = bcm2835_pll_get_prediv_mask(cprman, data); bool was_using_prediv, use_fb_prediv, do_ana_setup_first; u32 ndiv, fdiv, a2w_ctl; u32 ana[4]; @@ -683,7 +699,7 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, for (i = 3; i >= 0; i--) ana[i] = cprman_read(cprman, data->ana_reg_base + i * 4); - was_using_prediv = ana[1] & data->ana->fb_prediv_mask; + was_using_prediv = ana[1] & prediv_mask; ana[0] &= ~data->ana->mask0; ana[0] |= data->ana->set0; @@ -693,10 +709,10 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, ana[3] |= data->ana->set3; if (was_using_prediv && !use_fb_prediv) { - ana[1] &= ~data->ana->fb_prediv_mask; + ana[1] &= ~prediv_mask; do_ana_setup_first = true; } else if (!was_using_prediv && use_fb_prediv) { - ana[1] |= data->ana->fb_prediv_mask; + ana[1] |= prediv_mask; do_ana_setup_first = false; } else { do_ana_setup_first = true; @@ -2262,6 +2278,7 @@ static int bcm2835_clk_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cprman); cprman->onecell.num = asize; + cprman->soc = pdata->soc; hws = cprman->onecell.hws; for (i = 0; i < asize; i++) { -- cgit From cbd287c09351f1d3a4b3cb9167a2616a11390d32 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Aug 2020 17:06:21 -0600 Subject: io_uring: io_async_buf_func() need not test page bit Since we don't do exclusive waits or wakeups, we know that the bit is always going to be set. Kill the test. Also see commit: 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2a3af95be4ca..bb4f0b2d5138 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2965,10 +2965,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, if (!wake_page_match(wpq, key)) return 0; - /* Stop waking things up if the page is locked again */ - if (test_bit(key->bit_nr, &key->page->flags)) - return -1; - list_del_init(&wait->entry); init_task_work(&req->task_work, io_req_task_submit); -- cgit From c1dd91d16246b168b80af9b64c5cc35a66410455 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Aug 2020 16:43:59 -0600 Subject: io_uring: add comments on how the async buffered read retry works The retry based logic here isn't easy to follow unless you're already familiar with how io_uring does task_work based retries. Add some comments explaining the flow a little better. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe --- fs/io_uring.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bb4f0b2d5138..5e1c08e22990 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2952,6 +2952,16 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, return io_rw_prep_async(req, READ, force_nonblock); } +/* + * This is our waitqueue callback handler, registered through lock_page_async() + * when we initially tried to do the IO with the iocb armed our waitqueue. + * This gets called when the page is unlocked, and we generally expect that to + * happen when the page IO is completed and the page is now uptodate. This will + * queue a task_work based retry of the operation, attempting to copy the data + * again. If the latter fails because the page was NOT uptodate, then we will + * do a thread based blocking retry of the operation. That's the unexpected + * slow path. + */ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, int sync, void *arg) { @@ -3004,7 +3014,18 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, return -EOPNOTSUPP; } - +/* + * This controls whether a given IO request should be armed for async page + * based retry. If we return false here, the request is handed to the async + * worker threads for retry. If we're doing buffered reads on a regular file, + * we prepare a private wait_page_queue entry and retry the operation. This + * will either succeed because the page is now uptodate and unlocked, or it + * will register a callback when the page is unlocked at IO completion. Through + * that callback, io_uring uses task_work to setup a retry of the operation. + * That retry will attempt the buffered read again. The retry will generally + * succeed, or in rare cases where it fails, we then fall back to using the + * async worker threads for a blocking retry. + */ static bool io_rw_should_retry(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; -- cgit From 9e9da64124ff37e064a75fabdbd6010787a1e0ea Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Mon, 22 Jun 2020 14:11:24 +0200 Subject: openrisc: fix __user in raw_copy_to_user()'s prototype raw_copy_to_user()'s prototype seems to be a copy & paste of raw_copy_from_user() and as such has the __user annotation in the 'from' argument instead of the 'to'. So, move the __user annotation in the prototype to the 'to'. Reported-by: kernel test robot Signed-off-by: Luc Van Oostenryck Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..46e31bb4a9ad 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -241,7 +241,7 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long size) return __copy_tofrom_user(to, (__force const void *)from, size); } static inline unsigned long -raw_copy_to_user(void *to, const void __user *from, unsigned long size) +raw_copy_to_user(void __user *to, const void *from, unsigned long size) { return __copy_tofrom_user((__force void *)to, from, size); } -- cgit From d0b7213f895cd0e209ff5ba89998aeb09267bdc7 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Fri, 26 Jun 2020 05:24:24 +0900 Subject: init: Align init_task to avoid conflict with MUTEX_FLAGS When booting on 32-bit machines (seen on OpenRISC) I saw this warning with CONFIG_DEBUG_MUTEXES turned on. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/locking/mutex.c:1242 __mutex_unlock_slowpath+0x328/0x3ec DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current) Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc1-simple-smp-00005-g2864e2171db4-dirty #179 Call trace: [<(ptrval)>] dump_stack+0x34/0x48 [<(ptrval)>] __warn+0x104/0x158 [<(ptrval)>] ? __mutex_unlock_slowpath+0x328/0x3ec [<(ptrval)>] warn_slowpath_fmt+0x7c/0x94 [<(ptrval)>] __mutex_unlock_slowpath+0x328/0x3ec [<(ptrval)>] mutex_unlock+0x18/0x28 [<(ptrval)>] __cpuhp_setup_state_cpuslocked.part.0+0x29c/0x2f4 [<(ptrval)>] ? page_alloc_cpu_dead+0x0/0x30 [<(ptrval)>] ? start_kernel+0x0/0x684 [<(ptrval)>] __cpuhp_setup_state+0x4c/0x5c [<(ptrval)>] page_alloc_init+0x34/0x68 [<(ptrval)>] ? start_kernel+0x1a0/0x684 [<(ptrval)>] ? early_init_dt_scan_nodes+0x60/0x70 irq event stamp: 0 I traced this to kernel/locking/mutex.c storing 3 bits of MUTEX_FLAGS in the task_struct pointer (mutex.owner). There is a comment saying that task_structs are always aligned to L1_CACHE_BYTES. This is not true for the init_task. On 64-bit machines this is not a problem because symbol addresses are naturally aligned to 64-bits providing 3 bits for MUTEX_FLAGS. Howerver, for 32-bit machines the symbol address only has 2 bits available. Fix this by setting init_task alignment to at least L1_CACHE_BYTES. Signed-off-by: Stafford Horne Acked-by: Peter Zijlstra (Intel) --- init/init_task.c | 1 + 1 file changed, 1 insertion(+) diff --git a/init/init_task.c b/init/init_task.c index 15089d15010a..ab6173f8e6a8 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -65,6 +65,7 @@ struct task_struct init_task #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK __init_task_data #endif + __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), -- cgit From ff6c923dbec332dd6c6f649b754f4edd8f0a3c50 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 5 Jul 2020 05:53:49 +0900 Subject: openrisc: Add support for external initrd images In OpenRISC we set the initrd_start and initrd_end based on the symbols we setup in vmlinux.lds.S. However, this is not needed if we use the generic linker description in INIT_DATA_SECTION. Removing our own initrd setup reduces code, but also the generic code supports loading external initrd images. A bootloader can load a rootfs image into memory and we can configure devicetree to load it with: chosen { bootargs = "earlycon"; stdout-path = "uart0:115200"; linux,initrd-start = < 0x08000100 >; linux,initrd-end = < 0x08200000 >; }; Reported-by: Mateusz Holenko Signed-off-by: Stafford Horne --- arch/openrisc/kernel/setup.c | 8 +++++--- arch/openrisc/kernel/vmlinux.lds.S | 12 ------------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8aa438e1f51f..b18e775f8be3 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -292,13 +292,15 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long)_end; #ifdef CONFIG_BLK_DEV_INITRD - initrd_start = (unsigned long)&__initrd_start; - initrd_end = (unsigned long)&__initrd_end; if (initrd_start == initrd_end) { + printk(KERN_INFO "Initial ramdisk not found\n"); initrd_start = 0; initrd_end = 0; + } else { + printk(KERN_INFO "Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), initrd_end - initrd_start); + initrd_below_start_ok = 1; } - initrd_below_start_ok = 1; #endif /* setup memblock allocator */ diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 60449fd7f16f..22fbc5fb24b3 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -96,18 +96,6 @@ SECTIONS __init_end = .; - . = ALIGN(PAGE_SIZE); - .initrd : AT(ADDR(.initrd) - LOAD_OFFSET) - { - __initrd_start = .; - *(.initrd) - __initrd_end = .; - FILL (0); - . = ALIGN (PAGE_SIZE); - } - - __vmlinux_end = .; /* last address of the physical file */ - BSS_SECTION(0, 0, 0x20) _end = .; -- cgit From 57b8e277c33620e115633cdf700a260b55095460 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 16 Jun 2020 06:19:46 +0900 Subject: openrisc: Fix oops caused when dumping stack When dumping a stack with 'cat /proc/#/stack' the kernel would oops. For example: # cat /proc/690/stack Unable to handle kernel access at virtual address 0x7fc60f58 Oops#: 0000 CPU #: 0 PC: c00097fc SR: 0000807f SP: d6f09b9c GPR00: 00000000 GPR01: d6f09b9c GPR02: d6f09bb8 GPR03: d6f09bc4 GPR04: 7fc60f5c GPR05: c00099b4 GPR06: 00000000 GPR07: d6f09ba3 GPR08: ffffff00 GPR09: c0009804 GPR10: d6f08000 GPR11: 00000000 GPR12: ffffe000 GPR13: dbb86000 GPR14: 00000001 GPR15: dbb86250 GPR16: 7fc60f63 GPR17: 00000f5c GPR18: d6f09bc4 GPR19: 00000000 GPR20: c00099b4 GPR21: ffffffc0 GPR22: 00000000 GPR23: 00000000 GPR24: 00000001 GPR25: 000002c6 GPR26: d78b6850 GPR27: 00000001 GPR28: 00000000 GPR29: dbb86000 GPR30: ffffffff GPR31: dbb862fc RES: 00000000 oGPR11: ffffffff Process cat (pid: 702, stackpage=d79d6000) Stack: Call trace: [<598977f2>] save_stack_trace_tsk+0x40/0x74 [<95063f0e>] stack_trace_save_tsk+0x44/0x58 [] proc_pid_stack+0xd0/0x13c [] proc_single_show+0x6c/0xf0 [] seq_read+0x1b4/0x688 [<2d6c7480>] do_iter_read+0x208/0x248 [<2182a2fb>] vfs_readv+0x64/0x90 This was caused by the stack trace code in save_stack_trace_tsk using the wrong stack pointer. It was using the user stack pointer instead of the kernel stack pointer. Fix this by using the right stack. Also for good measure we add try_get_task_stack/put_task_stack to ensure the task is not lost while we are walking it's stack. Fixes: eecac38b0423a ("openrisc: support framepointers and STACKTRACE_SUPPORT") Signed-off-by: Stafford Horne --- arch/openrisc/kernel/stacktrace.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc7..54d38809e22c 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -- cgit From c28b27416da91659ad580074db6756b2ab7e62da Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 24 Jun 2020 04:44:05 +0900 Subject: openrisc: Implement proper SMP tlb flushing Up until now when flushing pages from the TLB on SMP OpenRISC was always resorting to flush the entire TLB on all CPUs. This patch adds the mechanics for flushing specific ranges and pages based on the usage. The function switch_mm is updated to account for cpu usage by updating mm_struct's cpumask. This is used in the SMP flush routines. This mostly follows the riscv implementation. Signed-off-by: Stafford Horne --- arch/openrisc/kernel/smp.c | 85 +++++++++++++++++++++++++++++++++++++++++----- arch/openrisc/mm/tlb.c | 17 +++++++--- 2 files changed, 89 insertions(+), 13 deletions(-) diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index bd1e660bbc89..29c82ef2e207 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -219,30 +219,99 @@ static inline void ipi_flush_tlb_all(void *ignored) local_flush_tlb_all(); } +static inline void ipi_flush_tlb_mm(void *info) +{ + struct mm_struct *mm = (struct mm_struct *)info; + + local_flush_tlb_mm(mm); +} + +static void smp_flush_tlb_mm(struct cpumask *cmask, struct mm_struct *mm) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + local_flush_tlb_mm(mm); + } else { + on_each_cpu_mask(cmask, ipi_flush_tlb_mm, mm, 1); + } + put_cpu(); +} + +struct flush_tlb_data { + unsigned long addr1; + unsigned long addr2; +}; + +static inline void ipi_flush_tlb_page(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_page(NULL, fd->addr1); +} + +static inline void ipi_flush_tlb_range(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_range(NULL, fd->addr1, fd->addr2); +} + +static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start, + unsigned long end) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + if ((end - start) <= PAGE_SIZE) + local_flush_tlb_page(NULL, start); + else + local_flush_tlb_range(NULL, start, end); + } else { + struct flush_tlb_data fd; + + fd.addr1 = start; + fd.addr2 = end; + + if ((end - start) <= PAGE_SIZE) + on_each_cpu_mask(cmask, ipi_flush_tlb_page, &fd, 1); + else + on_each_cpu_mask(cmask, ipi_flush_tlb_range, &fd, 1); + } + put_cpu(); +} + void flush_tlb_all(void) { on_each_cpu(ipi_flush_tlb_all, NULL, 1); } -/* - * FIXME: implement proper functionality instead of flush_tlb_all. - * *But*, as things currently stands, the local_tlb_flush_* functions will - * all boil down to local_tlb_flush_all anyway. - */ void flush_tlb_mm(struct mm_struct *mm) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_mm(mm_cpumask(mm), mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), uaddr, uaddr + PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end); } /* Instruction cache invalidate - performed on each cpu */ diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c index 4b680aed8f5f..2b6feabf6381 100644 --- a/arch/openrisc/mm/tlb.c +++ b/arch/openrisc/mm/tlb.c @@ -137,21 +137,28 @@ void local_flush_tlb_mm(struct mm_struct *mm) void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *next_tsk) { + unsigned int cpu; + + if (unlikely(prev == next)) + return; + + cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + /* remember the pgd for the fault handlers * this is similar to the pgd register in some other CPU's. * we need our own copy of it because current and active_mm * might be invalid at points where we still need to derefer * the pgd. */ - current_pgd[smp_processor_id()] = next->pgd; + current_pgd[cpu] = next->pgd; /* We don't have context support implemented, so flush all * entries belonging to previous map */ - - if (prev != next) - local_flush_tlb_mm(prev); - + local_flush_tlb_mm(prev); } /* -- cgit From c1d55d50139bea6bfe964458272a93dd899efb83 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 29 Jul 2020 21:03:14 +0900 Subject: asm-generic/io.h: Fix sparse warnings on big-endian architectures On big-endian architectures like OpenRISC, sparse outputs below warnings on asm-generic/io.h. This is due to io statements like: __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr); The __raw_writel() function expects native endianness, however cpu_to_le32() returns __le32. On little-endian machines these match up and there is no issue. However, on big-endian we get warnings, for IO that is defined as little-endian the mismatch is expected. The fix I propose is to __force to native endian. Warnings: ./include/asm-generic/io.h:166:15: warning: cast to restricted __le16 ./include/asm-generic/io.h:166:15: warning: cast to restricted __le16 ./include/asm-generic/io.h:166:15: warning: cast to restricted __le16 ./include/asm-generic/io.h:166:15: warning: cast to restricted __le16 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:179:15: warning: cast to restricted __le32 ./include/asm-generic/io.h:215:22: warning: incorrect type in argument 1 (different base types) ./include/asm-generic/io.h:215:22: expected unsigned short [usertype] value ./include/asm-generic/io.h:215:22: got restricted __le16 [usertype] ./include/asm-generic/io.h:225:22: warning: incorrect type in argument 1 (different base types) ./include/asm-generic/io.h:225:22: expected unsigned int [usertype] value ./include/asm-generic/io.h:225:22: got restricted __le32 [usertype] Signed-off-by: Stafford Horne Acked-by: Arnd Bergmann --- include/asm-generic/io.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 30a3aab312e6..dabf8cb7203b 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -163,7 +163,7 @@ static inline u16 readw(const volatile void __iomem *addr) u16 val; __io_br(); - val = __le16_to_cpu(__raw_readw(addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); __io_ar(val); return val; } @@ -176,7 +176,7 @@ static inline u32 readl(const volatile void __iomem *addr) u32 val; __io_br(); - val = __le32_to_cpu(__raw_readl(addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); __io_ar(val); return val; } @@ -212,7 +212,7 @@ static inline void writeb(u8 value, volatile void __iomem *addr) static inline void writew(u16 value, volatile void __iomem *addr) { __io_bw(); - __raw_writew(cpu_to_le16(value), addr); + __raw_writew((u16 __force)cpu_to_le16(value), addr); __io_aw(); } #endif @@ -222,7 +222,7 @@ static inline void writew(u16 value, volatile void __iomem *addr) static inline void writel(u32 value, volatile void __iomem *addr) { __io_bw(); - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); __io_aw(); } #endif @@ -474,7 +474,7 @@ static inline u16 _inw(unsigned long addr) u16 val; __io_pbr(); - val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); __io_par(val); return val; } @@ -487,7 +487,7 @@ static inline u32 _inl(unsigned long addr) u32 val; __io_pbr(); - val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); __io_par(val); return val; } @@ -508,7 +508,7 @@ static inline void _outb(u8 value, unsigned long addr) static inline void _outw(u16 value, unsigned long addr) { __io_pbw(); - __raw_writew(cpu_to_le16(value), PCI_IOBASE + addr); + __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } #endif @@ -518,7 +518,7 @@ static inline void _outw(u16 value, unsigned long addr) static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); - __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr); + __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } #endif -- cgit From d88ca7e1a27eb2df056bbf37ddef62e1c73d37ea Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 30 Jul 2020 19:47:14 +0900 Subject: fbmem: pull fbcon_update_vcs() out of fb_set_var() syzbot is reporting OOB read bug in vc_do_resize() [1] caused by memcpy() based on outdated old_{rows,row_size} values, for resize_screen() can recurse into vc_do_resize() which changes vc->vc_{cols,rows} that outdates old_{rows,row_size} values which were saved before calling resize_screen(). Daniel Vetter explained that resize_screen() should not recurse into fbcon_update_vcs() path due to FBINFO_MISC_USEREVENT being still set when calling resize_screen(). Instead of masking FBINFO_MISC_USEREVENT before calling fbcon_update_vcs(), we can remove FBINFO_MISC_USEREVENT by calling fbcon_update_vcs() only if fb_set_var() returned 0. This change assumes that it is harmless to call fbcon_update_vcs() when fb_set_var() returned 0 without reaching fb_notifier_call_chain(). [1] https://syzkaller.appspot.com/bug?id=c70c88cfd16dcf6e1d3c7f0ab8648b3144b5b25e Reported-and-tested-by: syzbot Suggested-by: Daniel Vetter Signed-off-by: Tetsuo Handa Reported-by: kernel test robot for missing #include Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/075b7e37-3278-cd7d-31ab-c5073cfa8e92@i-love.sakura.ne.jp --- drivers/video/fbdev/core/fbmem.c | 8 ++------ drivers/video/fbdev/core/fbsysfs.c | 4 ++-- drivers/video/fbdev/ps3fb.c | 5 +++-- include/linux/fb.h | 2 -- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 30e73ec4ad5c..da7c88ffaa6a 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -957,7 +957,6 @@ static int fb_check_caps(struct fb_info *info, struct fb_var_screeninfo *var, int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) { - int flags = info->flags; int ret = 0; u32 activate; struct fb_var_screeninfo old_var; @@ -1052,9 +1051,6 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) event.data = &mode; fb_notifier_call_chain(FB_EVENT_MODE_CHANGE, &event); - if (flags & FBINFO_MISC_USEREVENT) - fbcon_update_vcs(info, activate & FB_ACTIVATE_ALL); - return 0; } EXPORT_SYMBOL(fb_set_var); @@ -1105,9 +1101,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - info->flags |= FBINFO_MISC_USEREVENT; ret = fb_set_var(info, &var); - info->flags &= ~FBINFO_MISC_USEREVENT; + if (!ret) + fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); console_unlock(); if (!ret && copy_to_user(argp, &var, sizeof(var))) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index d54c88f88991..65dae05fff8e 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -91,9 +91,9 @@ static int activate(struct fb_info *fb_info, struct fb_var_screeninfo *var) var->activate |= FB_ACTIVATE_FORCE; console_lock(); - fb_info->flags |= FBINFO_MISC_USEREVENT; err = fb_set_var(fb_info, var); - fb_info->flags &= ~FBINFO_MISC_USEREVENT; + if (!err) + fbcon_update_vcs(fb_info, var->activate & FB_ACTIVATE_ALL); console_unlock(); if (err) return err; diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c index 9df78fb77267..203c254f8f6c 100644 --- a/drivers/video/fbdev/ps3fb.c +++ b/drivers/video/fbdev/ps3fb.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -824,12 +825,12 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd, var = info->var; fb_videomode_to_var(&var, vmode); console_lock(); - info->flags |= FBINFO_MISC_USEREVENT; /* Force, in case only special bits changed */ var.activate |= FB_ACTIVATE_FORCE; par->new_mode_id = val; retval = fb_set_var(info, &var); - info->flags &= ~FBINFO_MISC_USEREVENT; + if (!retval) + fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); console_unlock(); } break; diff --git a/include/linux/fb.h b/include/linux/fb.h index 2b530e6d86e4..850f79e9a7cb 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -400,8 +400,6 @@ struct fb_tile_ops { #define FBINFO_HWACCEL_YPAN 0x2000 /* optional */ #define FBINFO_HWACCEL_YWRAP 0x4000 /* optional */ -#define FBINFO_MISC_USEREVENT 0x10000 /* event request - from userspace */ #define FBINFO_MISC_TILEBLITTING 0x20000 /* use tile blitting */ /* A driver may set this flag to indicate that it does want a set_par to be -- cgit From a72a6a16d51034045cb6355924b62221a8221ca3 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 4 Jun 2020 11:02:14 +0300 Subject: drm/tidss: fix modeset init for DPI panels The connector type for DISPC's DPI videoport was set the LVDS instead of DPI. This causes any DPI panel setup to fail with tidss, making all DPI panels unusable. Fix this by using correct connector type. Signed-off-by: Tomi Valkeinen Fixes: 32a1795f57eecc39749017 ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Cc: stable@vger.kernel.org # v5.7+ Link: https://patchwork.freedesktop.org/patch/msgid/20200604080214.107159-1-tomi.valkeinen@ti.com Reviewed-by: Jyri Sarha --- drivers/gpu/drm/tidss/tidss_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index 4b99e9fa84a5..c0240f7e0b19 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -154,7 +154,7 @@ static int tidss_dispc_modeset_init(struct tidss_device *tidss) break; case DISPC_VP_DPI: enc_type = DRM_MODE_ENCODER_DPI; - conn_type = DRM_MODE_CONNECTOR_LVDS; + conn_type = DRM_MODE_CONNECTOR_DPI; break; default: WARN_ON(1); -- cgit From ecfdedd7da5d54416db5ca0f851264dca8736f59 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 18 Jun 2020 12:51:52 +0300 Subject: drm/omap: force runtime PM suspend on system suspend Use SET_LATE_SYSTEM_SLEEP_PM_OPS in DSS submodules to force runtime PM suspend and resume. We use suspend late version so that omapdrm's system suspend callback is called first, as that will disable all the display outputs after which it's safe to force DSS into suspend. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20200618095153.611071-1-tomi.valkeinen@ti.com Acked-by: Tony Lindgren Fixes: cef766300353 ("drm/omap: Prepare DSS for probing without legacy platform data") Cc: stable@vger.kernel.org # v5.7+ Tested-by: Tony Lindgren --- drivers/gpu/drm/omapdrm/dss/dispc.c | 1 + drivers/gpu/drm/omapdrm/dss/dsi.c | 1 + drivers/gpu/drm/omapdrm/dss/dss.c | 1 + drivers/gpu/drm/omapdrm/dss/venc.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index 6639ee9b05d3..48593932bddf 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -4915,6 +4915,7 @@ static int dispc_runtime_resume(struct device *dev) static const struct dev_pm_ops dispc_pm_ops = { .runtime_suspend = dispc_runtime_suspend, .runtime_resume = dispc_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dispchw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 79ddfbfd1b58..eeccf40bae41 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5467,6 +5467,7 @@ static int dsi_runtime_resume(struct device *dev) static const struct dev_pm_ops dsi_pm_ops = { .runtime_suspend = dsi_runtime_suspend, .runtime_resume = dsi_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dsihw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 4d5739fa4a5d..6ccbc29c4ce4 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1614,6 +1614,7 @@ static int dss_runtime_resume(struct device *dev) static const struct dev_pm_ops dss_pm_ops = { .runtime_suspend = dss_runtime_suspend, .runtime_resume = dss_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dsshw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c index 9701843ccf09..01ee6c50b663 100644 --- a/drivers/gpu/drm/omapdrm/dss/venc.c +++ b/drivers/gpu/drm/omapdrm/dss/venc.c @@ -902,6 +902,7 @@ static int venc_runtime_resume(struct device *dev) static const struct dev_pm_ops venc_pm_ops = { .runtime_suspend = venc_runtime_suspend, .runtime_resume = venc_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static const struct of_device_id venc_of_match[] = { -- cgit From b5ac98cbb8e5e30c34ebc837d1e5a3982d2b5f5c Mon Sep 17 00:00:00 2001 From: Marius Iacob Date: Sat, 1 Aug 2020 15:34:46 +0300 Subject: drm: Added orientation quirk for ASUS tablet model T103HAF Signed-off-by: Marius Iacob Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200801123445.1514567-1-themariusus@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d00ea384dcbf..58f5dc2f6dd5 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -121,6 +121,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T101HA"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Asus T103HAF */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), -- cgit From a34a0a632dd991a371fec56431d73279f9c54029 Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Sun, 19 Jul 2020 23:45:45 +0800 Subject: drm: fix drm_dp_mst_port refcount leaks in drm_dp_mst_allocate_vcpi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_dp_mst_allocate_vcpi() invokes drm_dp_mst_topology_get_port_validated(), which increases the refcount of the "port". These reference counting issues take place in two exception handling paths separately. Either when “slots” is less than 0 or when drm_dp_init_vcpi() returns a negative value, the function forgets to reduce the refcnt increased drm_dp_mst_topology_get_port_validated(), which results in a refcount leak. Fix these issues by pulling up the error handling when "slots" is less than 0, and calling drm_dp_mst_topology_put_port() before termination when drm_dp_init_vcpi() returns a negative value. Fixes: 1e797f556c61 ("drm/dp: Split drm_dp_mst_allocate_vcpi") Cc: # v4.12+ Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Xin Xiong Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20200719154545.GA41231@xin-virtual-machine --- drivers/gpu/drm/drm_dp_mst_topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 09b32289497e..b23cb2fec3f3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4308,11 +4308,11 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, { int ret; - port = drm_dp_mst_topology_get_port_validated(mgr, port); - if (!port) + if (slots < 0) return false; - if (slots < 0) + port = drm_dp_mst_topology_get_port_validated(mgr, port); + if (!port) return false; if (port->vcpi.vcpi > 0) { @@ -4328,6 +4328,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, if (ret) { DRM_DEBUG_KMS("failed to init vcpi slots=%d max=63 ret=%d\n", DIV_ROUND_UP(pbn, mgr->pbn_div), ret); + drm_dp_mst_topology_put_port(port); goto out; } DRM_DEBUG_KMS("initing vcpi for pbn=%d slots=%d\n", -- cgit From b748fc7a8763a5b3f8149f12c45711cd73ef8176 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Jul 2020 10:34:20 -0400 Subject: ceph: set sec_context xattr on symlink creation Symlink inodes should have the security context set in their xattrs on creation. We already set the context on creation, but we don't attach the pagelist. The effect is that symlink inodes don't get an SELinux context set on them at creation, so they end up unlabeled instead of inheriting the proper context. Make it do so. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 39f5311404b0..060bdcc5ce32 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -930,6 +930,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; + } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); -- cgit From a0102bda5bc0991c5c8c7c07770b236894a810fd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Jul 2020 11:03:55 -0400 Subject: ceph: move sb->wb_pagevec_pool to be a global mempool When doing some testing recently, I hit some page allocation failures on mount, when creating the wb_pagevec_pool for the mount. That requires 128k (32 contiguous pages), and after thrashing the memory during an xfstests run, sometimes that would fail. 128k for each mount seems like a lot to hold in reserve for a rainy day, so let's change this to a global mempool that gets allocated when the module is plugged in. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 23 +++++++++++------------ fs/ceph/super.c | 22 ++++++++-------------- fs/ceph/super.h | 2 -- include/linux/ceph/libceph.h | 1 + 4 files changed, 20 insertions(+), 28 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 01ad09733ac7..6ea761c84494 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -862,8 +862,7 @@ static void writepages_finish(struct ceph_osd_request *req) osd_data = osd_req_op_extent_osd_data(req, 0); if (osd_data->pages_from_pool) - mempool_free(osd_data->pages, - ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); + mempool_free(osd_data->pages, ceph_wb_pagevec_pool); else kfree(osd_data->pages); ceph_osdc_put_request(req); @@ -955,10 +954,10 @@ retry: int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; - mempool_t *pool = NULL; /* Becomes non-null if mempool used */ struct page *page; pgoff_t strip_unit_end = 0; u64 offset = 0, len = 0; + bool from_pool = false; max_pages = wsize >> PAGE_SHIFT; @@ -1057,16 +1056,16 @@ get_more_pages: sizeof(*pages), GFP_NOFS); if (!pages) { - pool = fsc->wb_pagevec_pool; - pages = mempool_alloc(pool, GFP_NOFS); + from_pool = true; + pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } len = 0; } else if (page->index != (offset + len) >> PAGE_SHIFT) { - if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS : - CEPH_OSD_MAX_OPS)) { + if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS : + CEPH_OSD_MAX_OPS)) { redirty_page_for_writepage(wbc, page); unlock_page(page); break; @@ -1161,7 +1160,7 @@ new_request: offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, - !!pool, false); + from_pool, false); osd_req_op_extent_update(req, op_idx, len); len = 0; @@ -1188,12 +1187,12 @@ new_request: dout("writepages got pages at %llu~%llu\n", offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, - 0, !!pool, false); + 0, from_pool, false); osd_req_op_extent_update(req, op_idx, len); BUG_ON(op_idx + 1 != req->r_num_ops); - pool = NULL; + from_pool = false; if (i < locked_pages) { BUG_ON(num_ops <= req->r_num_ops); num_ops -= req->r_num_ops; @@ -1204,8 +1203,8 @@ new_request: pages = kmalloc_array(locked_pages, sizeof(*pages), GFP_NOFS); if (!pages) { - pool = fsc->wb_pagevec_pool; - pages = mempool_alloc(pool, GFP_NOFS); + from_pool = true; + pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } memcpy(pages, data_pages + i, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 585aecea5cad..7ec0e6d03d10 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -637,8 +637,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt) { struct ceph_fs_client *fsc; - int page_count; - size_t size; int err; fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); @@ -686,22 +684,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc->cap_wq) goto fail_inode_wq; - /* set up mempools */ - err = -ENOMEM; - page_count = fsc->mount_options->wsize >> PAGE_SHIFT; - size = sizeof (struct page *) * (page_count ? page_count : 1); - fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); - if (!fsc->wb_pagevec_pool) - goto fail_cap_wq; - spin_lock(&ceph_fsc_lock); list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); spin_unlock(&ceph_fsc_lock); return fsc; -fail_cap_wq: - destroy_workqueue(fsc->cap_wq); fail_inode_wq: destroy_workqueue(fsc->inode_wq); fail_client: @@ -732,8 +720,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) destroy_workqueue(fsc->inode_wq); destroy_workqueue(fsc->cap_wq); - mempool_destroy(fsc->wb_pagevec_pool); - destroy_mount_options(fsc->mount_options); ceph_destroy_client(fsc->client); @@ -752,6 +738,7 @@ struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; struct kmem_cache *ceph_dir_file_cachep; struct kmem_cache *ceph_mds_request_cachep; +mempool_t *ceph_wb_pagevec_pool; static void ceph_inode_init_once(void *foo) { @@ -796,6 +783,10 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + if (!ceph_wb_pagevec_pool) + goto bad_pagevec_pool; + error = ceph_fscache_register(); if (error) goto bad_fscache; @@ -804,6 +795,8 @@ static int __init init_caches(void) bad_fscache: kmem_cache_destroy(ceph_mds_request_cachep); +bad_pagevec_pool: + mempool_destroy(ceph_wb_pagevec_pool); bad_mds_req: kmem_cache_destroy(ceph_dir_file_cachep); bad_dir_file: @@ -834,6 +827,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_file_cachep); kmem_cache_destroy(ceph_dir_file_cachep); kmem_cache_destroy(ceph_mds_request_cachep); + mempool_destroy(ceph_wb_pagevec_pool); ceph_fscache_unregister(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9001a896ae8c..4c3c964b1c54 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -118,8 +118,6 @@ struct ceph_fs_client { struct ceph_mds_client *mdsc; - /* writeback */ - mempool_t *wb_pagevec_pool; atomic_long_t writeback_count; struct workqueue_struct *inode_wq; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e5ed1c541e7f..c8645f0b797d 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -282,6 +282,7 @@ extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; extern struct kmem_cache *ceph_dir_file_cachep; extern struct kmem_cache *ceph_mds_request_cachep; +extern mempool_t *ceph_wb_pagevec_pool; /* ceph_common.c */ extern bool libceph_compatible(void *data); -- cgit From 224c7b6778fe08e1880ef88867051bec0a154d6c Mon Sep 17 00:00:00 2001 From: Yanhu Cao Date: Fri, 31 Jul 2020 16:25:13 +0800 Subject: ceph: use frag's MDS in either mode When doing some tests with multiple mds, we were seeing many mds forwarding requests between them, causing clients to resend. If the request is a modification operation and the mode is set to USE_AUTH_MDS, then the auth mds should be selected to handle the request. If auth mds for frag is already set, then it should be returned directly without further processing. The current logic is wrong because it only returns directly if mode is USE_AUTH_MDS, but we want to do that for all modes. If we don't, then when the frag's mds is not equal to cap session's mds, the request will get sent to the wrong MDS needlessly. Drop the mode check in this condition. Signed-off-by: Yanhu Cao Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 590822fab767..1095802ad9bd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1103,8 +1103,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) { - if (mode == USE_ANY_MDS && - !ceph_mdsmap_is_laggy(mdsc->mdsmap, + if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) goto out; } -- cgit From 9bf1306257589b3a5ad46175538af9922d94e26f Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 27 Jul 2020 15:46:14 +1200 Subject: rtc: ds1307: provide an indication that the watchdog has fired There's not much feedback when the ds1388 watchdog fires. Generally it yanks on the reset line and the board reboots. Capture the fact that the watchdog has fired in the past so that userspace can retrieve it via WDIOC_GETBOOTSTATUS. This should help distinguish a watchdog triggered reset from a power interruption. Signed-off-by: Chris Packham Signed-off-by: Alexandre Belloni Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200727034615.19755-1-chris.packham@alliedtelesis.co.nz --- drivers/rtc/rtc-ds1307.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 49702942bb08..54c85cdd019d 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1668,6 +1668,8 @@ static const struct watchdog_ops ds1388_wdt_ops = { static void ds1307_wdt_register(struct ds1307 *ds1307) { struct watchdog_device *wdt; + int err; + int val; if (ds1307->type != ds_1388) return; @@ -1676,6 +1678,10 @@ static void ds1307_wdt_register(struct ds1307 *ds1307) if (!wdt) return; + err = regmap_read(ds1307->regmap, DS1388_REG_FLAG, &val); + if (!err && val & DS1388_BIT_WF) + wdt->bootstatus = WDIOF_CARDRESET; + wdt->info = &ds1388_wdt_info; wdt->ops = &ds1388_wdt_ops; wdt->timeout = 99; -- cgit From a8ac78357d9b71a5608c609094ad3b114a46ccd4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 3 Aug 2020 18:00:08 +0300 Subject: scsi: target: Make iscsit_register_transport() return void This function always returns 0. We can make it return void to simplify the code. Also, no caller ever checks the return value of this function. Link: https://lore.kernel.org/r/20200803150008.83920-1-maxg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_transport.c | 4 +--- include/target/iscsi/iscsi_transport.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c index 036940518bfe..27c85f260459 100644 --- a/drivers/target/iscsi/iscsi_target_transport.c +++ b/drivers/target/iscsi/iscsi_target_transport.c @@ -31,7 +31,7 @@ void iscsit_put_transport(struct iscsit_transport *t) module_put(t->owner); } -int iscsit_register_transport(struct iscsit_transport *t) +void iscsit_register_transport(struct iscsit_transport *t) { INIT_LIST_HEAD(&t->t_node); @@ -40,8 +40,6 @@ int iscsit_register_transport(struct iscsit_transport *t) mutex_unlock(&transport_mutex); pr_debug("Registered iSCSI transport: %s\n", t->name); - - return 0; } EXPORT_SYMBOL(iscsit_register_transport); diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 75bee29fd7dd..b8feba7ffebc 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -43,7 +43,7 @@ static inline void *iscsit_priv_cmd(struct iscsi_cmd *cmd) * From iscsi_target_transport.c */ -extern int iscsit_register_transport(struct iscsit_transport *); +extern void iscsit_register_transport(struct iscsit_transport *); extern void iscsit_unregister_transport(struct iscsit_transport *); extern struct iscsit_transport *iscsit_get_transport(int); extern void iscsit_put_transport(struct iscsit_transport *); -- cgit From e95b4789ff4380733006836d28e554dc296b2298 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Wed, 29 Jul 2020 01:18:24 -0700 Subject: scsi: fcoe: Memory leak fix in fcoe_sysfs_fcf_del() In fcoe_sysfs_fcf_del(), we first deleted the fcf from the list and then freed it if ctlr_dev was not NULL. This was causing a memory leak. Free the fcf even if ctlr_dev is NULL. Link: https://lore.kernel.org/r/20200729081824.30996-3-jhasan@marvell.com Reviewed-by: Girish Basrur Reviewed-by: Santosh Vernekar Reviewed-by: Saurav Kashyap Reviewed-by: Shyam Sundar Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 85c7959961cc..1409c7687853 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -256,9 +256,9 @@ static void fcoe_sysfs_fcf_del(struct fcoe_fcf *new) WARN_ON(!fcf_dev); new->fcf_dev = NULL; fcoe_fcf_device_delete(fcf_dev); - kfree(new); mutex_unlock(&cdev->lock); } + kfree(new); } /** -- cgit From ec007ef40abb6a164d148b0dc19789a7a2de2cc8 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Wed, 29 Jul 2020 01:18:23 -0700 Subject: scsi: libfc: Free skb in fc_disc_gpn_id_resp() for valid cases In fc_disc_gpn_id_resp(), skb is supposed to get freed in all cases except for PTR_ERR. However, in some cases it didn't. This fix is to call fc_frame_free(fp) before function returns. Link: https://lore.kernel.org/r/20200729081824.30996-2-jhasan@marvell.com Reviewed-by: Girish Basrur Reviewed-by: Santosh Vernekar Reviewed-by: Saurav Kashyap Reviewed-by: Shyam Sundar Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_disc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 19721db23283..d8cbc9c0e766 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -581,8 +581,12 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, if (PTR_ERR(fp) == -FC_EX_CLOSED) goto out; - if (IS_ERR(fp)) - goto redisc; + if (IS_ERR(fp)) { + mutex_lock(&disc->disc_mutex); + fc_disc_restart(disc); + mutex_unlock(&disc->disc_mutex); + goto out; + } cp = fc_frame_payload_get(fp, sizeof(*cp)); if (!cp) @@ -609,7 +613,7 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, new_rdata->disc_id = disc->disc_id; fc_rport_login(new_rdata); } - goto out; + goto free_fp; } rdata->disc_id = disc->disc_id; mutex_unlock(&rdata->rp_mutex); @@ -626,6 +630,8 @@ redisc: fc_disc_restart(disc); mutex_unlock(&disc->disc_mutex); } +free_fp: + fc_frame_free(fp); out: kref_put(&rdata->kref, fc_rport_destroy); if (!IS_ERR(fp)) -- cgit From a3d8a2573687bc8955986e4e011ebfe19cc71054 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 31 Jul 2020 14:49:28 +0900 Subject: scsi: sd_zbc: Improve zone revalidation Currently, for zoned disks, since blk_revalidate_disk_zones() requires the disk capacity to be set already to operate correctly, zones revalidation can only be done on the second revalidate scan once the gendisk capacity is set at the end of the first scan. As a result, if zone revalidation fails, there is no second chance to recover from the failure and the disk capacity is changed to 0, with the disk left unusable. This can be improved by shuffling around code, specifically, by moving the call to sd_zbc_revalidate_zones() from sd_zbc_read_zones() to the end of sd_revalidate_disk(), after set_capacity_revalidate_and_notify() is called to set the gendisk capacity. With this change, if sd_zbc_revalidate_zones() fails on the first scan, the second scan will call it again to recover, if possible. Using the new struct scsi_disk fields rev_nr_zones and rev_zone_blocks, sd_zbc_revalidate_zones() does actual work only if it detects a change with the disk zone configuration. This means that for a successful zones revalidation on the first scan, the second scan will not cause another heavy full check. While at it, remove the unecesary "extern" declaration of sd_zbc_read_zones(). Link: https://lore.kernel.org/r/20200731054928.668547-1-damien.lemoal@wdc.com Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 10 ++++-- drivers/scsi/sd.h | 11 ++++-- drivers/scsi/sd_zbc.c | 93 ++++++++++++++++++++++++--------------------------- 3 files changed, 60 insertions(+), 54 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index acde0ca35769..95018e650f2d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2578,8 +2578,6 @@ sd_print_capacity(struct scsi_disk *sdkp, sd_printk(KERN_NOTICE, sdkp, "%u-byte physical blocks\n", sdkp->physical_block_size); - - sd_zbc_print_zones(sdkp); } /* called with buffer of length 512 */ @@ -3220,6 +3218,14 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_config_write_same(sdkp); kfree(buffer); + /* + * For a zoned drive, revalidating the zones can be done only once + * the gendisk capacity is set. So if this fails, set back the gendisk + * capacity to 0. + */ + if (sd_zbc_revalidate_zones(sdkp)) + set_capacity_revalidate_and_notify(disk, 0, false); + out: return 0; } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 27c0f4e9b1d4..4933e7daf17d 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -75,7 +75,9 @@ struct scsi_disk { struct opal_dev *opal_dev; #ifdef CONFIG_BLK_DEV_ZONED u32 nr_zones; + u32 rev_nr_zones; u32 zone_blocks; + u32 rev_zone_blocks; u32 zones_optimal_open; u32 zones_optimal_nonseq; u32 zones_max_open; @@ -215,8 +217,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) int sd_zbc_init_disk(struct scsi_disk *sdkp); void sd_zbc_release_disk(struct scsi_disk *sdkp); -extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); -extern void sd_zbc_print_zones(struct scsi_disk *sdkp); +int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); +int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all); unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, @@ -242,7 +244,10 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, return 0; } -static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} +static inline int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) +{ + return 0; +} static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 35d929b00a0b..fee0815d7e65 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -633,6 +633,23 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, return 0; } +static void sd_zbc_print_zones(struct scsi_disk *sdkp) +{ + if (!sd_is_zoned(sdkp) || !sdkp->capacity) + return; + + if (sdkp->capacity & (sdkp->zone_blocks - 1)) + sd_printk(KERN_NOTICE, sdkp, + "%u zones of %u logical blocks + 1 runt zone\n", + sdkp->nr_zones - 1, + sdkp->zone_blocks); + else + sd_printk(KERN_NOTICE, sdkp, + "%u zones of %u logical blocks\n", + sdkp->nr_zones, + sdkp->zone_blocks); +} + static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); @@ -640,36 +657,31 @@ static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset); } -static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp, - u32 zone_blocks, - unsigned int nr_zones) +int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) { struct gendisk *disk = sdkp->disk; + struct request_queue *q = disk->queue; + u32 zone_blocks = sdkp->rev_zone_blocks; + unsigned int nr_zones = sdkp->rev_nr_zones; + u32 max_append; int ret = 0; + if (!sd_is_zoned(sdkp)) + return 0; + /* * Make sure revalidate zones are serialized to ensure exclusive * updates of the scsi disk data. */ mutex_lock(&sdkp->rev_mutex); - /* - * Revalidate the disk zones to update the device request queue zone - * bitmaps and the zone write pointer offset array. Do this only once - * the device capacity is set on the second revalidate execution for - * disk scan or if something changed when executing a normal revalidate. - */ - if (sdkp->first_scan) { - sdkp->zone_blocks = zone_blocks; - sdkp->nr_zones = nr_zones; - goto unlock; - } - if (sdkp->zone_blocks == zone_blocks && sdkp->nr_zones == nr_zones && disk->queue->nr_zones == nr_zones) goto unlock; + sdkp->zone_blocks = zone_blocks; + sdkp->nr_zones = nr_zones; sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO); if (!sdkp->rev_wp_offset) { ret = -ENOMEM; @@ -681,6 +693,21 @@ static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp, kvfree(sdkp->rev_wp_offset); sdkp->rev_wp_offset = NULL; + if (ret) { + sdkp->zone_blocks = 0; + sdkp->nr_zones = 0; + sdkp->capacity = 0; + goto unlock; + } + + max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks), + q->limits.max_segments << (PAGE_SHIFT - 9)); + max_append = min_t(u32, max_append, queue_max_hw_sectors(q)); + + blk_queue_max_zone_append_sectors(q, max_append); + + sd_zbc_print_zones(sdkp); + unlock: mutex_unlock(&sdkp->rev_mutex); @@ -693,7 +720,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) struct request_queue *q = disk->queue; unsigned int nr_zones; u32 zone_blocks = 0; - u32 max_append; int ret; if (!sd_is_zoned(sdkp)) @@ -722,22 +748,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) sdkp->device->use_16_for_rw = 1; sdkp->device->use_10_for_rw = 0; - ret = sd_zbc_revalidate_zones(sdkp, zone_blocks, nr_zones); - if (ret) - goto err; - - /* - * On the first scan 'chunk_sectors' isn't setup yet, so calling - * blk_queue_max_zone_append_sectors() will result in a WARN(). Defer - * this setting to the second scan. - */ - if (sdkp->first_scan) - return 0; - - max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks), - q->limits.max_segments << (PAGE_SHIFT - 9)); - - blk_queue_max_zone_append_sectors(q, max_append); + sdkp->rev_nr_zones = nr_zones; + sdkp->rev_zone_blocks = zone_blocks; return 0; @@ -747,23 +759,6 @@ err: return ret; } -void sd_zbc_print_zones(struct scsi_disk *sdkp) -{ - if (!sd_is_zoned(sdkp) || !sdkp->capacity) - return; - - if (sdkp->capacity & (sdkp->zone_blocks - 1)) - sd_printk(KERN_NOTICE, sdkp, - "%u zones of %u logical blocks + 1 runt zone\n", - sdkp->nr_zones - 1, - sdkp->zone_blocks); - else - sd_printk(KERN_NOTICE, sdkp, - "%u zones of %u logical blocks\n", - sdkp->nr_zones, - sdkp->zone_blocks); -} - int sd_zbc_init_disk(struct scsi_disk *sdkp) { if (!sd_is_zoned(sdkp)) -- cgit From 8a8fb8977ea2bdf160b9e475cf2350abbad4f0e7 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 3 Aug 2020 19:34:02 +0800 Subject: scsi: scsi_transport_sas: Add spaces around binary operator "|" According to the kernel coding style, use one space around the binary "|" operator. Add spaces around it. Link: https://lore.kernel.org/r/1596454442-220565-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index e443dee43bcf..c9abed8429c9 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1526,7 +1526,7 @@ int sas_rphy_add(struct sas_rphy *rphy) list_add_tail(&rphy->list, &sas_host->rphy_list); if (identify->device_type == SAS_END_DEVICE && (identify->target_port_protocols & - (SAS_PROTOCOL_SSP|SAS_PROTOCOL_STP|SAS_PROTOCOL_SATA))) + (SAS_PROTOCOL_SSP | SAS_PROTOCOL_STP | SAS_PROTOCOL_SATA))) rphy->scsi_target_id = sas_host->next_target_id++; else if (identify->device_type == SAS_END_DEVICE) rphy->scsi_target_id = -1; -- cgit From af6de8c60fe9433afa73cea6fcccdccd98ad3e5e Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Wed, 29 Jul 2020 19:10:11 -0400 Subject: scsi: lpfc: nvmet: Avoid hang / use-after-free again when destroying targetport We cannot wait on a completion object in the lpfc_nvme_targetport structure in the _destroy_targetport() code path because the NVMe/fc transport will free that structure immediately after the .targetport_delete() callback. This results in a use-after-free, and a crash if slub_debug=FZPU is enabled. An earlier fix put put the completion on the stack, but commit 2a0fb340fcc8 ("scsi: lpfc: Correct localport timeout duration error") subsequently changed the code to reference the completion through a pointer in the object rather than the local stack variable. Fix this by using the stack variable directly. Link: https://lore.kernel.org/r/20200729231011.13240-1-emilne@redhat.com Fixes: 2a0fb340fcc8 ("scsi: lpfc: Correct localport timeout duration error") Reviewed-by: James Smart Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a4430aeeb04a..d4ade7cdb93a 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -2110,7 +2110,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) } tgtp->tport_unreg_cmp = &tport_unreg_cmp; nvmet_fc_unregister_targetport(phba->targetport); - if (!wait_for_completion_timeout(tgtp->tport_unreg_cmp, + if (!wait_for_completion_timeout(&tport_unreg_cmp, msecs_to_jiffies(LPFC_NVMET_WAIT_TMO))) lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "6179 Unreg targetport x%px timeout " -- cgit From 1eaff53649b83aca01d46e034b0590dd0504360a Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 31 Jul 2020 14:56:39 +0800 Subject: scsi: lpfc: Add missing misc_deregister() for lpfc_init() lpfc_init() misses a call misc_deregister() in an error path. Add a label 'unregister' to fix it. Link: https://lore.kernel.org/r/20200731065639.190646-1-jingxiangfeng@huawei.com Reviewed-by: James Smart Signed-off-by: Jing Xiangfeng Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index db768e28d3f9..03dadc4fa7cb 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -14099,17 +14099,18 @@ lpfc_init(void) printk(KERN_ERR "Could not register lpfcmgmt device, " "misc_register returned with status %d", error); + error = -ENOMEM; lpfc_transport_functions.vport_create = lpfc_vport_create; lpfc_transport_functions.vport_delete = lpfc_vport_delete; lpfc_transport_template = fc_attach_transport(&lpfc_transport_functions); if (lpfc_transport_template == NULL) - return -ENOMEM; + goto unregister; lpfc_vport_transport_template = fc_attach_transport(&lpfc_vport_transport_functions); if (lpfc_vport_transport_template == NULL) { fc_release_transport(lpfc_transport_template); - return -ENOMEM; + goto unregister; } lpfc_nvme_cmd_template(); lpfc_nvmet_cmd_template(); @@ -14135,6 +14136,8 @@ unwind: cpuhp_failure: fc_release_transport(lpfc_transport_template); fc_release_transport(lpfc_vport_transport_template); +unregister: + misc_deregister(&lpfc_mgmt_dev); return error; } -- cgit From a1e4d3d8aef92d0099b1a559e21aad4789dbe13c Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:22 -0700 Subject: scsi: lpfc: Fix FCoE speed reporting Current Link speed was shown as "unknown" in sysfs for FCoE ports. In this scenario, the port was working in 20G speed, which happens to not be a speed handled by the driver. Add support for all possible link speeds that could get reported from port_speed field in link state ACQE. Additionally, as supported_speeds can't be manipulated via the FCoE driver on a converged ethernet port (it must be managed by the nic function), don't fill out the supported_speeds field for the fc host object in sysfs. Revise debug logging to report Link speed mgmt valuess. Link: https://lore.kernel.org/r/20200803210229.23063-2-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 6 ++++++ drivers/scsi/lpfc/lpfc_init.c | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index a62c60ca6477..cc2c907777d2 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -6679,9 +6679,15 @@ lpfc_get_host_speed(struct Scsi_Host *shost) } } else if (lpfc_is_link_up(phba) && (phba->hba_flag & HBA_FCOE_MODE)) { switch (phba->fc_linkspeed) { + case LPFC_ASYNC_LINK_SPEED_1GBPS: + fc_host_speed(shost) = FC_PORTSPEED_1GBIT; + break; case LPFC_ASYNC_LINK_SPEED_10GBPS: fc_host_speed(shost) = FC_PORTSPEED_10GBIT; break; + case LPFC_ASYNC_LINK_SPEED_20GBPS: + fc_host_speed(shost) = FC_PORTSPEED_20GBIT; + break; case LPFC_ASYNC_LINK_SPEED_25GBPS: fc_host_speed(shost) = FC_PORTSPEED_25GBIT; break; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 03dadc4fa7cb..d0c3556138fd 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4577,6 +4577,13 @@ static void lpfc_host_supported_speeds_set(struct Scsi_Host *shost) struct lpfc_hba *phba = vport->phba; fc_host_supported_speeds(shost) = 0; + /* + * Avoid reporting supported link speed for FCoE as it can't be + * controlled via FCoE. + */ + if (phba->hba_flag & HBA_FCOE_MODE) + return; + if (phba->lmt & LMT_128Gb) fc_host_supported_speeds(shost) |= FC_PORTSPEED_128GBIT; if (phba->lmt & LMT_64Gb) @@ -4910,6 +4917,9 @@ lpfc_sli4_port_speed_parse(struct lpfc_hba *phba, uint32_t evt_code, case LPFC_ASYNC_LINK_SPEED_40GBPS: port_speed = 40000; break; + case LPFC_ASYNC_LINK_SPEED_100GBPS: + port_speed = 100000; + break; default: port_speed = 0; } @@ -8589,7 +8599,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) "VPI(B:%d M:%d) " "VFI(B:%d M:%d) " "RPI(B:%d M:%d) " - "FCFI:%d EQ:%d CQ:%d WQ:%d RQ:%d\n", + "FCFI:%d EQ:%d CQ:%d WQ:%d RQ:%d lmt:x%x\n", phba->sli4_hba.extents_in_use, phba->sli4_hba.max_cfg_param.xri_base, phba->sli4_hba.max_cfg_param.max_xri, @@ -8603,7 +8613,8 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.max_eq, phba->sli4_hba.max_cfg_param.max_cq, phba->sli4_hba.max_cfg_param.max_wq, - phba->sli4_hba.max_cfg_param.max_rq); + phba->sli4_hba.max_cfg_param.max_rq, + phba->lmt); /* * Calculate queue resources based on how -- cgit From 9e3e365a92d329e5e69e3f92037537be6276992b Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:23 -0700 Subject: scsi: lpfc: Fix no message shown for lpfc_hdw_queue out of range value If module parameters override the default configuration settings for hardware queues or irqs, the driver was not notifying the change from defaults. Revise such that any changes will result in a kernel log message. Link: https://lore.kernel.org/r/20200803210229.23063-3-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 20 +++++++++++++++++--- drivers/scsi/lpfc/lpfc_init.c | 3 ++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index cc2c907777d2..ece6c250ebaf 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -7412,12 +7412,26 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) { - if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu) + int logit = 0; + + if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu) { phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu; - if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu) + logit = 1; + } + if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu) { phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu; - if (phba->cfg_irq_chann > phba->cfg_hdw_queue) + logit = 1; + } + if (phba->cfg_irq_chann > phba->cfg_hdw_queue) { phba->cfg_irq_chann = phba->cfg_hdw_queue; + logit = 1; + } + if (logit) + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "2006 Reducing Queues - CPU limitation: " + "IRQ %d HDWQ %d\n", + phba->cfg_irq_chann, + phba->cfg_hdw_queue); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME && phba->nvmet_support) { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index d0c3556138fd..05ace6916b66 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8637,7 +8637,8 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) if ((phba->cfg_irq_chann > qmin) || (phba->cfg_hdw_queue > qmin)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "2005 Reducing Queues: " + "2005 Reducing Queues - " + "FW resource limitation: " "WQ %d CQ %d EQ %d: min %d: " "IRQ %d HDWQ %d\n", phba->sli4_hba.max_cfg_param.max_wq, -- cgit From 8ccd6926db7dd865dd6eabe3c25ae2696bffc07e Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:24 -0700 Subject: scsi: lpfc: Fix RSCN timeout due to incorrect gidft counter In configs with a large number of initiators in the same zone (>250), RSCN timeouts are seen when creating or deleting vports: lpfc 0000:07:00.1: 5:(0):0231 RSCN timeout Data: x0 x3 During RSCN processing driver issues GID_FT command to nameserver. A counter for number of simultaneous GID_FT commands is maintained (an unsigned value). The counter is incremented when the GID_FT is issued. If the GID_FT command fails for some reason the driver retries the GID_FT from the completion call back. But the counter was decremented before the retry was issued. When the second GID_FT completes, the callback again tries to decrement the counter, possibly wrapping to a very large non-zero value, which causes the RSCN cleanup code to not execute. Thus the RSCN timeout failure. Do not decrement the counter on a retry. Also add defensive checks to ensure the counter is not decremented if already zero. Link: https://lore.kernel.org/r/20200803210229.23063-4-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index dd9f2bf54edd..ef2015fad2d5 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -713,7 +713,8 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* This is a GID_FT completing so the gidft_inp counter was * incremented before the GID_FT was issued to the wire. */ - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; /* * Skip processing the NS response @@ -741,11 +742,14 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, goto out; /* CT command is being retried */ - vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, vport->fc_ns_retry, type); if (rc == 0) goto out; + else { /* Unable to send NS cmd */ + if (vport->gidft_inp) + vport->gidft_inp--; + } } if (vport->fc_flag & FC_RSCN_MODE) lpfc_els_flush_rscn(vport); @@ -825,7 +829,8 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation); } - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; } lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, @@ -918,7 +923,8 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* This is a GID_PT completing so the gidft_inp counter was * incremented before the GID_PT was issued to the wire. */ - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; /* * Skip processing the NS response @@ -942,11 +948,14 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, vport->fc_ns_retry++; /* CT command is being retried */ - vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_PT, vport->fc_ns_retry, GID_PT_N_PORT); if (rc == 0) goto out; + else { /* Unable to send NS cmd */ + if (vport->gidft_inp) + vport->gidft_inp--; + } } if (vport->fc_flag & FC_RSCN_MODE) lpfc_els_flush_rscn(vport); @@ -1027,7 +1036,8 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t)CTrsp->ReasonCode, (uint32_t)CTrsp->Explanation); } - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; } lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, -- cgit From 24411fcd6fe7b55d1a48b80a9ffeb3564ff029a7 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:25 -0700 Subject: scsi: lpfc: Fix oops when unloading driver while running mds diags While mds diagnostic tests are running, if the driver is requested to be unloaded, oops or hangs are observed. The driver doesn't terminate the processing of diag frames when the unload is started. As such: oops may be seen for __lpfc_sli_release_iocbq_s4 because ring memory is referenced that was already freed; or hangs see in lpfc_nvme_wait_for_io_drain as ios no longer complete. If unloading, don't process diag frames. Just clean them up. Link: https://lore.kernel.org/r/20200803210229.23063-5-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 8582b51b0613..4cd7ded656b7 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13650,7 +13650,11 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) { spin_unlock_irqrestore(&phba->hbalock, iflags); /* Handle MDS Loopback frames */ - lpfc_sli4_handle_mds_loopback(phba->pport, dma_buf); + if (!(phba->pport->load_flag & FC_UNLOADING)) + lpfc_sli4_handle_mds_loopback(phba->pport, + dma_buf); + else + lpfc_in_buf_free(phba, &dma_buf->dbuf); break; } @@ -18363,7 +18367,10 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) { vport = phba->pport; /* Handle MDS Loopback frames */ - lpfc_sli4_handle_mds_loopback(vport, dmabuf); + if (!(phba->pport->load_flag & FC_UNLOADING)) + lpfc_sli4_handle_mds_loopback(vport, dmabuf); + else + lpfc_in_buf_free(phba, &dmabuf->dbuf); return; } -- cgit From 678768da988081a922b0645b9bd552f6cdd982cc Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:26 -0700 Subject: scsi: lpfc: Fix retry of PRLI when status indicates its unsupported With port bounce/address swaps and timing between initiator GID queries vs remote port FC4 support registrations, the driver may be in a situation where it sends PRLIs for both FCP and NVME even though the target may not support one of the protocols. In this case, the remote port will reject the PRLI and usually indicate it does not support the request. However, the driver currently ignores the status of the failure and immediately retries the PRLI, which is pointless. In the case of this one remote port, the reception of the PRLI retry caused it to decide to send a LOGO. The LOGO restarted the process and the same results happened. It made the remote port undiscoverable to either protocol. Add logic to detect the non-support status and not attempt the retry of the PRLI. Link: https://lore.kernel.org/r/20200803210229.23063-6-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 85d4e4000c25..48dc63f22cca 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3937,10 +3937,14 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has * rejected the PRLI - can't do it now. Delay - * for 1 second and try again - don't care about - * the explanation. + * for 1 second and try again. + * + * However, if explanation is REQ_UNSUPPORTED there's + * no point to retry PRLI. */ - if (cmd == ELS_CMD_PRLI || cmd == ELS_CMD_NVMEPRLI) { + if ((cmd == ELS_CMD_PRLI || cmd == ELS_CMD_NVMEPRLI) && + stat.un.b.lsRjtRsnCodeExp != + LSEXP_REQ_UNSUPPORTED) { delay = 1000; maxretry = lpfc_max_els_tries + 1; retry = 1; -- cgit From feb3cc57fb63a2b3d3254e8c8ca3fe776c767d38 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:27 -0700 Subject: scsi: lpfc: Fix validation of bsg reply lengths There are a couple of code areas which validate sufficient reply buffer length, but the checks are using the request elements rather than the reply elements. Rework to validate using the reply structures. Link: https://lore.kernel.org/r/20200803210229.23063-7-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_bsg.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 1d88fedaf3f0..6f9d648a9b9c 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2494,13 +2494,12 @@ lpfc_sli4_bsg_link_diag_test(struct bsg_job *job) diag_status_reply = (struct diag_status *) bsg_reply->reply_data.vendor_reply.vendor_rsp; - if (job->reply_len < - sizeof(struct fc_bsg_request) + sizeof(struct diag_status)) { + if (job->reply_len < sizeof(*bsg_reply) + sizeof(*diag_status_reply)) { lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, "3012 Received Run link diag test reply " "below minimum size (%d): reply_len:%d\n", - (int)(sizeof(struct fc_bsg_request) + - sizeof(struct diag_status)), + (int)(sizeof(*bsg_reply) + + sizeof(*diag_status_reply)), job->reply_len); rc = -EINVAL; goto job_error; @@ -3418,8 +3417,7 @@ lpfc_bsg_get_dfc_rev(struct bsg_job *job) event_reply = (struct get_mgmt_rev_reply *) bsg_reply->reply_data.vendor_reply.vendor_rsp; - if (job->reply_len < - sizeof(struct fc_bsg_request) + sizeof(struct get_mgmt_rev_reply)) { + if (job->reply_len < sizeof(*bsg_reply) + sizeof(*event_reply)) { lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, "2741 Received GET_DFC_REV reply below " "minimum size\n"); @@ -5202,8 +5200,8 @@ lpfc_menlo_cmd(struct bsg_job *job) goto no_dd_data; } - if (job->reply_len < - sizeof(struct fc_bsg_request) + sizeof(struct menlo_response)) { + if (job->reply_len < sizeof(*bsg_reply) + + sizeof(struct menlo_response)) { lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, "2785 Received MENLO_CMD reply below " "minimum size\n"); @@ -5359,9 +5357,7 @@ lpfc_forced_link_speed(struct bsg_job *job) forced_reply = (struct forced_link_speed_support_reply *) bsg_reply->reply_data.vendor_reply.vendor_rsp; - if (job->reply_len < - sizeof(struct fc_bsg_request) + - sizeof(struct forced_link_speed_support_reply)) { + if (job->reply_len < sizeof(*bsg_reply) + sizeof(*forced_reply)) { lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, "0049 Received FORCED_LINK_SPEED reply below " "minimum size\n"); @@ -5715,8 +5711,7 @@ lpfc_get_trunk_info(struct bsg_job *job) event_reply = (struct lpfc_trunk_info *) bsg_reply->reply_data.vendor_reply.vendor_rsp; - if (job->reply_len < - sizeof(struct fc_bsg_request) + sizeof(struct lpfc_trunk_info)) { + if (job->reply_len < sizeof(*bsg_reply) + sizeof(*event_reply)) { lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, "2728 Received GET TRUNK _INFO reply below " "minimum size\n"); -- cgit From 00081c5ca4d52fe787bf7a5fbb3fb5939dd7818e Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:28 -0700 Subject: scsi: lpfc: Fix LUN loss after cable pull On devices that support FCP sequence error recovery, which attempts to preserve the devices login across link bounce, adisc is used for device validation. Turns out the device fc4 type is cleared as part of the link bounce, but the ADISC handling doesn't restore the FC4 support as it normally would with a PRLI. This caused situations where the device wasn't reregistered with the transport thus scan logic and LUN discovery never kicked in. In the ADISC completion handling, reset the fc4 type so that transport port reregistration occurs with the remote port. Link: https://lore.kernel.org/r/20200803210229.23063-8-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nportdisc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index e4c710fe0245..cad53d19cb25 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1745,7 +1745,13 @@ lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport, } } - if (ndlp->nlp_type & NLP_FCP_TARGET) { + if (ndlp->nlp_type & NLP_FCP_TARGET) + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + + if (ndlp->nlp_type & NLP_NVME_TARGET) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + + if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET)) { ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); } else { -- cgit From 7e0e8be3a1fd1399373224e22d77487e63566704 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:29 -0700 Subject: scsi: lpfc: Update lpfc version to 12.8.0.3 Update lpfc version to 12.8.0.3 Link: https://lore.kernel.org/r/20200803210229.23063-9-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 1987c6666279..20adec4387f0 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.8.0.2" +#define LPFC_DRIVER_VERSION "12.8.0.3" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ -- cgit From ce368536dd614452407dc31e2449eb84681a06af Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Sat, 1 Aug 2020 07:10:39 -0400 Subject: nfs: nfs_file_write() should check for writeback errors The NFS_CONTEXT_ERROR_WRITE flag (as well as the check of said flag) was removed by commit 6fbda89b257f. The absence of an error check allows writes to be continually queued up for a server that may no longer be able to handle them. Fix it by adding an error check using the generic error reporting functions. Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one") Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index bb244fc39d4b..d42657b86d9b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -591,12 +591,14 @@ static const struct vm_operations_struct nfs_file_vm_ops = { .page_mkwrite = nfs_vm_page_mkwrite, }; -static int nfs_need_check_write(struct file *filp, struct inode *inode) +static int nfs_need_check_write(struct file *filp, struct inode *inode, + int error) { struct nfs_open_context *ctx; ctx = nfs_file_open_context(filp); - if (nfs_ctx_key_to_expire(ctx, inode)) + if (nfs_error_is_fatal_on_server(error) || + nfs_ctx_key_to_expire(ctx, inode)) return 1; return 0; } @@ -607,6 +609,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; + errseq_t since; + int error; result = nfs_key_timeout_notify(file, inode); if (result) @@ -631,6 +635,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos > i_size_read(inode)) nfs_revalidate_mapping(inode, file->f_mapping); + since = filemap_sample_wb_err(file->f_mapping); nfs_start_io_write(inode); result = generic_write_checks(iocb, from); if (result > 0) { @@ -649,7 +654,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; /* Return error values */ - if (nfs_need_check_write(file, inode)) { + error = filemap_check_wb_err(file->f_mapping, since); + if (nfs_need_check_write(file, inode, error)) { int err = nfs_wb_all(inode); if (err < 0) result = err; -- cgit From 7de62bc09fe6d100ebd6c931c3f9a6fa7e6ed10f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 15 Jul 2020 13:17:52 -0400 Subject: SUNRPC dont update timeout value on connection reset Current behaviour: every time a v3 operation is re-sent to the server we update (double) the timeout. There is no distinction between whether or not the previous timer had expired before the re-sent happened. Here's the scenario: 1. Client sends a v3 operation 2. Server RST-s the connection (prior to the timeout) (eg., connection is immediately reset) 3. Client re-sends a v3 operation but the timeout is now 120sec. As a result, an application sees 2mins pause before a retry in case server again does not reply. Instead, this patch proposes to keep track off when the minor timeout should happen and if it didn't, then don't update the new timeout. Value is updated based on the previous value to make timeouts predictable. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e64bd8222f55..a603d48d2b2c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -101,6 +101,7 @@ struct rpc_rqst { * used in the softirq. */ unsigned long rq_majortimeo; /* major timeout alarm */ + unsigned long rq_minortimeo; /* minor timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ ktime_t rq_rtt; /* round-trip time */ unsigned int rq_retries; /* # of retries */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d5cc5db9dbf3..6ba9d5842629 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -607,6 +607,11 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) req->rq_majortimeo += xprt_calc_majortimeo(req); } +static void xprt_reset_minortimeo(struct rpc_rqst *req) +{ + req->rq_minortimeo += req->rq_timeout; +} + static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) { unsigned long time_init; @@ -618,6 +623,7 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) time_init = xprt_abs_ktime_to_jiffies(task->tk_start); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); + req->rq_minortimeo = time_init + req->rq_timeout; } /** @@ -631,6 +637,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req) const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; int status = 0; + if (time_before(jiffies, req->rq_minortimeo)) + return status; if (time_before(jiffies, req->rq_majortimeo)) { if (to->to_exponential) req->rq_timeout <<= 1; @@ -649,6 +657,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) spin_unlock(&xprt->transport_lock); status = -ETIMEDOUT; } + xprt_reset_minortimeo(req); if (req->rq_timeout == 0) { printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); -- cgit From 262e6ae7081df304fc625cf368d5c2cbba2bb991 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 23:33:33 +0200 Subject: modules: inherit TAINT_PROPRIETARY_MODULE If a TAINT_PROPRIETARY_MODULE exports symbol, inherit the taint flag for all modules importing these symbols, and don't allow loading symbols from TAINT_PROPRIETARY_MODULE modules if the module previously imported gplonly symbols. Add a anti-circumvention devices so people don't accidentally get themselves into trouble this way. Comment from Greg: "Ah, the proven-to-be-illegal "GPL Condom" defense :)" [jeyu: pr_info -> pr_err and pr_warn as per discussion] Link: http://lore.kernel.org/r/20200730162957.GA22469@lst.de Acked-by: Daniel Vetter Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 1 + kernel/module.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 30b0f5fcdb3c..e30ed5fa33a7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,6 +389,7 @@ struct module { unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; + bool using_gplonly_symbols; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ diff --git a/kernel/module.c b/kernel/module.c index 656f5ff27088..09bf5a652a47 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1431,6 +1431,24 @@ static int verify_namespace_is_imported(const struct load_info *info, return 0; } +static bool inherit_taint(struct module *mod, struct module *owner) +{ + if (!owner || !test_bit(TAINT_PROPRIETARY_MODULE, &owner->taints)) + return true; + + if (mod->using_gplonly_symbols) { + pr_err("%s: module using GPL-only symbols uses symbols from proprietary module %s.\n", + mod->name, owner->name); + return false; + } + + if (!test_bit(TAINT_PROPRIETARY_MODULE, &mod->taints)) { + pr_warn("%s: module uses symbols from proprietary module %s, inheriting taint.\n", + mod->name, owner->name); + set_bit(TAINT_PROPRIETARY_MODULE, &mod->taints); + } + return true; +} /* Resolve a symbol for this module. I.e. if we find one, record usage. */ static const struct kernel_symbol *resolve_symbol(struct module *mod, @@ -1456,6 +1474,14 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, if (!sym) goto unlock; + if (license == GPL_ONLY) + mod->using_gplonly_symbols = true; + + if (!inherit_taint(mod, owner)) { + sym = NULL; + goto getname; + } + if (!check_version(info, name, mod, crc)) { sym = ERR_PTR(-EINVAL); goto getname; -- cgit From db36e827d8762eaa6aacd2c918a55ac487aa2501 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:23 +0300 Subject: dt-bindings: i2c: add generic properties for GPIO bus recovery The I2C GPIO bus recovery properties consist of two GPIOS and one extra pinctrl state ("gpio" or "recovery"). "recovery" pinctrl state is considered deprecated and "gpio" should be used instead. Not all are mandatory for recovery. Signed-off-by: Codrin Ciubotariu Reviewed-by: Rob Herring [wsa: kept sorting, minor whitespace change] Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index 438ae123107e..a21c359b9f02 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -72,6 +72,16 @@ wants to support one of the below features, it should adapt these bindings. this information to adapt power management to keep the arbitration awake all the time, for example. Can not be combined with 'single-master'. +- pinctrl + add extra pinctrl to configure SCL/SDA pins to GPIO function for bus + recovery, call it "gpio" or "recovery" (deprecated) state + +- scl-gpios + specify the gpio related to SCL pin. Used for GPIO bus recovery. + +- sda-gpios + specify the gpio related to SDA pin. Optional for GPIO bus recovery. + - single-master states that there is no other master active on this bus. The OS can use this information to detect a stalled bus more reliably, for example. -- cgit From 75820314de26b00aaea0d0e79269c0d17914c5c4 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:24 +0300 Subject: i2c: core: add generic I2C GPIO recovery Multiple I2C bus drivers use similar bindings to obtain information needed for I2C recovery. For example, for platforms using device-tree, the properties look something like this: &i2c { ... pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c_default>; pinctrl-1 = <&pinctrl_i2c_gpio>; sda-gpios = <&pio 0 GPIO_ACTIVE_HIGH>; scl-gpios = <&pio 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; ... } For this reason, we can add this common initialization in the core. This way, other I2C bus drivers will be able to support GPIO recovery just by providing a pointer to platform's pinctrl and calling i2c_recover_bus() when SDA is stuck low. Signed-off-by: Codrin Ciubotariu [wsa: inverted one logic for better readability, minor update to kdoc] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 126 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c.h | 11 ++++ 2 files changed, 137 insertions(+) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 69217d2193da..ece25560eae4 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -181,6 +182,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->prepare_recovery) bri->prepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); /* * If we can set SDA, we will always create a STOP to ensure additional @@ -236,6 +239,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->unprepare_recovery) bri->unprepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); return ret; } @@ -251,6 +256,125 @@ int i2c_recover_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_recover_bus); +static void i2c_gpio_init_pinctrl_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct pinctrl *p = bri->pinctrl; + + /* + * we can't change states without pinctrl, so remove the states if + * populated + */ + if (!p) { + bri->pins_default = NULL; + bri->pins_gpio = NULL; + return; + } + + if (!bri->pins_default) { + bri->pins_default = pinctrl_lookup_state(p, + PINCTRL_STATE_DEFAULT); + if (IS_ERR(bri->pins_default)) { + dev_dbg(dev, PINCTRL_STATE_DEFAULT " state not found for GPIO recovery\n"); + bri->pins_default = NULL; + } + } + if (!bri->pins_gpio) { + bri->pins_gpio = pinctrl_lookup_state(p, "gpio"); + if (IS_ERR(bri->pins_gpio)) + bri->pins_gpio = pinctrl_lookup_state(p, "recovery"); + + if (IS_ERR(bri->pins_gpio)) { + dev_dbg(dev, "no gpio or recovery state found for GPIO recovery\n"); + bri->pins_gpio = NULL; + } + } + + /* for pinctrl state changes, we need all the information */ + if (bri->pins_default && bri->pins_gpio) { + dev_info(dev, "using pinctrl states for GPIO recovery"); + } else { + bri->pinctrl = NULL; + bri->pins_default = NULL; + bri->pins_gpio = NULL; + } +} + +static int i2c_gpio_init_generic_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct gpio_desc *gpiod; + int ret = 0; + + /* + * don't touch the recovery information if the driver is not using + * generic SCL recovery + */ + if (bri->recover_bus && bri->recover_bus != i2c_generic_scl_recovery) + return 0; + + /* + * pins might be taken as GPIO, so we should inform pinctrl about + * this and move the state to GPIO + */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); + + /* + * if there is incomplete or no recovery information, see if generic + * GPIO recovery is available + */ + if (!bri->scl_gpiod) { + gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) { + bri->scl_gpiod = gpiod; + bri->recover_bus = i2c_generic_scl_recovery; + dev_info(dev, "using generic GPIOs for recovery\n"); + } + } + + /* SDA GPIOD line is optional, so we care about DEFER only */ + if (!bri->sda_gpiod) { + /* + * We have SCL. Pull SCL low and wait a bit so that SDA glitches + * have no effect. + */ + gpiod_direction_output(bri->scl_gpiod, 0); + udelay(10); + gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN); + + /* Wait a bit in case of a SDA glitch, and then release SCL. */ + udelay(10); + gpiod_direction_output(bri->scl_gpiod, 1); + + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) + bri->sda_gpiod = gpiod; + } + +cleanup_pinctrl_state: + /* change the state of the pins back to their default state */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); + + return ret; +} + +static int i2c_gpio_init_recovery(struct i2c_adapter *adap) +{ + i2c_gpio_init_pinctrl_recovery(adap); + return i2c_gpio_init_generic_recovery(adap); +} + static void i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; @@ -259,6 +383,8 @@ static void i2c_init_recovery(struct i2c_adapter *adap) if (!bri) return; + i2c_gpio_init_recovery(adap); + if (!bri->recover_bus) { err_str = "no recover_bus() found"; goto err; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8ea9c3f86dba..d387d3786429 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -606,6 +606,14 @@ struct i2c_timings { * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. + * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. + * Optional. + * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned + * to the I2C bus. Optional. Populated internally for GPIO recovery, if + * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. + * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as + * GPIOs. Optional. Populated internally for GPIO recovery, if this state + * is called "gpio" or "recovery" and pinctrl is valid. */ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); @@ -622,6 +630,9 @@ struct i2c_bus_recovery_info { /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); -- cgit From 23a698fe65eca15fbdf925e8d70c5bf20ebf989e Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:25 +0300 Subject: i2c: core: treat EPROBE_DEFER when acquiring SCL/SDA GPIOs Even if I2C bus GPIO recovery is optional, devm_gpiod_get() can return -EPROBE_DEFER, so we should at least treat that. This ends up with i2c_register_adapter() to be able to return -EPROBE_DEFER. Signed-off-by: Codrin Ciubotariu Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ece25560eae4..34a9609f256d 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -375,15 +375,16 @@ static int i2c_gpio_init_recovery(struct i2c_adapter *adap) return i2c_gpio_init_generic_recovery(adap); } -static void i2c_init_recovery(struct i2c_adapter *adap) +static int i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; char *err_str; if (!bri) - return; + return 0; - i2c_gpio_init_recovery(adap); + if (i2c_gpio_init_recovery(adap) == -EPROBE_DEFER) + return -EPROBE_DEFER; if (!bri->recover_bus) { err_str = "no recover_bus() found"; @@ -399,10 +400,7 @@ static void i2c_init_recovery(struct i2c_adapter *adap) if (gpiod_get_direction(bri->sda_gpiod) == 0) bri->set_sda = set_sda_gpio_value; } - return; - } - - if (bri->recover_bus == i2c_generic_scl_recovery) { + } else if (bri->recover_bus == i2c_generic_scl_recovery) { /* Generic SCL recovery */ if (!bri->set_scl || !bri->get_scl) { err_str = "no {get|set}_scl() found"; @@ -414,10 +412,12 @@ static void i2c_init_recovery(struct i2c_adapter *adap) } } - return; + return 0; err: dev_err(&adap->dev, "Not using recovery: %s\n", err_str); adap->bus_recovery_info = NULL; + + return -EINVAL; } static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) @@ -1444,12 +1444,16 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (res) goto out_reg; - dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); - pm_runtime_no_callbacks(&adap->dev); pm_suspend_ignore_children(&adap->dev, true); pm_runtime_enable(&adap->dev); + res = i2c_init_recovery(adap); + if (res == -EPROBE_DEFER) + goto out_reg; + + dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); + #ifdef CONFIG_I2C_COMPAT res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev, adap->dev.parent); @@ -1458,8 +1462,6 @@ static int i2c_register_adapter(struct i2c_adapter *adap) "Failed to create compatibility class link\n"); #endif - i2c_init_recovery(adap); - /* create pre-declared device nodes */ of_i2c_register_devices(adap); i2c_acpi_register_devices(adap); -- cgit From 543aa2c4da8b2421633ec225cd2d4110115415f2 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:26 +0300 Subject: i2c: at91: Move to generic GPIO bus recovery Make the Microchip at91 driver the first to use the generic GPIO bus recovery support from the I2C core and discard the driver implementation. Signed-off-by: Codrin Ciubotariu Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-at91-master.c | 69 ++---------------------------------- drivers/i2c/busses/i2c-at91.h | 3 -- 2 files changed, 3 insertions(+), 69 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 363d540a8345..66864f9cf7ac 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -816,79 +816,16 @@ error: return ret; } -static void at91_prepare_twi_recovery(struct i2c_adapter *adap) -{ - struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); -} - -static void at91_unprepare_twi_recovery(struct i2c_adapter *adap) -{ - struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); -} - static int at91_init_twi_recovery_gpio(struct platform_device *pdev, struct at91_twi_dev *dev) { struct i2c_bus_recovery_info *rinfo = &dev->rinfo; - dev->pinctrl = devm_pinctrl_get(&pdev->dev); - if (!dev->pinctrl || IS_ERR(dev->pinctrl)) { + rinfo->pinctrl = devm_pinctrl_get(&pdev->dev); + if (!rinfo->pinctrl || IS_ERR(rinfo->pinctrl)) { dev_info(dev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(dev->pinctrl); + return PTR_ERR(rinfo->pinctrl); } - - dev->pinctrl_pins_default = pinctrl_lookup_state(dev->pinctrl, - PINCTRL_STATE_DEFAULT); - dev->pinctrl_pins_gpio = pinctrl_lookup_state(dev->pinctrl, - "gpio"); - if (IS_ERR(dev->pinctrl_pins_default) || - IS_ERR(dev->pinctrl_pins_gpio)) { - dev_info(&pdev->dev, "pinctrl states incomplete for recovery\n"); - return -EINVAL; - } - - /* - * pins will be taken as GPIO, so we might as well inform pinctrl about - * this and move the state to GPIO - */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); - - rinfo->sda_gpiod = devm_gpiod_get(&pdev->dev, "sda", GPIOD_IN); - if (PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - rinfo->scl_gpiod = devm_gpiod_get(&pdev->dev, "scl", - GPIOD_OUT_HIGH_OPEN_DRAIN); - if (PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - if (IS_ERR(rinfo->sda_gpiod) || - IS_ERR(rinfo->scl_gpiod)) { - dev_info(&pdev->dev, "recovery information incomplete\n"); - if (!IS_ERR(rinfo->sda_gpiod)) { - gpiod_put(rinfo->sda_gpiod); - rinfo->sda_gpiod = NULL; - } - if (!IS_ERR(rinfo->scl_gpiod)) { - gpiod_put(rinfo->scl_gpiod); - rinfo->scl_gpiod = NULL; - } - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - return -EINVAL; - } - - /* change the state of the pins back to their default state */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - - dev_info(&pdev->dev, "using scl, sda for recovery\n"); - - rinfo->prepare_recovery = at91_prepare_twi_recovery; - rinfo->unprepare_recovery = at91_unprepare_twi_recovery; - rinfo->recover_bus = i2c_generic_scl_recovery; dev->adapter.bus_recovery_info = rinfo; return 0; diff --git a/drivers/i2c/busses/i2c-at91.h b/drivers/i2c/busses/i2c-at91.h index 7e7b4955ca7f..eae673ae786c 100644 --- a/drivers/i2c/busses/i2c-at91.h +++ b/drivers/i2c/busses/i2c-at91.h @@ -157,9 +157,6 @@ struct at91_twi_dev { struct at91_twi_dma dma; bool slave_detected; struct i2c_bus_recovery_info rinfo; - struct pinctrl *pinctrl; - struct pinctrl_state *pinctrl_pins_default; - struct pinctrl_state *pinctrl_pins_gpio; #ifdef CONFIG_I2C_AT91_SLAVE_EXPERIMENTAL unsigned smr; struct i2c_client *slave; -- cgit From cee807cf9e5beb3e7dbdd6804773f5b8749baa25 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 2 Aug 2020 10:21:22 +0200 Subject: Documentation: i2c: dev: 'block process call' is supported And it has been for a while (since 2012 at least), only it was not documented. Add it. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst index bdb247f2f11a..73ad34849f99 100644 --- a/Documentation/i2c/dev-interface.rst +++ b/Documentation/i2c/dev-interface.rst @@ -159,6 +159,8 @@ for details) through the following functions:: __s32 i2c_smbus_read_word_data(int file, __u8 command); __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value); __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value); + __s32 i2c_smbus_block_process_call(int file, __u8 command, __u8 length, + __u8 *values); __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values); __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, __u8 *values); -- cgit From db4694e66286b691bf67cb2d1280e42d3604ea79 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 17 Jul 2020 20:52:47 +0200 Subject: i2c: bcm2835: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov [wsa: shortened commit message] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index d9b86fcc3825..5dc519516292 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -392,7 +392,7 @@ static const struct i2c_algorithm bcm2835_i2c_algo = { /* * The BCM2835 was reported to have problems with clock stretching: - * http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html + * https://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html * https://www.raspberrypi.org/forums/viewtopic.php?p=146272 */ static const struct i2c_adapter_quirks bcm2835_i2c_quirks = { -- cgit From 12745b071e2b6b43e623e6cce521a1cb3c4c28dc Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Jun 2020 16:42:39 +0300 Subject: i2c: i801: Add support for Intel Emmitsburg PCH Add support for SMBus controller on Intel Emmitsburg PCH. This is the same IP as used in Cannon Lake and derivatives. Signed-off-by: Mika Westerberg Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index b83da0e94184..faf32330c335 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -43,6 +43,7 @@ Supported adapters: * Intel Elkhart Lake (PCH) * Intel Tiger Lake (PCH) * Intel Jasper Lake (SOC) + * Intel Emmitsburg (PCH) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 735bf31a3fdf..085ca9b009ed 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -146,6 +146,7 @@ config I2C_I801 Elkhart Lake (PCH) Tiger Lake (PCH) Jasper Lake (SOC) + Emmitsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 638e7f7c66cc..63b0c9d054ff 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -54,6 +54,7 @@ * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes + * Emmitsburg (PCH) 0x1bc9 32 hard yes yes yes * Broxton (SOC) 0x5ad4 32 hard yes yes yes * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes @@ -207,6 +208,7 @@ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 #define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df +#define PCI_DEVICE_ID_INTEL_EBG_SMBUS 0x1bc9 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ @@ -1062,6 +1064,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) }, @@ -1749,6 +1752,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: + case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; -- cgit From f46efbcad97bfb2caded0397eccce7c71402868c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Jun 2020 16:42:40 +0300 Subject: i2c: i801: Add support for Intel Tiger Lake PCH-H Add SMBus PCI ID on Intel Tiger Lake PCH-H. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 63b0c9d054ff..e32ef3f01fe8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -68,6 +68,7 @@ * Comet Lake-H (PCH) 0x06a3 32 hard yes yes yes * Elkhart Lake (PCH) 0x4b23 32 hard yes yes yes * Tiger Lake-LP (PCH) 0xa0a3 32 hard yes yes yes + * Tiger Lake-H (PCH) 0x43a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes * @@ -223,6 +224,7 @@ #define PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS 0x31d4 #define PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS 0x34a3 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 +#define PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS 0x43a3 #define PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS 0x4b23 #define PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 @@ -1077,6 +1079,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, { 0, } }; @@ -1751,6 +1754,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; -- cgit From a19b4785d9eec9b7915a3944f279d73db80ee8f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Aug 2020 18:32:28 -0400 Subject: NFS: Report the stateid + status in trace_nfs4_layoutreturn_on_close() Ensure we correctly report the stateid and status in the layoutreturn on close tracepoint. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 2 +- fs/nfs/pnfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 543541173a3d..07ea8d847710 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1993,7 +1993,7 @@ TRACE_EVENT(nfs4_layoutget, DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn); -DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn_on_close); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index dd2e14f5875d..d8cdb94c6668 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1549,12 +1549,12 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, default: arg_stateid = &args->stateid; } + trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, res_stateid); if (ld_private && ld_private->ops && ld_private->ops->free) ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); - trace_nfs4_layoutreturn_on_close(args->inode, 0); } bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) -- cgit From 638037b158df00a860251772239778a25a692b8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Aug 2020 11:02:44 -0400 Subject: NFS: Add tracepoints for layouterror and layoutstats. Allow tracing of the NFSv4.2 layouterror and layoutstats operations. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 10 ++++++++-- fs/nfs/nfs4trace.h | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index e200522469af..142225f0af59 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -17,6 +17,7 @@ #include "nfs4session.h" #include "internal.h" #include "delegation.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); @@ -714,7 +715,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: - break; + return; case -NFS4ERR_BADHANDLE: case -ESTALE: pnfs_destroy_layout(NFS_I(inode)); @@ -760,6 +761,8 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) case -EOPNOTSUPP: NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; } + + trace_nfs4_layoutstats(inode, &data->args.stateid, task->tk_status); } static void @@ -882,7 +885,7 @@ nfs42_layouterror_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: - break; + return; case -NFS4ERR_BADHANDLE: case -ESTALE: pnfs_destroy_layout(NFS_I(inode)); @@ -926,6 +929,9 @@ nfs42_layouterror_done(struct rpc_task *task, void *calldata) case -EOPNOTSUPP: NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR; } + + trace_nfs4_layouterror(inode, &data->args.errors[0].stateid, + task->tk_status); } static void diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 07ea8d847710..61c33536dcf3 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1994,6 +1994,8 @@ TRACE_EVENT(nfs4_layoutget, DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn_on_close); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layouterror); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutstats); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS); -- cgit From 2dd2111d0d383df104b144e0d1f6b5a00cb7cd88 Mon Sep 17 00:00:00 2001 From: Guoyu Huang Date: Wed, 5 Aug 2020 03:53:50 -0700 Subject: io_uring: Fix NULL pointer dereference in loop_rw_iter() loop_rw_iter() does not check whether the file has a read or write function. This can lead to NULL pointer dereference when the user passes in a file descriptor that does not have read or write function. The crash log looks like this: [ 99.834071] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 99.835364] #PF: supervisor instruction fetch in kernel mode [ 99.836522] #PF: error_code(0x0010) - not-present page [ 99.837771] PGD 8000000079d62067 P4D 8000000079d62067 PUD 79d8c067 PMD 0 [ 99.839649] Oops: 0010 [#2] SMP PTI [ 99.840591] CPU: 1 PID: 333 Comm: io_wqe_worker-0 Tainted: G D 5.8.0 #2 [ 99.842622] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 [ 99.845140] RIP: 0010:0x0 [ 99.845840] Code: Bad RIP value. [ 99.846672] RSP: 0018:ffffa1c7c01ebc08 EFLAGS: 00010202 [ 99.848018] RAX: 0000000000000000 RBX: ffff92363bd67300 RCX: ffff92363d461208 [ 99.849854] RDX: 0000000000000010 RSI: 00007ffdbf696bb0 RDI: ffff92363bd67300 [ 99.851743] RBP: ffffa1c7c01ebc40 R08: 0000000000000000 R09: 0000000000000000 [ 99.853394] R10: ffffffff9ec692a0 R11: 0000000000000000 R12: 0000000000000010 [ 99.855148] R13: 0000000000000000 R14: ffff92363d461208 R15: ffffa1c7c01ebc68 [ 99.856914] FS: 0000000000000000(0000) GS:ffff92363dd00000(0000) knlGS:0000000000000000 [ 99.858651] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 99.860032] CR2: ffffffffffffffd6 CR3: 000000007ac66000 CR4: 00000000000006e0 [ 99.861979] Call Trace: [ 99.862617] loop_rw_iter.part.0+0xad/0x110 [ 99.863838] io_write+0x2ae/0x380 [ 99.864644] ? kvm_sched_clock_read+0x11/0x20 [ 99.865595] ? sched_clock+0x9/0x10 [ 99.866453] ? sched_clock_cpu+0x11/0xb0 [ 99.867326] ? newidle_balance+0x1d4/0x3c0 [ 99.868283] io_issue_sqe+0xd8f/0x1340 [ 99.869216] ? __switch_to+0x7f/0x450 [ 99.870280] ? __switch_to_asm+0x42/0x70 [ 99.871254] ? __switch_to_asm+0x36/0x70 [ 99.872133] ? lock_timer_base+0x72/0xa0 [ 99.873155] ? switch_mm_irqs_off+0x1bf/0x420 [ 99.874152] io_wq_submit_work+0x64/0x180 [ 99.875192] ? kthread_use_mm+0x71/0x100 [ 99.876132] io_worker_handle_work+0x267/0x440 [ 99.877233] io_wqe_worker+0x297/0x350 [ 99.878145] kthread+0x112/0x150 [ 99.878849] ? __io_worker_unuse+0x100/0x100 [ 99.879935] ? kthread_park+0x90/0x90 [ 99.880874] ret_from_fork+0x22/0x30 [ 99.881679] Modules linked in: [ 99.882493] CR2: 0000000000000000 [ 99.883324] ---[ end trace 4453745f4673190b ]--- [ 99.884289] RIP: 0010:0x0 [ 99.884837] Code: Bad RIP value. [ 99.885492] RSP: 0018:ffffa1c7c01ebc08 EFLAGS: 00010202 [ 99.886851] RAX: 0000000000000000 RBX: ffff92363acd7f00 RCX: ffff92363d461608 [ 99.888561] RDX: 0000000000000010 RSI: 00007ffe040d9e10 RDI: ffff92363acd7f00 [ 99.890203] RBP: ffffa1c7c01ebc40 R08: 0000000000000000 R09: 0000000000000000 [ 99.891907] R10: ffffffff9ec692a0 R11: 0000000000000000 R12: 0000000000000010 [ 99.894106] R13: 0000000000000000 R14: ffff92363d461608 R15: ffffa1c7c01ebc68 [ 99.896079] FS: 0000000000000000(0000) GS:ffff92363dd00000(0000) knlGS:0000000000000000 [ 99.898017] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 99.899197] CR2: ffffffffffffffd6 CR3: 000000007ac66000 CR4: 00000000000006e0 Fixes: 32960613b7c3 ("io_uring: correctly handle non ->{read,write}_iter() file_operations") Cc: stable@vger.kernel.org Signed-off-by: Guoyu Huang Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5e1c08e22990..8f96566603f3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3066,7 +3066,10 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) { if (req->file->f_op->read_iter) return call_read_iter(req->file, &req->rw.kiocb, iter); - return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter); + else if (req->file->f_op->read) + return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter); + else + return -EINVAL; } static int io_read(struct io_kiocb *req, bool force_nonblock, @@ -3203,8 +3206,10 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, if (req->file->f_op->write_iter) ret2 = call_write_iter(req->file, kiocb, &iter); - else + else if (req->file->f_op->write) ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); + else + ret2 = -EINVAL; /* * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just -- cgit From 02e37571f9e79022498fd0525c073b07e9d9ac69 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 4 Aug 2020 12:31:56 -0400 Subject: ceph: handle zero-length feature mask in session messages Most session messages contain a feature mask, but the MDS will routinely send a REJECT message with one that is zero-length. Commit 0fa8263367db ("ceph: fix endianness bug when handling MDS session feature bits") fixed the decoding of the feature mask, but failed to account for the MDS sending a zero-length feature mask. This causes REJECT message decoding to fail. Skip trying to decode a feature mask if the word count is zero. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/46823 Fixes: 0fa8263367db ("ceph: fix endianness bug when handling MDS session feature bits") Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Tested-by: Patrick Donnelly Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1095802ad9bd..4a26862d7667 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3358,8 +3358,10 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_64_safe(&p, end, features, bad); - p += len - sizeof(features); + if (len) { + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); + } } mutex_lock(&mdsc->mutex); -- cgit From b63da6c8dfa9b2ab3554e8c59ef294d1f28bb9bd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Aug 2020 08:49:58 -0700 Subject: xfs: delete duplicated words + other fixes Delete repeated words in fs/xfs/. {we, that, the, a, to, fork} Change "it it" to "it is" in one location. Signed-off-by: Randy Dunlap To: linux-fsdevel@vger.kernel.org Cc: Darrick J. Wong Cc: linux-xfs@vger.kernel.org Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_sb.c | 2 +- fs/xfs/xfs_attr_list.c | 2 +- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_buf_item_recover.c | 2 +- fs/xfs/xfs_dquot.c | 2 +- fs/xfs/xfs_export.c | 2 +- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode_item.c | 4 ++-- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_refcount_item.c | 2 +- fs/xfs/xfs_reflink.c | 2 +- fs/xfs/xfs_trans_ail.c | 4 ++-- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4df87546bd40..ae9aaf1f34bf 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -600,7 +600,7 @@ xfs_sb_quota_to_disk( * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it - * it beyond the "end" of the valid non-pquotino superblock. + * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index e380bd1a9bfc..50f922cad91a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -44,7 +44,7 @@ xfs_attr_shortform_compare(const void *a, const void *b) /* * Copy out entries of shortform attribute lists for attr_list(). * Shortform attribute lists are not stored in hashval sorted order. - * If the output buffer is not large enough to hold them all, then we + * If the output buffer is not large enough to hold them all, then * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 5bb6f22cc11a..408d1b572d3f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -127,7 +127,7 @@ xfs_buf_item_size_segment( * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged * in a single iovec. * - * Discontiguous buffers need a format structure per region that that is being + * Discontiguous buffers need a format structure per region that is being * logged. This makes the changes in the buffer appear to log recovery as though * they came from separate buffers, just like would occur if multiple buffers * were used instead of a single discontiguous buffer. This enables diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d480f11e6b00..8f0457d67d77 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -948,7 +948,7 @@ xlog_recover_buf_commit_pass2( * or inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel - * running with a different inode cluster size. Regardless, if the + * running with a different inode cluster size. Regardless, if * the inode buffer size isn't max(blocksize, inode_cluster_size) * for *our* value of inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 04dc2be19c3a..bcd73b9c2994 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -807,7 +807,7 @@ xfs_qm_dqget_checks( } /* - * Given the file system, id, and type (UDQUOT/GDQUOT), return a a locked + * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked * dquot, doing an allocation (if requested) as needed. */ int diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 5a4b0119143a..465fd9e048d4 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -56,7 +56,7 @@ xfs_fs_encode_fh( fileid_type = FILEID_INO32_GEN_PARENT; /* - * If the the filesystem may contain 64bit inode numbers, we need + * If the filesystem may contain 64bit inode numbers, we need * to use larger file handles that can represent them. * * While we only allocate inodes that do not fit into 32 bits any diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 407d6299606d..c06129cffba9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -451,7 +451,7 @@ xfs_lock_inodes( /* * Currently supports between 2 and 5 inodes with exclusive locking. We * support an arbitrary depth of locking here, but absolute limits on - * inodes depend on the the type of locking and the limits placed by + * inodes depend on the type of locking and the limits placed by * lockdep annotations in xfs_lock_inumorder. These are all checked by * the asserts. */ @@ -3105,7 +3105,7 @@ out_trans_abort: /* * xfs_rename_alloc_whiteout() * - * Return a referenced, unlinked, unlocked inode that that can be used as a + * Return a referenced, unlinked, unlocked inode that can be used as a * whiteout in a rename transaction. We use a tmpfile inode here so that if we * crash between allocating the inode and linking it into the rename transaction * recovery will free the inode and we won't leak it. diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 895f61b2b4f0..6c65938cee1c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -191,7 +191,7 @@ xfs_inode_item_format_data_fork( ip->i_df.if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); @@ -275,7 +275,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0e3f62cde375..3abb8b9d6f4c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -865,7 +865,7 @@ xfs_buffered_write_iomap_begin( } /* - * Search the data fork fork first to look up our source mapping. We + * Search the data fork first to look up our source mapping. We * always need the data fork map, as we have to return it to the * iomap code so that the higher level write code can read data in to * perform read-modify-write cycles for unaligned writes. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 56c32eecffea..b0ef071b3cb5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -239,7 +239,7 @@ xfs_cil_prepare_item( * this CIL context and so we need to pin it. If we are replacing the * old_lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the - * shadow buffer, so update the the pointer to it appropriately. + * shadow buffer, so update the pointer to it appropriately. */ if (!old_lv) { if (lv->lv_item->li_ops->iop_pin) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 52a65a74208f..e2ec91b2d0f4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1100,7 +1100,7 @@ xlog_verify_head( * * Note that xlog_find_tail() clears the blocks at the new head * (i.e., the records with invalid CRC) if the cycle number - * matches the the current cycle. + * matches the current cycle. */ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, buffer, rhead_blk, rhead, wrapped); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7b2c72bc2858..ca93b6488377 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -485,7 +485,7 @@ xfs_cui_item_recover( * transaction. Normally, any work that needs to be deferred * gets attached to the same defer_ops that scheduled the * refcount update. However, we're in log recovery here, so we - * we use the passed in defer_ops and to finish up any work that + * use the passed in defer_ops and to finish up any work that * doesn't fit. We need to reserve enough blocks to handle a * full btree split on either end of the refcount range. */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index aac83f9d6107..16098dc42add 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -721,7 +721,7 @@ xfs_reflink_end_cow( * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * - * If we're being called by writeback then the the pages will still + * If we're being called by writeback then the pages will still * have PageWriteback set, which prevents races with reflink remapping * and truncate. Reflink remapping prevents races with writeback by * taking the iolock and mmaplock before flushing the pages and diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0c783d339675..dbb69b4bf3ed 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -480,7 +480,7 @@ xfsaild_push( * inode buffer is locked because we already pushed the * updates to it as part of inode clustering. * - * We do not want to to stop flushing just because lots + * We do not want to stop flushing just because lots * of items are already being flushed, but we need to * re-try the flushing relatively soon if most of the * AIL is being flushed. @@ -515,7 +515,7 @@ xfsaild_push( /* * Are there too many items we can't do anything with? * - * If we we are skipping too many items because we can't flush + * If we are skipping too many items because we can't flush * them or they are already being flushed, we back off and * given them time to complete whatever operation is being * done. i.e. remove pressure from the AIL while we can't make -- cgit From 923a3a863ae0c26876d704fb3453069e11ebdcb6 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Mon, 4 May 2020 17:14:24 +0300 Subject: platform_data/mlxreg: support new watchdog type with longer timeout period Add new watchdog type 3 with longer timeout period. Extend size of health_cntr field that that can be used to init watchdog timeout period. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200504141427.17685-2-michaelsh@mellanox.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/platform_data/mlxreg.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index b8da8aef2446..2c5e58d1d77b 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -43,10 +43,13 @@ * * TYPE1 HW watchdog implementation exist in old systems. * All new systems have TYPE2 HW watchdog. + * TYPE3 HW watchdog can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. */ enum mlxreg_wdt_type { MLX_WDT_TYPE1, MLX_WDT_TYPE2, + MLX_WDT_TYPE3, }; /** @@ -90,7 +93,7 @@ struct mlxreg_core_data { umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; - u8 health_cntr; + u32 health_cntr; bool attached; }; -- cgit From 7772b993fd96dc8f776356b1d5e18a4df7e68268 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Mon, 4 May 2020 17:14:25 +0300 Subject: platform/x86: mlx-platform: support new watchdog type with longer timeout Add verification of WD capability in order to distinguish between the existing WD types and new type, implemented in CPLD. Add configuration for a new WD type. Change access mode for watchdog registers. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200504141427.17685-3-michaelsh@mellanox.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/platform/x86/mlx-platform.c | 106 ++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index c27548fd386a..9d3371cd58d5 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -178,7 +178,9 @@ #define MLXPLAT_CPLD_WD_RESET_ACT_MASK GENMASK(7, 1) #define MLXPLAT_CPLD_WD_FAN_ACT_MASK (GENMASK(7, 0) & ~BIT(4)) #define MLXPLAT_CPLD_WD_COUNT_ACT_MASK (GENMASK(7, 0) & ~BIT(7)) +#define MLXPLAT_CPLD_WD_CPBLTY_MASK (GENMASK(7, 0) & ~BIT(6)) #define MLXPLAT_CPLD_WD_DFLT_TIMEOUT 30 +#define MLXPLAT_CPLD_WD3_DFLT_TIMEOUT 600 #define MLXPLAT_CPLD_WD_MAX_DEVS 2 /* mlxplat_priv - platform private data @@ -1959,6 +1961,84 @@ static struct mlxreg_core_platform_data mlxplat_mlxcpld_wd_set_type2[] = { }, }; +/* Watchdog type3: hardware implementation version 3 + * Can be on all systems. It's differentiated by WD capability bit. + * Old systems (MSN2700, MSN2410, MSN2740, MSN2100 and MSN2140) + * still have only one main watchdog. + */ +static struct mlxreg_core_data mlxplat_mlxcpld_wd_main_regs_type3[] = { + { + .label = "action", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_RESET_ACT_MASK, + .bit = 0, + }, + { + .label = "timeout", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + .health_cntr = MLXPLAT_CPLD_WD3_DFLT_TIMEOUT, + }, + { + .label = "timeleft", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + }, + { + .label = "ping", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_RESET_ACT_MASK, + .bit = 0, + }, + { + .label = "reset", + .reg = MLXPLAT_CPLD_LPC_REG_RESET_CAUSE_OFFSET, + .mask = GENMASK(7, 0) & ~BIT(6), + .bit = 6, + }, +}; + +static struct mlxreg_core_data mlxplat_mlxcpld_wd_aux_regs_type3[] = { + { + .label = "action", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_FAN_ACT_MASK, + .bit = 4, + }, + { + .label = "timeout", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + .health_cntr = MLXPLAT_CPLD_WD3_DFLT_TIMEOUT, + }, + { + .label = "timeleft", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + }, + { + .label = "ping", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_FAN_ACT_MASK, + .bit = 4, + }, +}; + +static struct mlxreg_core_platform_data mlxplat_mlxcpld_wd_set_type3[] = { + { + .data = mlxplat_mlxcpld_wd_main_regs_type3, + .counter = ARRAY_SIZE(mlxplat_mlxcpld_wd_main_regs_type3), + .version = MLX_WDT_TYPE3, + .identity = "mlx-wdt-main", + }, + { + .data = mlxplat_mlxcpld_wd_aux_regs_type3, + .counter = ARRAY_SIZE(mlxplat_mlxcpld_wd_aux_regs_type3), + .version = MLX_WDT_TYPE3, + .identity = "mlx-wdt-aux", + }, +}; + static bool mlxplat_mlxcpld_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -1989,8 +2069,10 @@ static bool mlxplat_mlxcpld_writeable_reg(struct device *dev, unsigned int reg) case MLXPLAT_CPLD_LPC_REG_WD1_TMR_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD1_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET: + case MLXPLAT_CPLD_LPC_REG_WD2_TLEFT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET: + case MLXPLAT_CPLD_LPC_REG_WD3_TLEFT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_PWM1_OFFSET: case MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET: @@ -2601,6 +2683,27 @@ static int mlxplat_mlxcpld_verify_bus_topology(int *nr) return 0; } +static int mlxplat_mlxcpld_check_wd_capability(void *regmap) +{ + u32 regval; + int i, rc; + + rc = regmap_read(regmap, MLXPLAT_CPLD_LPC_REG_PSU_I2C_CAP_OFFSET, + ®val); + if (rc) + return rc; + + if (!(regval & ~MLXPLAT_CPLD_WD_CPBLTY_MASK)) { + for (i = 0; i < ARRAY_SIZE(mlxplat_mlxcpld_wd_set_type3); i++) { + if (mlxplat_wd_data[i]) + mlxplat_wd_data[i] = + &mlxplat_mlxcpld_wd_set_type3[i]; + } + } + + return 0; +} + static int __init mlxplat_init(void) { struct mlxplat_priv *priv; @@ -2733,6 +2836,9 @@ static int __init mlxplat_init(void) } /* Add WD drivers. */ + err = mlxplat_mlxcpld_check_wd_capability(priv->regmap); + if (err) + goto fail_platform_wd_register; for (j = 0; j < MLXPLAT_CPLD_WD_MAX_DEVS; j++) { if (mlxplat_wd_data[j]) { mlxplat_wd_data[j]->regmap = priv->regmap; -- cgit From eee851143bca4422eeee3bb2e104b85537ba449d Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Mon, 4 May 2020 17:14:26 +0300 Subject: watchdog: mlx-wdt: support new watchdog type with longer timeout period New programmable logic device can have watchdog type 3 implementation. It's same as Type 2 with extended maximum timeout period. Maximum timeout is up-to 65535 sec. Type 3 HW watchdog implementation can exist on all Mellanox systems. It is differentiated by WD capability bit. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20200504141427.17685-4-michaelsh@mellanox.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mlx_wdt.c | 73 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c index 03b9ac4b99af..54193369e85c 100644 --- a/drivers/watchdog/mlx_wdt.c +++ b/drivers/watchdog/mlx_wdt.c @@ -21,6 +21,7 @@ #define MLXREG_WDT_CLOCK_SCALE 1000 #define MLXREG_WDT_MAX_TIMEOUT_TYPE1 32 #define MLXREG_WDT_MAX_TIMEOUT_TYPE2 255 +#define MLXREG_WDT_MAX_TIMEOUT_TYPE3 65535 #define MLXREG_WDT_MIN_TIMEOUT 1 #define MLXREG_WDT_OPTIONS_BASE (WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | \ WDIOF_SETTIMEOUT) @@ -49,6 +50,7 @@ struct mlxreg_wdt { int tleft_idx; int ping_idx; int reset_idx; + int regmap_val_sz; enum mlxreg_wdt_type wdt_type; }; @@ -111,7 +113,8 @@ static int mlxreg_wdt_set_timeout(struct watchdog_device *wdd, u32 regval, set_time, hw_timeout; int rc; - if (wdt->wdt_type == MLX_WDT_TYPE1) { + switch (wdt->wdt_type) { + case MLX_WDT_TYPE1: rc = regmap_read(wdt->regmap, reg_data->reg, ®val); if (rc) return rc; @@ -120,14 +123,32 @@ static int mlxreg_wdt_set_timeout(struct watchdog_device *wdd, regval = (regval & reg_data->mask) | hw_timeout; /* Rowndown to actual closest number of sec. */ set_time = BIT(hw_timeout) / MLXREG_WDT_CLOCK_SCALE; - } else { + rc = regmap_write(wdt->regmap, reg_data->reg, regval); + break; + case MLX_WDT_TYPE2: + set_time = timeout; + rc = regmap_write(wdt->regmap, reg_data->reg, timeout); + break; + case MLX_WDT_TYPE3: + /* WD_TYPE3 has 2B set time register */ set_time = timeout; - regval = timeout; + if (wdt->regmap_val_sz == 1) { + regval = timeout & 0xff; + rc = regmap_write(wdt->regmap, reg_data->reg, regval); + if (!rc) { + regval = (timeout & 0xff00) >> 8; + rc = regmap_write(wdt->regmap, + reg_data->reg + 1, regval); + } + } else { + rc = regmap_write(wdt->regmap, reg_data->reg, timeout); + } + break; + default: + return -EINVAL; } wdd->timeout = set_time; - rc = regmap_write(wdt->regmap, reg_data->reg, regval); - if (!rc) { /* * Restart watchdog with new timeout period @@ -147,10 +168,25 @@ static unsigned int mlxreg_wdt_get_timeleft(struct watchdog_device *wdd) { struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->tleft_idx]; - u32 regval; + u32 regval, msb, lsb; int rc; - rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + if (wdt->wdt_type == MLX_WDT_TYPE2) { + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + } else { + /* WD_TYPE3 has 2 byte timeleft register */ + if (wdt->regmap_val_sz == 1) { + rc = regmap_read(wdt->regmap, reg_data->reg, &lsb); + if (!rc) { + rc = regmap_read(wdt->regmap, + reg_data->reg + 1, &msb); + regval = (msb & 0xff) << 8 | (lsb & 0xff); + } + } else { + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + } + } + /* Return 0 timeleft in case of failure register read. */ return rc == 0 ? regval : 0; } @@ -212,13 +248,23 @@ static void mlxreg_wdt_config(struct mlxreg_wdt *wdt, wdt->wdd.info = &mlxreg_wdt_aux_info; wdt->wdt_type = pdata->version; - if (wdt->wdt_type == MLX_WDT_TYPE2) { - wdt->wdd.ops = &mlxreg_wdt_ops_type2; - wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE2; - } else { + switch (wdt->wdt_type) { + case MLX_WDT_TYPE1: wdt->wdd.ops = &mlxreg_wdt_ops_type1; wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE1; + break; + case MLX_WDT_TYPE2: + wdt->wdd.ops = &mlxreg_wdt_ops_type2; + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE2; + break; + case MLX_WDT_TYPE3: + wdt->wdd.ops = &mlxreg_wdt_ops_type2; + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE3; + break; + default: + break; } + wdt->wdd.min_timeout = MLXREG_WDT_MIN_TIMEOUT; } @@ -249,6 +295,11 @@ static int mlxreg_wdt_probe(struct platform_device *pdev) wdt->wdd.parent = dev; wdt->regmap = pdata->regmap; + rc = regmap_get_val_bytes(wdt->regmap); + if (rc < 0) + return -EINVAL; + + wdt->regmap_val_sz = rc; mlxreg_wdt_config(wdt, pdata); if ((pdata->features & MLXREG_CORE_WD_FEATURE_NOWAYOUT)) -- cgit From d6e6d5627f0aaa16d6b6e94238d62a594a35b5ce Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Mon, 4 May 2020 17:14:27 +0300 Subject: docs: watchdog: mlx-wdt: Add description of new watchdog type 3 Add documentation with details of new type of Mellanox watchdog driver. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20200504141427.17685-5-michaelsh@mellanox.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/mlx-wdt.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/watchdog/mlx-wdt.rst b/Documentation/watchdog/mlx-wdt.rst index bf5bafac47f0..35e690dea9db 100644 --- a/Documentation/watchdog/mlx-wdt.rst +++ b/Documentation/watchdog/mlx-wdt.rst @@ -24,10 +24,19 @@ Type 2: Maximum timeout is 255 sec. Get time-left is supported. +Type 3: + Same as Type 2 with extended maximum timeout period. + Maximum timeout is 65535 sec. + Type 1 HW watchdog implementation exist in old systems and all new systems have type 2 HW watchdog. Two types of HW implementation have also different register map. +Type 3 HW watchdog implementation can exist on all Mellanox systems +with new programmer logic device. +It's differentiated by WD capability bit. +Old systems still have only one main watchdog. + Mellanox system can have 2 watchdogs: main and auxiliary. Main and auxiliary watchdog devices can be enabled together on the same system. @@ -54,3 +63,4 @@ The driver checks during initialization if the previous system reset was done by the watchdog. If yes, it makes a notification about this event. Access to HW registers is performed through a generic regmap interface. +Programmable logic device registers have little-endian order. -- cgit From 0be01476dd527dfa00976911208332c706a1b752 Mon Sep 17 00:00:00 2001 From: Bumsik Kim Date: Fri, 29 May 2020 10:24:28 +0900 Subject: watchdog: test_bit() => watchdog_active() Use the dedicated function watchdog_active() instead of the generic test_bit() function. It is done using the following Coccinelle script: @@ identifier wdd; @@ - test_bit(WDOG_ACTIVE, &wdd->status) + watchdog_active(wdd) Signed-off-by: Bumsik Kim Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200529012428.84684-1-k.bumsik@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 7e4cd34a8c20..3ae608d78af2 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -916,7 +916,7 @@ static int watchdog_release(struct inode *inode, struct file *file) * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then * watchdog_stop will fail. */ - if (!test_bit(WDOG_ACTIVE, &wdd->status)) + if (!watchdog_active(wdd)) err = 0; else if (test_and_clear_bit(_WDOG_ALLOW_RELEASE, &wd_data->status) || !(wdd->info->options & WDIOF_MAGICCLOSE)) -- cgit From ff01cb1ca1f9e02e54e562bb97999e4bf1726a9e Mon Sep 17 00:00:00 2001 From: Martin Wu Date: Fri, 29 May 2020 17:45:14 +0800 Subject: watchdog: sunxi_wdt: fix improper error exit code sunxi_wdt_probe() should return -ENOMEM when devm_kzalloc() fails. Signed-off-by: Martin Wu Signed-off-by: Frank Lee Acked-by: Maxime Ripard Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200529094514.26374-1-frank@allwinnertech.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sunxi_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index 5f05a45ac187..b50757882a98 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -235,7 +235,7 @@ static int sunxi_wdt_probe(struct platform_device *pdev) sunxi_wdt = devm_kzalloc(dev, sizeof(*sunxi_wdt), GFP_KERNEL); if (!sunxi_wdt) - return -EINVAL; + return -ENOMEM; sunxi_wdt->wdt_regs = of_device_get_match_data(dev); if (!sunxi_wdt->wdt_regs) -- cgit From fd998972458f06d3fadbf4e55a3725c4edc17c12 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 25 May 2020 15:31:04 +0800 Subject: watchdog: bcm_kona_wdt: Use correct return value for bcm_kona_wdt_probe() When call function devm_platform_ioremap_resource(), we should use IS_ERR() to check the return value and return PTR_ERR() if failed. Signed-off-by: Tiezhu Yang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1590391864-308-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm_kona_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index eb850a8d19df..8237c4e9c2a0 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -279,7 +279,7 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) wdt->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(wdt->base)) - return -ENODEV; + return PTR_ERR(wdt->base); wdt->resolution = SECWDOG_DEFAULT_RESOLUTION; ret = bcm_kona_wdt_set_resolution_reg(wdt); -- cgit From 893c3d82b425d8d448da030c80420072ff4edbf1 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Fri, 12 Jun 2020 15:03:04 +0800 Subject: watchdog: Use kobj_to_dev() API Use kobj_to_dev() API instead of container_of(). Signed-off-by: Wang Qing Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1591945384-14587-1-git-send-email-wangqing@vivo.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 3ae608d78af2..de02d90a4878 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -587,7 +587,7 @@ static DEVICE_ATTR_RW(pretimeout_governor); static umode_t wdt_is_visible(struct kobject *kobj, struct attribute *attr, int n) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct watchdog_device *wdd = dev_get_drvdata(dev); umode_t mode = attr->mode; -- cgit From 04daa8c049bc0cd4747d671d8b855f3d0fa39997 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 2 Jun 2020 07:21:04 +0200 Subject: MAINTAINERS: rectify entry in ARM SMC WATCHDOG DRIVER Commit 5c24a28b4eb8 ("dt-bindings: watchdog: Add ARM smc wdt for mt8173 watchdog") added the new ARM SMC WATCHDOG DRIVER entry in MAINTAINERS, but slipped in a minor mistake. Luckily, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: devicetree/bindings/watchdog/arm-smc-wdt.yaml Update file entry to intended file location. Signed-off-by: Lukas Bulwahn Reviewed-by: Guenter Roeck Reviewed-by: Julius Werner Reviewed-by: Evan Benn Link: https://lore.kernel.org/r/20200602052104.7795-1-lukas.bulwahn@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f0569cf304ca..284c14fe080c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1489,7 +1489,7 @@ ARM SMC WATCHDOG DRIVER M: Julius Werner R: Evan Benn S: Maintained -F: devicetree/bindings/watchdog/arm-smc-wdt.yaml +F: Documentation/devicetree/bindings/watchdog/arm-smc-wdt.yaml F: drivers/watchdog/arm_smc_wdt.c ARM SMMU DRIVERS -- cgit From 9807a8884143148f2ee5a882d6273806403e9345 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:51 +0300 Subject: dt-bindings: watchdog: Convert DW WDT binding to DT schema Modern device tree bindings are supposed to be created as YAML-files in accordance with dt-schema. This commit replaces the DW Watchdog legacy bare text bindings with YAML file. As before the binding states that the corresponding dts node is supposed to have a registers range, a watchdog timer references clock source, optional reset line and pre-timeout interrupt. Signed-off-by: Serge Semin Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: linux-mips@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/dw_wdt.txt | 24 ----------- .../devicetree/bindings/watchdog/snps,dw-wdt.yaml | 50 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/watchdog/dw_wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt deleted file mode 100644 index eb0914420c7c..000000000000 --- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt +++ /dev/null @@ -1,24 +0,0 @@ -Synopsys Designware Watchdog Timer - -Required Properties: - -- compatible : Should contain "snps,dw-wdt" -- reg : Base address and size of the watchdog timer registers. -- clocks : phandle + clock-specifier for the clock that drives the - watchdog timer. - -Optional Properties: - -- interrupts : The interrupt used for the watchdog timeout warning. -- resets : phandle pointing to the system reset controller with - line index for the watchdog. - -Example: - - watchdog0: wd@ffd02000 { - compatible = "snps,dw-wdt"; - reg = <0xffd02000 0x1000>; - interrupts = <0 171 4>; - clocks = <&per_base_clk>; - resets = <&rst WDT0_RESET>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml new file mode 100644 index 000000000000..4f6944756ab4 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/snps,dw-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys Designware Watchdog Timer + +allOf: + - $ref: "watchdog.yaml#" + +maintainers: + - Jamie Iles + +properties: + compatible: + const: snps,dw-wdt + + reg: + maxItems: 1 + + interrupts: + description: DW Watchdog pre-timeout interrupt + maxItems: 1 + + clocks: + items: + - description: Watchdog timer reference clock + + resets: + description: Phandle to the DW Watchdog reset lane + maxItems: 1 + +unevaluatedProperties: false + +required: + - compatible + - reg + - clocks + +examples: + - | + watchdog@ffd02000 { + compatible = "snps,dw-wdt"; + reg = <0xffd02000 0x1000>; + interrupts = <0 171 4>; + clocks = <&per_base_clk>; + resets = <&wdt_rst>; + }; +... -- cgit From 5b4f68f808ff96a0a48456ff8da250a2caf3f545 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:52 +0300 Subject: dt-bindings: watchdog: dw-wdt: Support devices with asynch clocks DW Watchdog IP core can be synthesised with asynchronous timer/APB clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case separate clock signals are supposed to be used to feed watchdog timer and APB interface of the device. Let's update the DW Watchdog DT node schema so it would support the optional APB3 bus clock specified along with the mandatory watchdog timer reference clock. Signed-off-by: Serge Semin Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: linux-mips@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-3-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml index 4f6944756ab4..5bf6dc6377f3 100644 --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -24,8 +24,16 @@ properties: maxItems: 1 clocks: + minItems: 1 items: - description: Watchdog timer reference clock + - description: APB3 interface clock + + clock-names: + minItems: 1 + items: + - const: tclk + - const: pclk resets: description: Phandle to the DW Watchdog reset lane -- cgit From 4ce4e7fdc3c924ea3e6d5d68178127b46568d8df Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:53 +0300 Subject: dt-bindings: watchdog: dw-wdt: Add watchdog TOPs array property In case if DW Watchdog IP core is built with WDT_USE_FIX_TOP == false, a custom timeout periods are used to preset the timer counter. In this case that periods should be specified in a new "snps,watchdog-tops" property of the DW watchdog dts node. Signed-off-by: Serge Semin Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: linux-mips@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-4-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/snps,dw-wdt.yaml | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml index 5bf6dc6377f3..d9fc7bb851b1 100644 --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -39,6 +39,23 @@ properties: description: Phandle to the DW Watchdog reset lane maxItems: 1 + snps,watchdog-tops: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: | + DW APB Watchdog custom timer intervals - Timeout Period ranges (TOPs). + Each TOP is a number loaded into the watchdog counter at the moment of + the timer restart. The counter decrementing happens each tick of the + reference clock. Therefore the TOPs array is equivalent to an array of + the timer expiration intervals supported by the DW APB Watchdog. Note + DW APB Watchdog IP-core might be synthesized with fixed TOP values, + in which case this property is unnecessary with default TOPs utilized. + default: [0x0001000 0x0002000 0x0004000 0x0008000 + 0x0010000 0x0020000 0x0040000 0x0080000 + 0x0100000 0x0200000 0x0400000 0x0800000 + 0x1000000 0x2000000 0x4000000 0x8000000] + minItems: 16 + maxItems: 16 + unevaluatedProperties: false required: @@ -55,4 +72,19 @@ examples: clocks = <&per_base_clk>; resets = <&wdt_rst>; }; + + - | + watchdog@ffd02000 { + compatible = "snps,dw-wdt"; + reg = <0xffd02000 0x1000>; + interrupts = <0 171 4>; + clocks = <&per_base_clk>; + clock-names = "tclk"; + snps,watchdog-tops = <0x000000FF 0x000001FF 0x000003FF + 0x000007FF 0x0000FFFF 0x0001FFFF + 0x0003FFFF 0x0007FFFF 0x000FFFFF + 0x001FFFFF 0x003FFFFF 0x007FFFFF + 0x00FFFFFF 0x01FFFFFF 0x03FFFFFF + 0x07FFFFFF>; + }; ... -- cgit From 86445535887e580036d0094a849126703a33fd62 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:54 +0300 Subject: watchdog: dw_wdt: Support devices with non-fixed TOP values In case if the DW Watchdog IP core is synthesised with WDT_USE_FIX_TOP == false, the TOP interval indexes make the device to load a custom periods to the counter. These periods are hardwired at the IP synthesis stage and can be within [2^8, 2^(WDT_CNT_WIDTH - 1)]. Alas their values can't be detected at runtime and must be somehow supplied to the driver so one could properly determine the watchdog timeout intervals. For this purpose we suggest to have a vendor- specific dts property "snps,watchdog-tops" utilized, which would provide an array of sixteen counter values. At device probe stage they will be used to initialize the watchdog device timeouts determined from the array values and current clocks source rate. In order to have custom TOP values supported the driver must be altered in the following way. First of all the fixed-top values ready-to-use array must be determined for compatibility with currently supported devices, which were synthesised with WDT_USE_FIX_TOP == true. It will be used if either fixed TOP feature is detected being enabled or no custom TOPs are fetched from the device dt node. Secondly at the probe stage we must initialize an array of the watchdog timeouts corresponding to the detected TOPs list and the reference clock rate. For generality the procedure of initialization is designed in a way to support the TOPs array with no limitations on the items order or value. Finally the watchdog period search methods should be altered to support the new timeouts data structure. Signed-off-by: Serge Semin Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: Rob Herring Cc: linux-mips@vger.kernel.org Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-5-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 191 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 167 insertions(+), 24 deletions(-) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index fba21de2bbad..693c0d1fd796 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -13,6 +13,8 @@ */ #include +#include +#include #include #include #include @@ -34,21 +36,40 @@ #define WDOG_CURRENT_COUNT_REG_OFFSET 0x08 #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 +#define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 +#define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) -/* The maximum TOP (timeout period) value that can be set in the watchdog. */ -#define DW_WDT_MAX_TOP 15 +/* There are sixteen TOPs (timeout periods) that can be set in the watchdog. */ +#define DW_WDT_NUM_TOPS 16 +#define DW_WDT_FIX_TOP(_idx) (1U << (16 + _idx)) #define DW_WDT_DEFAULT_SECONDS 30 +static const u32 dw_wdt_fix_tops[DW_WDT_NUM_TOPS] = { + DW_WDT_FIX_TOP(0), DW_WDT_FIX_TOP(1), DW_WDT_FIX_TOP(2), + DW_WDT_FIX_TOP(3), DW_WDT_FIX_TOP(4), DW_WDT_FIX_TOP(5), + DW_WDT_FIX_TOP(6), DW_WDT_FIX_TOP(7), DW_WDT_FIX_TOP(8), + DW_WDT_FIX_TOP(9), DW_WDT_FIX_TOP(10), DW_WDT_FIX_TOP(11), + DW_WDT_FIX_TOP(12), DW_WDT_FIX_TOP(13), DW_WDT_FIX_TOP(14), + DW_WDT_FIX_TOP(15) +}; + static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +struct dw_wdt_timeout { + u32 top_val; + unsigned int sec; + unsigned int msec; +}; + struct dw_wdt { void __iomem *regs; struct clk *clk; unsigned long rate; + struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; struct watchdog_device wdd; struct reset_control *rst; /* Save/restore */ @@ -64,20 +85,66 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt) WDOG_CONTROL_REG_WDT_EN_MASK; } -static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top) +static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt, + unsigned int timeout, u32 *top_val) { + int idx; + /* - * There are 16 possible timeout values in 0..15 where the number of - * cycles is 2 ^ (16 + i) and the watchdog counts down. + * Find a TOP with timeout greater or equal to the requested number. + * Note we'll select a TOP with maximum timeout if the requested + * timeout couldn't be reached. */ - return (1U << (16 + top)) / dw_wdt->rate; + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].sec >= timeout) + break; + } + + if (idx == DW_WDT_NUM_TOPS) + --idx; + + *top_val = dw_wdt->timeouts[idx].top_val; + + return dw_wdt->timeouts[idx].sec; } -static int dw_wdt_get_top(struct dw_wdt *dw_wdt) +static unsigned int dw_wdt_get_min_timeout(struct dw_wdt *dw_wdt) { - int top = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + int idx; + + /* + * We'll find a timeout greater or equal to one second anyway because + * the driver probe would have failed if there was none. + */ + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].sec) + break; + } - return dw_wdt_top_in_seconds(dw_wdt, top); + return dw_wdt->timeouts[idx].sec; +} + +static unsigned int dw_wdt_get_max_timeout_ms(struct dw_wdt *dw_wdt) +{ + struct dw_wdt_timeout *timeout = &dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1]; + u64 msec; + + msec = (u64)timeout->sec * MSEC_PER_SEC + timeout->msec; + + return msec < UINT_MAX ? msec : UINT_MAX; +} + +static unsigned int dw_wdt_get_timeout(struct dw_wdt *dw_wdt) +{ + int top_val = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + int idx; + + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].top_val == top_val) + break; + } + + return dw_wdt->timeouts[idx].sec; } static int dw_wdt_ping(struct watchdog_device *wdd) @@ -93,17 +160,10 @@ static int dw_wdt_ping(struct watchdog_device *wdd) static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) { struct dw_wdt *dw_wdt = to_dw_wdt(wdd); - int i, top_val = DW_WDT_MAX_TOP; + unsigned int timeout; + u32 top_val; - /* - * Iterate over the timeout values until we find the closest match. We - * always look for >=. - */ - for (i = 0; i <= DW_WDT_MAX_TOP; ++i) - if (dw_wdt_top_in_seconds(dw_wdt, i) >= top_s) { - top_val = i; - break; - } + timeout = dw_wdt_find_best_top(dw_wdt, top_s, &top_val); /* * Set the new value in the watchdog. Some versions of dw_wdt @@ -120,7 +180,7 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) * wdd->max_hw_heartbeat_ms */ if (top_s * 1000 <= wdd->max_hw_heartbeat_ms) - wdd->timeout = dw_wdt_top_in_seconds(dw_wdt, top_val); + wdd->timeout = timeout; else wdd->timeout = top_s; @@ -238,6 +298,86 @@ static int dw_wdt_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(dw_wdt_pm_ops, dw_wdt_suspend, dw_wdt_resume); +/* + * In case if DW WDT IP core is synthesized with fixed TOP feature disabled the + * TOPs array can be arbitrary ordered with nearly any sixteen uint numbers + * depending on the system engineer imagination. The next method handles the + * passed TOPs array to pre-calculate the effective timeouts and to sort the + * TOP items out in the ascending order with respect to the timeouts. + */ + +static void dw_wdt_handle_tops(struct dw_wdt *dw_wdt, const u32 *tops) +{ + struct dw_wdt_timeout tout, *dst; + int val, tidx; + u64 msec; + + /* + * We walk over the passed TOPs array and calculate corresponding + * timeouts in seconds and milliseconds. The milliseconds granularity + * is needed to distinguish the TOPs with very close timeouts and to + * set the watchdog max heartbeat setting further. + */ + for (val = 0; val < DW_WDT_NUM_TOPS; ++val) { + tout.top_val = val; + tout.sec = tops[val] / dw_wdt->rate; + msec = (u64)tops[val] * MSEC_PER_SEC; + do_div(msec, dw_wdt->rate); + tout.msec = msec - ((u64)tout.sec * MSEC_PER_SEC); + + /* + * Find a suitable place for the current TOP in the timeouts + * array so that the list is remained in the ascending order. + */ + for (tidx = 0; tidx < val; ++tidx) { + dst = &dw_wdt->timeouts[tidx]; + if (tout.sec > dst->sec || (tout.sec == dst->sec && + tout.msec >= dst->msec)) + continue; + else + swap(*dst, tout); + } + + dw_wdt->timeouts[val] = tout; + } +} + +static int dw_wdt_init_timeouts(struct dw_wdt *dw_wdt, struct device *dev) +{ + u32 data, of_tops[DW_WDT_NUM_TOPS]; + const u32 *tops; + int ret; + + /* + * Retrieve custom or fixed counter values depending on the + * WDT_USE_FIX_TOP flag found in the component specific parameters + * #1 register. + */ + data = readl(dw_wdt->regs + WDOG_COMP_PARAMS_1_REG_OFFSET); + if (data & WDOG_COMP_PARAMS_1_USE_FIX_TOP) { + tops = dw_wdt_fix_tops; + } else { + ret = of_property_read_variable_u32_array(dev_of_node(dev), + "snps,watchdog-tops", of_tops, DW_WDT_NUM_TOPS, + DW_WDT_NUM_TOPS); + if (ret < 0) { + dev_warn(dev, "No valid TOPs array specified\n"); + tops = dw_wdt_fix_tops; + } else { + tops = of_tops; + } + } + + /* Convert the specified TOPs into an array of watchdog timeouts. */ + dw_wdt_handle_tops(dw_wdt, tops); + if (!dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1].sec) { + dev_err(dev, "No any valid TOP detected\n"); + return -EINVAL; + } + + return 0; +} + static int dw_wdt_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -275,12 +415,15 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) reset_control_deassert(dw_wdt->rst); + ret = dw_wdt_init_timeouts(dw_wdt, dev); + if (ret) + goto out_disable_clk; + wdd = &dw_wdt->wdd; wdd->info = &dw_wdt_ident; wdd->ops = &dw_wdt_ops; - wdd->min_timeout = 1; - wdd->max_hw_heartbeat_ms = - dw_wdt_top_in_seconds(dw_wdt, DW_WDT_MAX_TOP) * 1000; + wdd->min_timeout = dw_wdt_get_min_timeout(dw_wdt); + wdd->max_hw_heartbeat_ms = dw_wdt_get_max_timeout_ms(dw_wdt); wdd->parent = dev; watchdog_set_drvdata(wdd, dw_wdt); @@ -293,7 +436,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) * devicetree. */ if (dw_wdt_is_enabled(dw_wdt)) { - wdd->timeout = dw_wdt_get_top(dw_wdt); + wdd->timeout = dw_wdt_get_timeout(dw_wdt); set_bit(WDOG_HW_RUNNING, &wdd->status); } else { wdd->timeout = DW_WDT_DEFAULT_SECONDS; -- cgit From a16f58bf154cdf589cf72cb1596b6217da5872a1 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:55 +0300 Subject: watchdog: dw_wdt: Support devices with asynch clocks DW Watchdog IP core can be synthesised with asynchronous timer/APB clocks support (WDT_ASYNC_CLK_MODE_ENABLE == 1). In this case separate clock signals are supposed to be used to feed watchdog timer and APB interface of the device. Currently the driver supports the synchronous mode only. Since there is no way to determine which mode was actually activated for device from its registers, we have to rely on the platform device configuration data. If optional "pclk" clock source is supplied, we consider the device working in asynchronous mode, otherwise the driver falls back to the synchronous configuration. Signed-off-by: Serge Semin Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: Rob Herring Cc: linux-mips@vger.kernel.org Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-6-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 48 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 693c0d1fd796..efbc36872670 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -68,6 +68,7 @@ struct dw_wdt_timeout { struct dw_wdt { void __iomem *regs; struct clk *clk; + struct clk *pclk; unsigned long rate; struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; struct watchdog_device wdd; @@ -274,6 +275,7 @@ static int dw_wdt_suspend(struct device *dev) dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + clk_disable_unprepare(dw_wdt->pclk); clk_disable_unprepare(dw_wdt->clk); return 0; @@ -287,6 +289,12 @@ static int dw_wdt_resume(struct device *dev) if (err) return err; + err = clk_prepare_enable(dw_wdt->pclk); + if (err) { + clk_disable_unprepare(dw_wdt->clk); + return err; + } + writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); @@ -393,9 +401,18 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) if (IS_ERR(dw_wdt->regs)) return PTR_ERR(dw_wdt->regs); - dw_wdt->clk = devm_clk_get(dev, NULL); - if (IS_ERR(dw_wdt->clk)) - return PTR_ERR(dw_wdt->clk); + /* + * Try to request the watchdog dedicated timer clock source. It must + * be supplied if asynchronous mode is enabled. Otherwise fallback + * to the common timer/bus clocks configuration, in which the very + * first found clock supply both timer and APB signals. + */ + dw_wdt->clk = devm_clk_get(dev, "tclk"); + if (IS_ERR(dw_wdt->clk)) { + dw_wdt->clk = devm_clk_get(dev, NULL); + if (IS_ERR(dw_wdt->clk)) + return PTR_ERR(dw_wdt->clk); + } ret = clk_prepare_enable(dw_wdt->clk); if (ret) @@ -407,10 +424,27 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) goto out_disable_clk; } + /* + * Request APB clock if device is configured with async clocks mode. + * In this case both tclk and pclk clocks are supposed to be specified. + * Alas we can't know for sure whether async mode was really activated, + * so the pclk phandle reference is left optional. If it couldn't be + * found we consider the device configured in synchronous clocks mode. + */ + dw_wdt->pclk = devm_clk_get_optional(dev, "pclk"); + if (IS_ERR(dw_wdt->pclk)) { + ret = PTR_ERR(dw_wdt->pclk); + goto out_disable_clk; + } + + ret = clk_prepare_enable(dw_wdt->pclk); + if (ret) + goto out_disable_clk; + dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL); if (IS_ERR(dw_wdt->rst)) { ret = PTR_ERR(dw_wdt->rst); - goto out_disable_clk; + goto out_disable_pclk; } reset_control_deassert(dw_wdt->rst); @@ -449,10 +483,13 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) ret = watchdog_register_device(wdd); if (ret) - goto out_disable_clk; + goto out_disable_pclk; return 0; +out_disable_pclk: + clk_disable_unprepare(dw_wdt->pclk); + out_disable_clk: clk_disable_unprepare(dw_wdt->clk); return ret; @@ -464,6 +501,7 @@ static int dw_wdt_drv_remove(struct platform_device *pdev) watchdog_unregister_device(&dw_wdt->wdd); reset_control_assert(dw_wdt->rst); + clk_disable_unprepare(dw_wdt->pclk); clk_disable_unprepare(dw_wdt->clk); return 0; -- cgit From 46a1946314bfe7db46e1f28e0ce2b6f701fae50b Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:56 +0300 Subject: watchdog: dw_wdt: Add pre-timeouts support DW Watchdog can rise an interrupt in case if IRQ request mode is enabled and timer reaches the zero value. In this case the IRQ lane is left pending until either the next watchdog kick event (watchdog restart) or until the WDT_EOI register is read or the device/system reset. This interface can be used to implement the pre-timeout functionality optionally provided by the Linux kernel watchdog devices. IRQ mode provides a two stages timeout interface. It means the IRQ is raised when the counter reaches zero, while the system reset occurs only after subsequent timeout if the timer restart is not performed. Due to this peculiarity the pre-timeout value is actually set to the achieved hardware timeout, while the real watchdog timeout is considered to be twice as much of it. This applies a significant limitation on the pre-timeout values, so current implementation supports either zero value, which disables the pre-timeout events, or non-zero values, which imply the pre-timeout to be at least half of the current watchdog timeout. Note that we ask the interrupt controller to detect the rising-edge pre-timeout interrupts to prevent the high-level-IRQs flood, since if the pre-timeout happens, the IRQ lane will be left pending until it's cleared by the timer restart. Signed-off-by: Serge Semin Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: Rob Herring Cc: linux-mips@vger.kernel.org Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-7-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 138 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 130 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index efbc36872670..7a171920dd21 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ #define WDOG_CURRENT_COUNT_REG_OFFSET 0x08 #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 +#define WDOG_INTERRUPT_STATUS_REG_OFFSET 0x10 +#define WDOG_INTERRUPT_CLEAR_REG_OFFSET 0x14 #define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 #define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) @@ -59,6 +62,11 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +enum dw_wdt_rmod { + DW_WDT_RMOD_RESET = 1, + DW_WDT_RMOD_IRQ = 2 +}; + struct dw_wdt_timeout { u32 top_val; unsigned int sec; @@ -70,6 +78,7 @@ struct dw_wdt { struct clk *clk; struct clk *pclk; unsigned long rate; + enum dw_wdt_rmod rmod; struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; struct watchdog_device wdd; struct reset_control *rst; @@ -86,6 +95,20 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt) WDOG_CONTROL_REG_WDT_EN_MASK; } +static void dw_wdt_update_mode(struct dw_wdt *dw_wdt, enum dw_wdt_rmod rmod) +{ + u32 val; + + val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); + if (rmod == DW_WDT_RMOD_IRQ) + val |= WDOG_CONTROL_REG_RESP_MODE_MASK; + else + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; + writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); + + dw_wdt->rmod = rmod; +} + static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt, unsigned int timeout, u32 *top_val) { @@ -145,7 +168,11 @@ static unsigned int dw_wdt_get_timeout(struct dw_wdt *dw_wdt) break; } - return dw_wdt->timeouts[idx].sec; + /* + * In IRQ mode due to the two stages counter, the actual timeout is + * twice greater than the TOP setting. + */ + return dw_wdt->timeouts[idx].sec * dw_wdt->rmod; } static int dw_wdt_ping(struct watchdog_device *wdd) @@ -164,7 +191,20 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) unsigned int timeout; u32 top_val; - timeout = dw_wdt_find_best_top(dw_wdt, top_s, &top_val); + /* + * Note IRQ mode being enabled means having a non-zero pre-timeout + * setup. In this case we try to find a TOP as close to the half of the + * requested timeout as possible since DW Watchdog IRQ mode is designed + * in two stages way - first timeout rises the pre-timeout interrupt, + * second timeout performs the system reset. So basically the effective + * watchdog-caused reset happens after two watchdog TOPs elapsed. + */ + timeout = dw_wdt_find_best_top(dw_wdt, DIV_ROUND_UP(top_s, dw_wdt->rmod), + &top_val); + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) + wdd->pretimeout = timeout; + else + wdd->pretimeout = 0; /* * Set the new value in the watchdog. Some versions of dw_wdt @@ -175,25 +215,47 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + /* Kick new TOP value into the watchdog counter if activated. */ + if (watchdog_active(wdd)) + dw_wdt_ping(wdd); + /* * In case users set bigger timeout value than HW can support, * kernel(watchdog_dev.c) helps to feed watchdog before * wdd->max_hw_heartbeat_ms */ if (top_s * 1000 <= wdd->max_hw_heartbeat_ms) - wdd->timeout = timeout; + wdd->timeout = timeout * dw_wdt->rmod; else wdd->timeout = top_s; return 0; } +static int dw_wdt_set_pretimeout(struct watchdog_device *wdd, unsigned int req) +{ + struct dw_wdt *dw_wdt = to_dw_wdt(wdd); + + /* + * We ignore actual value of the timeout passed from user-space + * using it as a flag whether the pretimeout functionality is intended + * to be activated. + */ + dw_wdt_update_mode(dw_wdt, req ? DW_WDT_RMOD_IRQ : DW_WDT_RMOD_RESET); + dw_wdt_set_timeout(wdd, wdd->timeout); + + return 0; +} + static void dw_wdt_arm_system_reset(struct dw_wdt *dw_wdt) { u32 val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); - /* Disable interrupt mode; always perform system reset. */ - val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; + /* Disable/enable interrupt mode depending on the RMOD flag. */ + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) + val |= WDOG_CONTROL_REG_RESP_MODE_MASK; + else + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; /* Enable watchdog. */ val |= WDOG_CONTROL_REG_WDT_EN_MASK; writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); @@ -231,6 +293,7 @@ static int dw_wdt_restart(struct watchdog_device *wdd, struct dw_wdt *dw_wdt = to_dw_wdt(wdd); writel(0, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET); if (dw_wdt_is_enabled(dw_wdt)) writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt->regs + WDOG_COUNTER_RESTART_REG_OFFSET); @@ -246,9 +309,19 @@ static int dw_wdt_restart(struct watchdog_device *wdd, static unsigned int dw_wdt_get_timeleft(struct watchdog_device *wdd) { struct dw_wdt *dw_wdt = to_dw_wdt(wdd); + unsigned int sec; + u32 val; + + val = readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET); + sec = val / dw_wdt->rate; - return readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET) / - dw_wdt->rate; + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) { + val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET); + if (!val) + sec += wdd->pretimeout; + } + + return sec; } static const struct watchdog_info dw_wdt_ident = { @@ -257,16 +330,41 @@ static const struct watchdog_info dw_wdt_ident = { .identity = "Synopsys DesignWare Watchdog", }; +static const struct watchdog_info dw_wdt_pt_ident = { + .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_PRETIMEOUT | WDIOF_MAGICCLOSE, + .identity = "Synopsys DesignWare Watchdog", +}; + static const struct watchdog_ops dw_wdt_ops = { .owner = THIS_MODULE, .start = dw_wdt_start, .stop = dw_wdt_stop, .ping = dw_wdt_ping, .set_timeout = dw_wdt_set_timeout, + .set_pretimeout = dw_wdt_set_pretimeout, .get_timeleft = dw_wdt_get_timeleft, .restart = dw_wdt_restart, }; +static irqreturn_t dw_wdt_irq(int irq, void *devid) +{ + struct dw_wdt *dw_wdt = devid; + u32 val; + + /* + * We don't clear the IRQ status. It's supposed to be done by the + * following ping operations. + */ + val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET); + if (!val) + return IRQ_NONE; + + watchdog_notify_pretimeout(&dw_wdt->wdd); + + return IRQ_HANDLED; +} + #ifdef CONFIG_PM_SLEEP static int dw_wdt_suspend(struct device *dev) { @@ -447,6 +545,31 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) goto out_disable_pclk; } + /* Enable normal reset without pre-timeout by default. */ + dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET); + + /* + * Pre-timeout IRQ is optional, since some hardware may lack support + * of it. Note we must request rising-edge IRQ, since the lane is left + * pending either until the next watchdog kick event or up to the + * system reset. + */ + ret = platform_get_irq_optional(pdev, 0); + if (ret > 0) { + ret = devm_request_irq(dev, ret, dw_wdt_irq, + IRQF_SHARED | IRQF_TRIGGER_RISING, + pdev->name, dw_wdt); + if (ret) + goto out_disable_pclk; + + dw_wdt->wdd.info = &dw_wdt_pt_ident; + } else { + if (ret == -EPROBE_DEFER) + goto out_disable_pclk; + + dw_wdt->wdd.info = &dw_wdt_ident; + } + reset_control_deassert(dw_wdt->rst); ret = dw_wdt_init_timeouts(dw_wdt, dev); @@ -454,7 +577,6 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) goto out_disable_clk; wdd = &dw_wdt->wdd; - wdd->info = &dw_wdt_ident; wdd->ops = &dw_wdt_ops; wdd->min_timeout = dw_wdt_get_min_timeout(dw_wdt); wdd->max_hw_heartbeat_ms = dw_wdt_get_max_timeout_ms(dw_wdt); -- cgit From 4105f19fd0ce51b571f8112457182dfad142768e Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 30 May 2020 10:35:57 +0300 Subject: watchdog: dw_wdt: Add DebugFS files For the sake of the easier device-driver debug procedure, we added a DebugFS file with the controller registers state. It's available only if kernel is configured with DebugFS support. Signed-off-by: Serge Semin Reviewed-by: Guenter Roeck Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Arnd Bergmann Cc: Rob Herring Cc: linux-mips@vger.kernel.org Cc: devicetree@vger.kernel.org Link: https://lore.kernel.org/r/20200530073557.22661-8-Sergey.Semin@baikalelectronics.ru Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/dw_wdt.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 7a171920dd21..4fec2c6a3c01 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -28,6 +28,7 @@ #include #include #include +#include #define WDOG_CONTROL_REG_OFFSET 0x00 #define WDOG_CONTROL_REG_WDT_EN_MASK 0x01 @@ -39,8 +40,14 @@ #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 #define WDOG_INTERRUPT_STATUS_REG_OFFSET 0x10 #define WDOG_INTERRUPT_CLEAR_REG_OFFSET 0x14 +#define WDOG_COMP_PARAMS_5_REG_OFFSET 0xe4 +#define WDOG_COMP_PARAMS_4_REG_OFFSET 0xe8 +#define WDOG_COMP_PARAMS_3_REG_OFFSET 0xec +#define WDOG_COMP_PARAMS_2_REG_OFFSET 0xf0 #define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 #define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) +#define WDOG_COMP_VERSION_REG_OFFSET 0xf8 +#define WDOG_COMP_TYPE_REG_OFFSET 0xfc /* There are sixteen TOPs (timeout periods) that can be set in the watchdog. */ #define DW_WDT_NUM_TOPS 16 @@ -85,6 +92,10 @@ struct dw_wdt { /* Save/restore */ u32 control; u32 timeout; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; +#endif }; #define to_dw_wdt(wdd) container_of(wdd, struct dw_wdt, wdd) @@ -484,6 +495,59 @@ static int dw_wdt_init_timeouts(struct dw_wdt *dw_wdt, struct device *dev) return 0; } +#ifdef CONFIG_DEBUG_FS + +#define DW_WDT_DBGFS_REG(_name, _off) \ +{ \ + .name = _name, \ + .offset = _off \ +} + +static const struct debugfs_reg32 dw_wdt_dbgfs_regs[] = { + DW_WDT_DBGFS_REG("cr", WDOG_CONTROL_REG_OFFSET), + DW_WDT_DBGFS_REG("torr", WDOG_TIMEOUT_RANGE_REG_OFFSET), + DW_WDT_DBGFS_REG("ccvr", WDOG_CURRENT_COUNT_REG_OFFSET), + DW_WDT_DBGFS_REG("crr", WDOG_COUNTER_RESTART_REG_OFFSET), + DW_WDT_DBGFS_REG("stat", WDOG_INTERRUPT_STATUS_REG_OFFSET), + DW_WDT_DBGFS_REG("param5", WDOG_COMP_PARAMS_5_REG_OFFSET), + DW_WDT_DBGFS_REG("param4", WDOG_COMP_PARAMS_4_REG_OFFSET), + DW_WDT_DBGFS_REG("param3", WDOG_COMP_PARAMS_3_REG_OFFSET), + DW_WDT_DBGFS_REG("param2", WDOG_COMP_PARAMS_2_REG_OFFSET), + DW_WDT_DBGFS_REG("param1", WDOG_COMP_PARAMS_1_REG_OFFSET), + DW_WDT_DBGFS_REG("version", WDOG_COMP_VERSION_REG_OFFSET), + DW_WDT_DBGFS_REG("type", WDOG_COMP_TYPE_REG_OFFSET) +}; + +static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt) +{ + struct device *dev = dw_wdt->wdd.parent; + struct debugfs_regset32 *regset; + + regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); + if (!regset) + return; + + regset->regs = dw_wdt_dbgfs_regs; + regset->nregs = ARRAY_SIZE(dw_wdt_dbgfs_regs); + regset->base = dw_wdt->regs; + + dw_wdt->dbgfs_dir = debugfs_create_dir(dev_name(dev), NULL); + + debugfs_create_regset32("registers", 0444, dw_wdt->dbgfs_dir, regset); +} + +static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt) +{ + debugfs_remove_recursive(dw_wdt->dbgfs_dir); +} + +#else /* !CONFIG_DEBUG_FS */ + +static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt) {} +static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt) {} + +#endif /* !CONFIG_DEBUG_FS */ + static int dw_wdt_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -607,6 +671,8 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) if (ret) goto out_disable_pclk; + dw_wdt_dbgfs_init(dw_wdt); + return 0; out_disable_pclk: @@ -621,6 +687,8 @@ static int dw_wdt_drv_remove(struct platform_device *pdev) { struct dw_wdt *dw_wdt = platform_get_drvdata(pdev); + dw_wdt_dbgfs_clear(dw_wdt); + watchdog_unregister_device(&dw_wdt->wdd); reset_control_assert(dw_wdt->rst); clk_disable_unprepare(dw_wdt->pclk); -- cgit From 137e9e68dd3805deb9fa43b3eb1a963a2d093746 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 26 Jun 2020 00:59:04 +0530 Subject: dt-bindings: watchdog: Convert QCOM watchdog timer bindings to YAML Convert QCOM watchdog timer bindings to DT schema format using json-schema. Signed-off-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/77b47aad9d17cb4ec8359bdbbb30c33d818117e6.1593112534.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/qcom-wdt.txt | 28 -------------- .../devicetree/bindings/watchdog/qcom-wdt.yaml | 44 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 28 deletions(-) delete mode 100644 Documentation/devicetree/bindings/watchdog/qcom-wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt deleted file mode 100644 index 41aeaa2ff0f8..000000000000 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt +++ /dev/null @@ -1,28 +0,0 @@ -Qualcomm Krait Processor Sub-system (KPSS) Watchdog ---------------------------------------------------- - -Required properties : -- compatible : shall contain only one of the following: - - "qcom,kpss-wdt-msm8960" - "qcom,kpss-wdt-apq8064" - "qcom,kpss-wdt-ipq8064" - "qcom,kpss-wdt-ipq4019" - "qcom,kpss-timer" - "qcom,scss-timer" - "qcom,kpss-wdt" - -- reg : shall contain base register location and length -- clocks : shall contain the input clock - -Optional properties : -- timeout-sec : shall contain the default watchdog timeout in seconds, - if unset, the default timeout is 30 seconds - -Example: - watchdog@208a038 { - compatible = "qcom,kpss-wdt-ipq8064"; - reg = <0x0208a038 0x40>; - clocks = <&sleep_clk>; - timeout-sec = <10>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml new file mode 100644 index 000000000000..5448cc537a03 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/qcom-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Krait Processor Sub-system (KPSS) Watchdog timer + +maintainers: + - Sai Prakash Ranjan + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + enum: + - qcom,kpss-timer + - qcom,kpss-wdt + - qcom,kpss-wdt-apq8064 + - qcom,kpss-wdt-ipq4019 + - qcom,kpss-wdt-ipq8064 + - qcom,kpss-wdt-msm8960 + - qcom,scss-timer + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - clocks + +examples: + - | + watchdog@208a038 { + compatible = "qcom,kpss-wdt-ipq8064"; + reg = <0x0208a038 0x40>; + clocks = <&sleep_clk>; + timeout-sec = <10>; + }; -- cgit From a4fd26f6187b13cace0f90029fe790799e34e4e2 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 26 Jun 2020 00:59:05 +0530 Subject: dt-bindings: watchdog: Add compatible for QCS404, SC7180, SDM845, SM8150 Add missing compatible for watchdog timer on QCS404, SC7180, SDM845 and SM8150 SoCs. Signed-off-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Acked-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/09da1ba319dc4a27ef4e4e177e67e68f1cb4f35b.1593112534.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 5448cc537a03..0709ddf0b6a5 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -15,6 +15,10 @@ allOf: properties: compatible: enum: + - qcom,apss-wdt-qcs404 + - qcom,apss-wdt-sc7180 + - qcom,apss-wdt-sdm845 + - qcom,apss-wdt-sm8150 - qcom,kpss-timer - qcom,kpss-wdt - qcom,kpss-wdt-apq8064 -- cgit From d51d3852d1fd28409107274dc1d8077022cc0dc2 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:42 +0200 Subject: docs: watchdog: codify ident.options as superset of possible status flags The FIXME comment has been in-tree since the very first git commit. The described behavior has been since relied on by some userspace, e.g. the util-linux wdctl command and has been ignored by some kernelspace, like the f71808e_wdt driver. The functionality is useful to have to be able to differentiate between a driver that doesn't support WDIOF_CARDRESET and one that does, but hasn't had a watchdog reset, thus drop the FIXME to encourage drivers adopting this convention. Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-2-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/watchdog/watchdog-api.rst b/Documentation/watchdog/watchdog-api.rst index c6c1e9fa9f73..800dcd7586f2 100644 --- a/Documentation/watchdog/watchdog-api.rst +++ b/Documentation/watchdog/watchdog-api.rst @@ -168,7 +168,7 @@ the fields returned in the ident struct are: the options field can have the following bits set, and describes what kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can -return. [FIXME -- Is this correct?] +return. ================ ========================= WDIOF_OVERHEAT Reset due to CPU overheat -- cgit From e871e93fb08a619dfc015974a05768ed6880fd82 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:43 +0200 Subject: watchdog: f71808e_wdt: indicate WDIOF_CARDRESET support in watchdog_info.options The driver supports populating bootstatus with WDIOF_CARDRESET, but so far userspace couldn't portably determine whether absence of this flag meant no watchdog reset or no driver support. Or-in the bit to fix this. Fixes: b97cb21a4634 ("watchdog: f71808e_wdt: Fix WDTMOUT_STS register read") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-3-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index a3c44d75d80e..c8ce80c13403 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -692,7 +692,8 @@ static int __init watchdog_init(int sioaddr) watchdog.sioaddr = sioaddr; watchdog.ident.options = WDIOC_SETTIMEOUT | WDIOF_MAGICCLOSE - | WDIOF_KEEPALIVEPING; + | WDIOF_KEEPALIVEPING + | WDIOF_CARDRESET; snprintf(watchdog.ident.identity, sizeof(watchdog.ident.identity), "%s watchdog", -- cgit From 802141462d844f2e6a4d63a12260d79b7afc4c34 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:44 +0200 Subject: watchdog: f71808e_wdt: remove use of wrong watchdog_info option The flags that should be or-ed into the watchdog_info.options by drivers all start with WDIOF_, e.g. WDIOF_SETTIMEOUT, which indicates that the driver's watchdog_ops has a usable set_timeout. WDIOC_SETTIMEOUT was used instead, which expands to 0xc0045706, which equals: WDIOF_FANFAULT | WDIOF_EXTERN1 | WDIOF_PRETIMEOUT | WDIOF_ALARMONLY | WDIOF_MAGICCLOSE | 0xc0045000 These were so far indicated to userspace on WDIOC_GETSUPPORT. As the driver has not yet been migrated to the new watchdog kernel API, the constant can just be dropped without substitute. Fixes: 96cb4eb019ce ("watchdog: f71808e_wdt: new watchdog driver for Fintek F71808E and F71882FG") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-4-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index c8ce80c13403..8e5584c54423 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -690,8 +690,7 @@ static int __init watchdog_init(int sioaddr) * into the module have been registered yet. */ watchdog.sioaddr = sioaddr; - watchdog.ident.options = WDIOC_SETTIMEOUT - | WDIOF_MAGICCLOSE + watchdog.ident.options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_CARDRESET; -- cgit From 4f39d575844148fbf3081571a1f3b4ae04150958 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:45 +0200 Subject: watchdog: f71808e_wdt: clear watchdog timeout occurred flag The flag indicating a watchdog timeout having occurred normally persists till Power-On Reset of the Fintek Super I/O chip. The user can clear it by writing a `1' to the bit. The driver doesn't offer a restart method, so regular system reboot might not reset the Super I/O and if the watchdog isn't enabled, we won't touch the register containing the bit on the next boot. In this case all subsequent regular reboots will be wrongly flagged by the driver as being caused by the watchdog. Fix this by having the flag cleared after read. This is also done by other drivers like those for the i6300esb and mpc8xxx_wdt. Fixes: b97cb21a4634 ("watchdog: f71808e_wdt: Fix WDTMOUT_STS register read") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-5-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 8e5584c54423..26bf366aebc2 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -706,6 +706,13 @@ static int __init watchdog_init(int sioaddr) wdt_conf = superio_inb(sioaddr, F71808FG_REG_WDT_CONF); watchdog.caused_reboot = wdt_conf & BIT(F71808FG_FLAG_WDTMOUT_STS); + /* + * We don't want WDTMOUT_STS to stick around till regular reboot. + * Write 1 to the bit to clear it to zero. + */ + superio_outb(sioaddr, F71808FG_REG_WDT_CONF, + wdt_conf | BIT(F71808FG_FLAG_WDTMOUT_STS)); + superio_exit(sioaddr); err = watchdog_set_timeout(timeout); -- cgit From 5edc8c687357b40469d90e3dab0b59b72eaabf59 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:46 +0200 Subject: watchdog: f71808e_wdt: do stricter parameter validation We check the f71862fg_pin module parameter every time a watchdog device for the f71862fg is opened, but the parameter can't change at runtime. If we move the check to the start of init: - We catch userspace passing invalid, but unused, values - We check the condition only once - We simplify the code Do so. Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-6-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 26bf366aebc2..9f7823819ed1 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -306,27 +306,6 @@ exit_unlock: return err; } -static int f71862fg_pin_configure(unsigned short ioaddr) -{ - /* When ioaddr is non-zero the calling function has to take care of - mutex handling and superio preparation! */ - - if (f71862fg_pin == 63) { - if (ioaddr) { - /* SPI must be disabled first to use this pin! */ - superio_clear_bit(ioaddr, SIO_REG_ROM_ADDR_SEL, 6); - superio_set_bit(ioaddr, SIO_REG_MFUNCT3, 4); - } - } else if (f71862fg_pin == 56) { - if (ioaddr) - superio_set_bit(ioaddr, SIO_REG_MFUNCT1, 1); - } else { - pr_err("Invalid argument f71862fg_pin=%d\n", f71862fg_pin); - return -EINVAL; - } - return 0; -} - static int watchdog_start(void) { int err; @@ -352,9 +331,13 @@ static int watchdog_start(void) break; case f71862fg: - err = f71862fg_pin_configure(watchdog.sioaddr); - if (err) - goto exit_superio; + if (f71862fg_pin == 63) { + /* SPI must be disabled first to use this pin! */ + superio_clear_bit(watchdog.sioaddr, SIO_REG_ROM_ADDR_SEL, 6); + superio_set_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 4); + } else if (f71862fg_pin == 56) { + superio_set_bit(watchdog.sioaddr, SIO_REG_MFUNCT1, 1); + } break; case f71868: @@ -810,7 +793,6 @@ static int __init f71808e_find(int sioaddr) break; case SIO_F71862_ID: watchdog.type = f71862fg; - err = f71862fg_pin_configure(0); /* validate module parameter */ break; case SIO_F71868_ID: watchdog.type = f71868; @@ -859,6 +841,11 @@ static int __init f71808e_init(void) int err = -ENODEV; int i; + if (f71862fg_pin != 63 && f71862fg_pin != 56) { + pr_err("Invalid argument f71862fg_pin=%d\n", f71862fg_pin); + return -EINVAL; + } + for (i = 0; i < ARRAY_SIZE(addrs); i++) { err = f71808e_find(addrs[i]); if (err == 0) -- cgit From bd490f8222510dea748b495a502910119060e869 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Jul 2020 12:11:21 -0500 Subject: watchdog: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200707171121.GA13472@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/advantechwdt.c | 2 +- drivers/watchdog/alim1535_wdt.c | 2 +- drivers/watchdog/alim7101_wdt.c | 2 +- drivers/watchdog/ar7_wdt.c | 3 +-- drivers/watchdog/ath79_wdt.c | 2 +- drivers/watchdog/eurotechwdt.c | 2 +- drivers/watchdog/f71808e_wdt.c | 4 ++-- drivers/watchdog/gef_wdt.c | 2 +- drivers/watchdog/geodewdt.c | 2 +- drivers/watchdog/ib700wdt.c | 2 +- drivers/watchdog/it8712f_wdt.c | 2 +- drivers/watchdog/ixp4xx_wdt.c | 2 +- drivers/watchdog/m54xx_wdt.c | 2 +- drivers/watchdog/machzwd.c | 2 +- drivers/watchdog/mv64x60_wdt.c | 2 +- drivers/watchdog/nv_tco.c | 2 +- drivers/watchdog/pc87413_wdt.c | 2 +- drivers/watchdog/pcwd.c | 2 +- drivers/watchdog/pcwd_pci.c | 2 +- drivers/watchdog/pcwd_usb.c | 2 +- drivers/watchdog/rc32434_wdt.c | 2 +- drivers/watchdog/riowd.c | 2 +- drivers/watchdog/sa1100_wdt.c | 2 +- drivers/watchdog/sb_wdog.c | 2 +- drivers/watchdog/sbc60xxwdt.c | 2 +- drivers/watchdog/sbc7240_wdt.c | 2 +- drivers/watchdog/sbc_fitpc2_wdt.c | 2 +- drivers/watchdog/sc520_wdt.c | 2 +- drivers/watchdog/sch311x_wdt.c | 2 +- drivers/watchdog/smsc37b787_wdt.c | 2 +- drivers/watchdog/w83877f_wdt.c | 2 +- drivers/watchdog/w83977f_wdt.c | 2 +- drivers/watchdog/wafer5823wdt.c | 2 +- drivers/watchdog/watchdog_dev.c | 2 +- drivers/watchdog/wdt.c | 2 +- drivers/watchdog/wdt285.c | 2 +- drivers/watchdog/wdt977.c | 2 +- drivers/watchdog/wdt_pci.c | 2 +- 38 files changed, 39 insertions(+), 40 deletions(-) diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c index 0e4c18a2aa42..554fe85da50e 100644 --- a/drivers/watchdog/advantechwdt.c +++ b/drivers/watchdog/advantechwdt.c @@ -177,7 +177,7 @@ static long advwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (advwdt_set_heartbeat(new_timeout)) return -EINVAL; advwdt_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/alim1535_wdt.c b/drivers/watchdog/alim1535_wdt.c index 42338c7d4540..bfb9a91ca1df 100644 --- a/drivers/watchdog/alim1535_wdt.c +++ b/drivers/watchdog/alim1535_wdt.c @@ -220,7 +220,7 @@ static long ali_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; ali_keepalive(); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/alim7101_wdt.c b/drivers/watchdog/alim7101_wdt.c index 5af0358f4390..4ff7f5afb7aa 100644 --- a/drivers/watchdog/alim7101_wdt.c +++ b/drivers/watchdog/alim7101_wdt.c @@ -279,7 +279,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index c087027ffd5d..ff37dc91057d 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -235,8 +235,7 @@ static long ar7_wdt_ioctl(struct file *file, ar7_wdt_update_margin(new_margin); ar7_wdt_kick(1); spin_unlock(&wdt_lock); - /* Fall through */ - + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(margin, (int *)arg)) return -EFAULT; diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index d6dff97c280b..0f18f06a21b6 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -215,8 +215,8 @@ static long ath79_wdt_ioctl(struct file *file, unsigned int cmd, err = ath79_wdt_set_timeout(t); if (err) break; + fallthrough; - /* fallthrough */ case WDIOC_GETTIMEOUT: err = put_user(timeout, p); break; diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c index f5ffa7be066e..2418ebb707bd 100644 --- a/drivers/watchdog/eurotechwdt.c +++ b/drivers/watchdog/eurotechwdt.c @@ -286,7 +286,7 @@ static long eurwdt_ioctl(struct file *file, eurwdt_timeout = time; eurwdt_set_timeout(time); spin_unlock(&eurwdt_lock); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(eurwdt_timeout, p); diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 9f7823819ed1..f60beec1bbae 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -612,7 +612,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, if (new_options & WDIOS_ENABLECARD) return watchdog_start(); - /* fall through */ + fallthrough; case WDIOC_KEEPALIVE: watchdog_keepalive(); @@ -626,7 +626,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, return -EINVAL; watchdog_keepalive(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(watchdog.timeout, uarg.i); diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index f6541d1b65e3..df5406aa7d25 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -201,7 +201,7 @@ static long gef_wdt_ioctl(struct file *file, unsigned int cmd, if (get_user(timeout, (int __user *)argp)) return -EFAULT; gef_wdt_set_timeout(timeout); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(gef_wdt_timeout, (int __user *)argp)) diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 9914a4283cb2..83418924e30a 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -185,7 +185,7 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd, if (geodewdt_set_heartbeat(interval)) return -EINVAL; - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c index 2b65ea9451d1..a0ddedc362fc 100644 --- a/drivers/watchdog/ib700wdt.c +++ b/drivers/watchdog/ib700wdt.c @@ -214,7 +214,7 @@ static long ibwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ibwdt_set_heartbeat(new_margin)) return -EINVAL; ibwdt_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c index 2fed40d14007..9b89d2f09568 100644 --- a/drivers/watchdog/it8712f_wdt.c +++ b/drivers/watchdog/it8712f_wdt.c @@ -303,7 +303,7 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, superio_exit(); it8712f_wdt_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 09886616fd21..aae29dcfaf11 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -136,7 +136,7 @@ static long ixp4xx_wdt_ioctl(struct file *file, unsigned int cmd, heartbeat = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(heartbeat, (int *)arg); diff --git a/drivers/watchdog/m54xx_wdt.c b/drivers/watchdog/m54xx_wdt.c index 60ed6252e5f4..f388a769dbd3 100644 --- a/drivers/watchdog/m54xx_wdt.c +++ b/drivers/watchdog/m54xx_wdt.c @@ -155,7 +155,7 @@ static long m54xx_wdt_ioctl(struct file *file, unsigned int cmd, heartbeat = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(heartbeat, (int *)arg); diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index 80ff94688487..743377c5b173 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -171,7 +171,7 @@ static inline void zf_set_timer(unsigned short new, unsigned char n) switch (n) { case WD1: zf_writew(COUNTER_1, new); - /* fall through */ + fallthrough; case WD2: zf_writeb(COUNTER_2, new > 0xff ? 0xff : new); default: diff --git a/drivers/watchdog/mv64x60_wdt.c b/drivers/watchdog/mv64x60_wdt.c index 0bc72dd69b70..894aa63488d3 100644 --- a/drivers/watchdog/mv64x60_wdt.c +++ b/drivers/watchdog/mv64x60_wdt.c @@ -222,7 +222,7 @@ static long mv64x60_wdt_ioctl(struct file *file, if (get_user(timeout, (int __user *)argp)) return -EFAULT; mv64x60_wdt_set_timeout(timeout); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(mv64x60_wdt_timeout, (int __user *)argp)) diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index d7a560e348d5..d15ad8583a59 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -250,7 +250,7 @@ static long nv_tco_ioctl(struct file *file, unsigned int cmd, if (tco_timer_set_heartbeat(new_heartbeat)) return -EINVAL; tco_timer_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index 73fbfc99083b..2d4504302c9e 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -433,7 +433,7 @@ static long pc87413_ioctl(struct file *file, unsigned int cmd, return -EINVAL; timeout = new_timeout; pc87413_refresh(); - /* fall through - and return the new timeout... */ + fallthrough; /* and return the new timeout */ case WDIOC_GETTIMEOUT: new_timeout = timeout * 60; return put_user(new_timeout, uarg.i); diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 7a0587fdc52c..e86fa7f8351d 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -651,7 +651,7 @@ static long pcwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; pcwd_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, argp); diff --git a/drivers/watchdog/pcwd_pci.c b/drivers/watchdog/pcwd_pci.c index 81508a42a90c..54d86fcb1837 100644 --- a/drivers/watchdog/pcwd_pci.c +++ b/drivers/watchdog/pcwd_pci.c @@ -542,7 +542,7 @@ static long pcipcwd_ioctl(struct file *file, unsigned int cmd, pcipcwd_keepalive(); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 2f44af1831d0..79efca47cebb 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -452,7 +452,7 @@ static long usb_pcwd_ioctl(struct file *file, unsigned int cmd, usb_pcwd_keepalive(usb_pcwd_device); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index aee3c2efd565..e74802f3a32e 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -230,7 +230,7 @@ static long rc32434_wdt_ioctl(struct file *file, unsigned int cmd, return -EFAULT; if (rc32434_wdt_set(new_timeout)) return -EINVAL; - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return copy_to_user(argp, &timeout, sizeof(int)) ? -EFAULT : 0; default: diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index 1b9a6dc8f982..7008596a575f 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -134,7 +134,7 @@ static long riowd_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EINVAL; riowd_timeout = (new_margin + 59) / 60; riowd_writereg(p, riowd_timeout, WDTO_INDEX); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(riowd_timeout * 60, (int __user *)argp); diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index 9b93be00109f..27846c6bdfb0 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -127,7 +127,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, pre_margin = oscr_freq * time; writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); - /*fall through*/ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(pre_margin / oscr_freq, p); diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index da2dad00d473..504be461f992 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -202,7 +202,7 @@ static long sbwdog_ioctl(struct file *file, unsigned int cmd, timeout = time; sbwdog_set(user_dog, timeout); sbwdog_pet(user_dog); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: /* diff --git a/drivers/watchdog/sbc60xxwdt.c b/drivers/watchdog/sbc60xxwdt.c index f2cbe6d880a8..a947a63fb44a 100644 --- a/drivers/watchdog/sbc60xxwdt.c +++ b/drivers/watchdog/sbc60xxwdt.c @@ -265,7 +265,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/sbc7240_wdt.c b/drivers/watchdog/sbc7240_wdt.c index 520b8dd77ed4..d640b26e18a6 100644 --- a/drivers/watchdog/sbc7240_wdt.c +++ b/drivers/watchdog/sbc7240_wdt.c @@ -195,7 +195,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_timeout(new_timeout)) return -EINVAL; } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, (int __user *)arg); default: diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c index 1b20b33879c4..04483d6453d6 100644 --- a/drivers/watchdog/sbc_fitpc2_wdt.c +++ b/drivers/watchdog/sbc_fitpc2_wdt.c @@ -154,7 +154,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, margin = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(margin, (int *)arg); diff --git a/drivers/watchdog/sc520_wdt.c b/drivers/watchdog/sc520_wdt.c index fbe79bcc9297..e66e6b905964 100644 --- a/drivers/watchdog/sc520_wdt.c +++ b/drivers/watchdog/sc520_wdt.c @@ -321,7 +321,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 83949a385f62..d8b77fe10eba 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -295,7 +295,7 @@ static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd, if (sch311x_wdt_set_heartbeat(new_timeout)) return -EINVAL; sch311x_wdt_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/smsc37b787_wdt.c b/drivers/watchdog/smsc37b787_wdt.c index 43de56acd767..7463df479d11 100644 --- a/drivers/watchdog/smsc37b787_wdt.c +++ b/drivers/watchdog/smsc37b787_wdt.c @@ -474,7 +474,7 @@ static long wb_smsc_wdt_ioctl(struct file *file, return -EINVAL; timeout = new_timeout; wb_smsc_wdt_set_timeout(timeout); - /* fall through - and return the new timeout... */ + fallthrough; /* and return the new timeout */ case WDIOC_GETTIMEOUT: new_timeout = timeout; if (unit == UNIT_MINUTE) diff --git a/drivers/watchdog/w83877f_wdt.c b/drivers/watchdog/w83877f_wdt.c index 6b3b667e6f23..5772cc5d3780 100644 --- a/drivers/watchdog/w83877f_wdt.c +++ b/drivers/watchdog/w83877f_wdt.c @@ -289,7 +289,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c index 5212e68c6b01..fd64ae77780a 100644 --- a/drivers/watchdog/w83977f_wdt.c +++ b/drivers/watchdog/w83977f_wdt.c @@ -422,7 +422,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; wdt_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c index a6925847f76f..a8a1ed215e1e 100644 --- a/drivers/watchdog/wafer5823wdt.c +++ b/drivers/watchdog/wafer5823wdt.c @@ -174,7 +174,7 @@ static long wafwdt_ioctl(struct file *file, unsigned int cmd, timeout = new_timeout; wafwdt_stop(); wafwdt_start(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index de02d90a4878..b277eebd377e 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -776,7 +776,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, err = watchdog_ping(wdd); if (err < 0) break; - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: /* timeout == 0 means that we don't know the timeout */ if (wdd->timeout == 0) { diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index f9054cb0f8e2..a9e40b5c633e 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -389,7 +389,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_heartbeat(new_heartbeat)) return -EINVAL; wdt_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: diff --git a/drivers/watchdog/wdt285.c b/drivers/watchdog/wdt285.c index e60993d0767e..110249e5f642 100644 --- a/drivers/watchdog/wdt285.c +++ b/drivers/watchdog/wdt285.c @@ -168,7 +168,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, soft_margin = new_margin; reload = soft_margin * (mem_fclk_21285 / 256); watchdog_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(soft_margin, int_arg); break; diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c index 066a4fb4d75b..c9b8e863f70f 100644 --- a/drivers/watchdog/wdt977.c +++ b/drivers/watchdog/wdt977.c @@ -398,7 +398,7 @@ static long wdt977_ioctl(struct file *file, unsigned int cmd, return -EINVAL; wdt977_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c index e528024faa41..c3254ba5ace6 100644 --- a/drivers/watchdog/wdt_pci.c +++ b/drivers/watchdog/wdt_pci.c @@ -426,7 +426,7 @@ static long wdtpci_ioctl(struct file *file, unsigned int cmd, if (wdtpci_set_heartbeat(new_heartbeat)) return -EINVAL; wdtpci_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: -- cgit From 55a1b87e07fdac9194d8b0c3ded3c4d01e9234ad Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 17 Jul 2020 11:40:59 -0500 Subject: watchdog: scx200_wdt: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/v5.7-rc7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717164059.GA26947@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/scx200_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c index c94098acb78f..7b5e18323f3f 100644 --- a/drivers/watchdog/scx200_wdt.c +++ b/drivers/watchdog/scx200_wdt.c @@ -186,7 +186,7 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd, margin = new_margin; scx200_wdt_update_margin(); scx200_wdt_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; -- cgit From ee86a03a5dc64899edb39fe1ce21429fed98d107 Mon Sep 17 00:00:00 2001 From: Timothy Myers Date: Mon, 6 Jul 2020 16:33:31 +0000 Subject: watchdog: booke_wdt: Add common nowayout parameter driver Add the common "nowayout" parameter to booke_wdt to make this behavior selectable at runtime and to make the implementation more consistent with many other watchdog drivers. Signed-off-by: Timothy Myers Reviewed-by: Guenter Roeck [groeck: Dropped version data, cleaned up subject line] Link: https://lore.kernel.org/r/CH2PR19MB359059AA5C8917D8D24633FF9D690@CH2PR19MB3590.namprd19.prod.outlook.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/booke_wdt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index 9d09bbfdef20..7817fb976f9c 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -39,6 +39,11 @@ static bool booke_wdt_enabled; module_param(booke_wdt_enabled, bool, 0); static int booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT; module_param(booke_wdt_period, int, 0); +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); #ifdef CONFIG_PPC_FSL_BOOK3E @@ -215,7 +220,6 @@ static void __exit booke_wdt_exit(void) static int __init booke_wdt_init(void) { int ret = 0; - bool nowayout = WATCHDOG_NOWAYOUT; pr_info("powerpc book-e watchdog driver loaded\n"); booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value; -- cgit From cb36e29bb0e4b0c33c3d5866a0a4aebace4c99b7 Mon Sep 17 00:00:00 2001 From: Krzysztof Sobota Date: Fri, 17 Jul 2020 12:31:09 +0200 Subject: watchdog: initialize device before misc_register When watchdog device is being registered, it calls misc_register that makes watchdog available for systemd to open. This is a data race scenario, because when device is open it may still have device struct not initialized - this in turn causes a crash. This patch moves device initialization before misc_register call and it solves the problem printed below. ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1 at lib/kobject.c:612 kobject_get+0x50/0x54 kobject: '(null)' ((ptrval)): is not initialized, yet kobject_get() is being called. Modules linked in: k2_reset_status(O) davinci_wdt(+) sfn_platform_hwbcn(O) fsmddg_sfn(O) clk_misc_mmap(O) clk_sw_bcn(O) fsp_reset(O) cma_mod(O) slave_sup_notif(O) fpga_master(O) latency(O+) evnotify(O) enable_arm_pmu(O) xge(O) rio_mport_cdev br_netfilter bridge stp llc nvrd_checksum(O) ipv6 CPU: 3 PID: 1 Comm: systemd Tainted: G O 4.19.113-g2579778-fsm4_k2 #1 Hardware name: Keystone [] (unwind_backtrace) from [] (show_stack+0x18/0x1c) [] (show_stack) from [] (dump_stack+0xb4/0xe8) [] (dump_stack) from [] (__warn+0xfc/0x114) [] (__warn) from [] (warn_slowpath_fmt+0x50/0x74) [] (warn_slowpath_fmt) from [] (kobject_get+0x50/0x54) [] (kobject_get) from [] (get_device+0x1c/0x24) [] (get_device) from [] (watchdog_open+0x90/0xf0) [] (watchdog_open) from [] (misc_open+0x130/0x17c) [] (misc_open) from [] (chrdev_open+0xec/0x1a8) [] (chrdev_open) from [] (do_dentry_open+0x204/0x3cc) [] (do_dentry_open) from [] (path_openat+0x330/0x1148) [] (path_openat) from [] (do_filp_open+0x78/0xec) [] (do_filp_open) from [] (do_sys_open+0x130/0x1f4) [] (do_sys_open) from [] (ret_fast_syscall+0x0/0x28) Exception stack(0xd2ceffa8 to 0xd2cefff0) ffa0: b6f69968 00000000 ffffff9c b6ebd210 000a0001 00000000 ffc0: b6f69968 00000000 00000000 00000142 fffffffd ffffffff 00b65530 bed7bb78 ffe0: 00000142 bed7ba70 b6cc2503 b6cc41d6 ---[ end trace 7b16eb105513974f ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1 at lib/refcount.c:153 kobject_get+0x24/0x54 refcount_t: increment on 0; use-after-free. Modules linked in: k2_reset_status(O) davinci_wdt(+) sfn_platform_hwbcn(O) fsmddg_sfn(O) clk_misc_mmap(O) clk_sw_bcn(O) fsp_reset(O) cma_mod(O) slave_sup_notif(O) fpga_master(O) latency(O+) evnotify(O) enable_arm_pmu(O) xge(O) rio_mport_cdev br_netfilter bridge stp llc nvrd_checksum(O) ipv6 CPU: 3 PID: 1 Comm: systemd Tainted: G W O 4.19.113-g2579778-fsm4_k2 #1 Hardware name: Keystone [] (unwind_backtrace) from [] (show_stack+0x18/0x1c) [] (show_stack) from [] (dump_stack+0xb4/0xe8) [] (dump_stack) from [] (__warn+0xfc/0x114) [] (__warn) from [] (warn_slowpath_fmt+0x50/0x74) [] (warn_slowpath_fmt) from [] (kobject_get+0x24/0x54) [] (kobject_get) from [] (get_device+0x1c/0x24) [] (get_device) from [] (watchdog_open+0x90/0xf0) [] (watchdog_open) from [] (misc_open+0x130/0x17c) [] (misc_open) from [] (chrdev_open+0xec/0x1a8) [] (chrdev_open) from [] (do_dentry_open+0x204/0x3cc) [] (do_dentry_open) from [] (path_openat+0x330/0x1148) [] (path_openat) from [] (do_filp_open+0x78/0xec) [] (do_filp_open) from [] (do_sys_open+0x130/0x1f4) [] (do_sys_open) from [] (ret_fast_syscall+0x0/0x28) Exception stack(0xd2ceffa8 to 0xd2cefff0) ffa0: b6f69968 00000000 ffffff9c b6ebd210 000a0001 00000000 ffc0: b6f69968 00000000 00000000 00000142 fffffffd ffffffff 00b65530 bed7bb78 ffe0: 00000142 bed7ba70 b6cc2503 b6cc41d6 ---[ end trace 7b16eb1055139750 ]--- Fixes: 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev") Reviewed-by: Guenter Roeck Reviewed-by: Alexander Sverdlin Signed-off-by: Krzysztof Sobota Link: https://lore.kernel.org/r/20200717103109.14660-1-krzysztof.sobota@nokia.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_dev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index b277eebd377e..b7d750b24e49 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -994,6 +994,15 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) if (IS_ERR_OR_NULL(watchdog_kworker)) return -ENODEV; + device_initialize(&wd_data->dev); + wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); + wd_data->dev.class = &watchdog_class; + wd_data->dev.parent = wdd->parent; + wd_data->dev.groups = wdd->groups; + wd_data->dev.release = watchdog_core_data_release; + dev_set_drvdata(&wd_data->dev, wdd); + dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); + kthread_init_work(&wd_data->work, watchdog_ping_work); hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); wd_data->timer.function = watchdog_timer_expired; @@ -1014,15 +1023,6 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) } } - device_initialize(&wd_data->dev); - wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); - wd_data->dev.class = &watchdog_class; - wd_data->dev.parent = wdd->parent; - wd_data->dev.groups = wdd->groups; - wd_data->dev.release = watchdog_core_data_release; - dev_set_drvdata(&wd_data->dev, wdd); - dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); - /* Fill in the data structures */ cdev_init(&wd_data->cdev, &watchdog_fops); -- cgit From d821ab28dfd2fa85a9aed133e9bfd368c156cc04 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Jul 2020 12:09:06 +0100 Subject: dt-bindings: watchdog: renesas,wdt: Document r8a774e1 support RZ/G2H (a.k.a. R8A774E1) watchdog implementation is compatible with R-Car Gen3, therefore add the relevant documentation. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Acked-by: Rob Herring Link: https://lore.kernel.org/r/1594811350-14066-17-git-send-email-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml index 572f4c912fef..6933005b52bd 100644 --- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml @@ -41,6 +41,7 @@ properties: - renesas,r8a774a1-wdt # RZ/G2M - renesas,r8a774b1-wdt # RZ/G2N - renesas,r8a774c0-wdt # RZ/G2E + - renesas,r8a774e1-wdt # RZ/G2H - renesas,r8a7795-wdt # R-Car H3 - renesas,r8a7796-wdt # R-Car M3-W - renesas,r8a77961-wdt # R-Car M3-W+ -- cgit From 2ab77a34d070b1611a52b50c7cfae074153d475b Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 22:58:21 +0200 Subject: watchdog: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Reviewed-by: Guenter Roeck Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200713205821.38223-1-grandmaster@al2klimov.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/davinci-wdt.txt | 4 ++-- drivers/watchdog/Kconfig | 2 +- drivers/watchdog/dw_wdt.c | 2 +- drivers/watchdog/nv_tco.c | 2 +- drivers/watchdog/nv_tco.h | 2 +- drivers/watchdog/sp5100_tco.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt index e60b9a13bdcb..aa10b8ec36e2 100644 --- a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt @@ -11,8 +11,8 @@ Optional properties: See clock-bindings.txt Documentation: -Davinci DM646x - http://www.ti.com/lit/ug/spruer5b/spruer5b.pdf -Keystone - http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf +Davinci DM646x - https://www.ti.com/lit/ug/spruer5b/spruer5b.pdf +Keystone - https://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf Examples: diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4f4687c46d38..ab7aad5a1e69 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1027,7 +1027,7 @@ config ADVANTECH_WDT If you are configuring a Linux kernel for the Advantech single-board computer, say `Y' here to support its built-in watchdog timer feature. More information can be found at - + config ALIM1535_WDT tristate "ALi M1535 PMU Watchdog Timer" diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 4fec2c6a3c01..32d0e1781e63 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2010-2011 Picochip Ltd., Jamie Iles - * http://www.picochip.com + * https://www.picochip.com * * This file implements a driver for the Synopsys DesignWare watchdog device * in the many subsystems. The watchdog has 16 different timeout periods diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index d15ad8583a59..f6902a337422 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -7,7 +7,7 @@ * Based off i8xx_tco.c: * (c) Copyright 2000 kernel concepts , All Rights * Reserved. - * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * TCO timer driver for NV chipsets * based on softdog.c by Alan Cox diff --git a/drivers/watchdog/nv_tco.h b/drivers/watchdog/nv_tco.h index d325e528010f..c65f82588386 100644 --- a/drivers/watchdog/nv_tco.h +++ b/drivers/watchdog/nv_tco.h @@ -9,7 +9,7 @@ * * (c) Copyright 2000 kernel concepts , All Rights * Reserved. - * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * Neither kernel concepts nor Nils Faerber admit liability nor provide * warranty for any of this software. This material is provided diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 93bd302ae7c5..85e9664318c9 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -7,7 +7,7 @@ * Based on i8xx_tco.c: * (c) Copyright 2000 kernel concepts , All Rights * Reserved. - * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * See AMD Publication 43009 "AMD SB700/710/750 Register Reference Guide", * AMD Publication 45482 "AMD SB800-Series Southbridges Register -- cgit From f1889c9066722f56428978ce9431a6cacabecb57 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 13 Jul 2020 05:53:48 +0000 Subject: watchdog: pcwd_usb: remove needless check before usb_free_coherent() usb_free_coherent() is safe with NULL usb_pcwd->intr_buffer and this check is not required. Signed-off-by: Xu Wang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200713055348.21620-1-vulab@iscas.ac.cn Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/pcwd_usb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 79efca47cebb..41a928eb91ed 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -585,9 +585,8 @@ static struct notifier_block usb_pcwd_notifier = { static inline void usb_pcwd_delete(struct usb_pcwd_private *usb_pcwd) { usb_free_urb(usb_pcwd->intr_urb); - if (usb_pcwd->intr_buffer != NULL) - usb_free_coherent(usb_pcwd->udev, usb_pcwd->intr_size, - usb_pcwd->intr_buffer, usb_pcwd->intr_dma); + usb_free_coherent(usb_pcwd->udev, usb_pcwd->intr_size, + usb_pcwd->intr_buffer, usb_pcwd->intr_dma); kfree(usb_pcwd); } -- cgit From 36a8947c6b5423b2fb867a7af87ae40eb12f4123 Mon Sep 17 00:00:00 2001 From: Woody Lin Date: Wed, 8 Jul 2020 16:32:19 +0800 Subject: watchdog: softdog: Add options 'soft_reboot_cmd' and 'soft_active_on_boot' Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for customizing softdog configuration; config reboot command by assigning soft_reboot_cmd, and set soft_active_on_boot to start up softdog timer at module initialization stage. Signed-off-by: Woody Lin Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200708083218.3157213-1-woodylin@google.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/softdog.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index 3e4885c1545e..7a1096265f18 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define TIMER_MARGIN 60 /* Default is 60 seconds */ static unsigned int soft_margin = TIMER_MARGIN; /* in seconds */ @@ -49,11 +51,34 @@ module_param(soft_panic, int, 0); MODULE_PARM_DESC(soft_panic, "Softdog action, set to 1 to panic, 0 to reboot (default=0)"); +static char *soft_reboot_cmd; +module_param(soft_reboot_cmd, charp, 0000); +MODULE_PARM_DESC(soft_reboot_cmd, + "Set reboot command. Emergency reboot takes place if unset"); + +static bool soft_active_on_boot; +module_param(soft_active_on_boot, bool, 0000); +MODULE_PARM_DESC(soft_active_on_boot, + "Set to true to active Softdog on boot (default=false)"); + static struct hrtimer softdog_ticktock; static struct hrtimer softdog_preticktock; +static int reboot_kthread_fn(void *data) +{ + kernel_restart(soft_reboot_cmd); + return -EPERM; /* Should not reach here */ +} + +static void reboot_work_fn(struct work_struct *unused) +{ + kthread_run(reboot_kthread_fn, NULL, "softdog_reboot"); +} + static enum hrtimer_restart softdog_fire(struct hrtimer *timer) { + static bool soft_reboot_fired; + module_put(THIS_MODULE); if (soft_noboot) { pr_crit("Triggered - Reboot ignored\n"); @@ -62,6 +87,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer) panic("Software Watchdog Timer expired"); } else { pr_crit("Initiating system reboot\n"); + if (!soft_reboot_fired && soft_reboot_cmd != NULL) { + static DECLARE_WORK(reboot_work, reboot_work_fn); + /* + * The 'kernel_restart' is a 'might-sleep' operation. + * Also, executing it in system-wide workqueues blocks + * any driver from using the same workqueue in its + * shutdown callback function. Thus, we should execute + * the 'kernel_restart' in a standalone kernel thread. + * But since starting a kernel thread is also a + * 'might-sleep' operation, so the 'reboot_work' is + * required as a launcher of the kernel thread. + * + * After request the reboot, restart the timer to + * schedule an 'emergency_restart' reboot after + * 'TIMER_MARGIN' seconds. It's because if the softdog + * hangs, it might be because of scheduling issues. And + * if that is the case, both 'schedule_work' and + * 'kernel_restart' may possibly be malfunctional at the + * same time. + */ + soft_reboot_fired = true; + schedule_work(&reboot_work); + hrtimer_add_expires_ns(timer, + (u64)TIMER_MARGIN * NSEC_PER_SEC); + + return HRTIMER_RESTART; + } emergency_restart(); pr_crit("Reboot didn't ?????\n"); } @@ -145,12 +197,17 @@ static int __init softdog_init(void) softdog_preticktock.function = softdog_pretimeout; } + if (soft_active_on_boot) + softdog_ping(&softdog_dev); + ret = watchdog_register_device(&softdog_dev); if (ret) return ret; pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n", soft_noboot, softdog_dev.timeout, soft_panic, nowayout); + pr_info(" soft_reboot_cmd=%s soft_active_on_boot=%d\n", + soft_reboot_cmd ?: "", soft_active_on_boot); return 0; } -- cgit From fbbe35dfcf949f4d6cf987648e52a85fc0c2a23a Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 17 Jul 2020 16:29:55 +0300 Subject: watchdog: use __watchdog_ping in startup Current watchdog startup functionality does not respect the minimum hw heartbeat setup and the last watchdog ping timeframe when watchdog is already running and userspace process attaches to it. Fix this by using the __watchdog_ping from the startup also. For this code path, we can also let the __watchdog_ping handle the bookkeeping for the worker and last keepalive times. Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717132958.14304-2-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_dev.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index b7d750b24e49..0ad1c393c00e 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -275,15 +275,18 @@ static int watchdog_start(struct watchdog_device *wdd) set_bit(_WDOG_KEEPALIVE, &wd_data->status); started_at = ktime_get(); - if (watchdog_hw_running(wdd) && wdd->ops->ping) - err = wdd->ops->ping(wdd); - else + if (watchdog_hw_running(wdd) && wdd->ops->ping) { + err = __watchdog_ping(wdd); + if (err == 0) + set_bit(WDOG_ACTIVE, &wdd->status); + } else { err = wdd->ops->start(wdd); - if (err == 0) { - set_bit(WDOG_ACTIVE, &wdd->status); - wd_data->last_keepalive = started_at; - wd_data->last_hw_keepalive = started_at; - watchdog_update_worker(wdd); + if (err == 0) { + set_bit(WDOG_ACTIVE, &wdd->status); + wd_data->last_keepalive = started_at; + wd_data->last_hw_keepalive = started_at; + watchdog_update_worker(wdd); + } } return err; -- cgit From cef9572e9af373cefd1adc9e771e89670da5da3c Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 17 Jul 2020 16:29:56 +0300 Subject: watchdog: add support for adjusting last known HW keepalive time Certain watchdogs require the watchdog only to be pinged within a specific time window, pinging too early or too late cause the watchdog to fire. In cases where this sort of watchdog has been started before kernel comes up, we must adjust the watchdog keepalive window to match the actually running timer, so add a new driver API for this purpose. Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717132958.14304-3-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-kernel-api.rst | 12 +++++++++++ drivers/watchdog/watchdog_dev.c | 30 ++++++++++++++++++++++++++ include/linux/watchdog.h | 2 ++ 3 files changed, 44 insertions(+) diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst index 068a55ee0d4a..baf44e986b07 100644 --- a/Documentation/watchdog/watchdog-kernel-api.rst +++ b/Documentation/watchdog/watchdog-kernel-api.rst @@ -336,3 +336,15 @@ an action is taken by a preconfigured pretimeout governor preassigned to the watchdog device. If watchdog pretimeout governor framework is not enabled, watchdog_notify_pretimeout() prints a notification message to the kernel log buffer. + +To set the last known HW keepalive time for a watchdog, the following function +should be used:: + + int watchdog_set_last_hw_keepalive(struct watchdog_device *wdd, + unsigned int last_ping_ms) + +This function must be called immediately after watchdog registration. It +sets the last known hardware heartbeat to have happened last_ping_ms before +current time. Calling this is only needed if the watchdog is already running +when probe is called, and the watchdog can only be pinged after the +min_hw_heartbeat_ms time has passed from the last ping. diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 0ad1c393c00e..531e74994b61 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -1138,6 +1138,36 @@ void watchdog_dev_unregister(struct watchdog_device *wdd) watchdog_cdev_unregister(wdd); } +/* + * watchdog_set_last_hw_keepalive: set last HW keepalive time for watchdog + * @wdd: watchdog device + * @last_ping_ms: time since last HW heartbeat + * + * Adjusts the last known HW keepalive time for a watchdog timer. + * This is needed if the watchdog is already running when the probe + * function is called, and it can't be pinged immediately. This + * function must be called immediately after watchdog registration, + * and min_hw_heartbeat_ms must be set for this to be useful. + */ +int watchdog_set_last_hw_keepalive(struct watchdog_device *wdd, + unsigned int last_ping_ms) +{ + struct watchdog_core_data *wd_data; + ktime_t now; + + if (!wdd) + return -EINVAL; + + wd_data = wdd->wd_data; + + now = ktime_get(); + + wd_data->last_hw_keepalive = ktime_sub(now, ms_to_ktime(last_ping_ms)); + + return __watchdog_ping(wdd); +} +EXPORT_SYMBOL_GPL(watchdog_set_last_hw_keepalive); + /* * watchdog_dev_init: init dev part of watchdog core * diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 1464ce6ffa31..9b19e6bb68b5 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -210,6 +210,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd, extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); +int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int); + /* devres register variant */ int devm_watchdog_register_device(struct device *dev, struct watchdog_device *); -- cgit From 5527483f8f7c8edadb5ad4fe8fcb569f565369ea Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 17 Jul 2020 16:29:57 +0300 Subject: watchdog: rti-wdt: attach to running watchdog during probe If the RTI watchdog is running already during probe, the driver must configure itself to match the HW. Window size and timeout is probed from hardware, and the last keepalive ping is adjusted to match it also. Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717132958.14304-4-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 112 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index d456dd72d99a..7cbdc178ffe8 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -35,7 +35,11 @@ #define RTIWWDRX_NMI 0xa -#define RTIWWDSIZE_50P 0x50 +#define RTIWWDSIZE_50P 0x50 +#define RTIWWDSIZE_25P 0x500 +#define RTIWWDSIZE_12P5 0x5000 +#define RTIWWDSIZE_6P25 0x50000 +#define RTIWWDSIZE_3P125 0x500000 #define WDENABLE_KEY 0xa98559da @@ -48,7 +52,7 @@ #define DWDST BIT(1) -static int heartbeat; +static int heartbeat = DEFAULT_HEARTBEAT; /* * struct to hold data for each WDT device @@ -79,11 +83,9 @@ static int rti_wdt_start(struct watchdog_device *wdd) * be petted during the open window; not too early or not too late. * The HW configuration options only allow for the open window size * to be 50% or less than that; we obviouly want to configure the open - * window as large as possible so we select the 50% option. To avoid - * any glitches, we accommodate 5% safety margin also, so we setup - * the min_hw_hearbeat at 55% of the timeout period. + * window as large as possible so we select the 50% option. */ - wdd->min_hw_heartbeat_ms = 11 * wdd->timeout * 1000 / 20; + wdd->min_hw_heartbeat_ms = 500 * wdd->timeout; /* Generate NMI when wdt expires */ writel_relaxed(RTIWWDRX_NMI, wdt->base + RTIWWDRXCTRL); @@ -110,7 +112,48 @@ static int rti_wdt_ping(struct watchdog_device *wdd) return 0; } -static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) +static int rti_wdt_setup_hw_hb(struct watchdog_device *wdd, u32 wsize) +{ + /* + * RTI only supports a windowed mode, where the watchdog can only + * be petted during the open window; not too early or not too late. + * The HW configuration options only allow for the open window size + * to be 50% or less than that. + */ + switch (wsize) { + case RTIWWDSIZE_50P: + /* 50% open window => 50% min heartbeat */ + wdd->min_hw_heartbeat_ms = 500 * heartbeat; + break; + + case RTIWWDSIZE_25P: + /* 25% open window => 75% min heartbeat */ + wdd->min_hw_heartbeat_ms = 750 * heartbeat; + break; + + case RTIWWDSIZE_12P5: + /* 12.5% open window => 87.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 875 * heartbeat; + break; + + case RTIWWDSIZE_6P25: + /* 6.5% open window => 93.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 935 * heartbeat; + break; + + case RTIWWDSIZE_3P125: + /* 3.125% open window => 96.9% min heartbeat */ + wdd->min_hw_heartbeat_ms = 969 * heartbeat; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static unsigned int rti_wdt_get_timeleft_ms(struct watchdog_device *wdd) { u64 timer_counter; u32 val; @@ -123,11 +166,18 @@ static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) timer_counter = readl_relaxed(wdt->base + RTIDWDCNTR); + timer_counter *= 1000; + do_div(timer_counter, wdt->freq); return timer_counter; } +static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) +{ + return rti_wdt_get_timeleft_ms(wdd) / 1000; +} + static const struct watchdog_info rti_wdt_info = { .options = WDIOF_KEEPALIVEPING, .identity = "K3 RTI Watchdog", @@ -148,6 +198,7 @@ static int rti_wdt_probe(struct platform_device *pdev) struct watchdog_device *wdd; struct rti_wdt_device *wdt; struct clk *clk; + u32 last_ping = 0; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); if (!wdt) @@ -169,6 +220,14 @@ static int rti_wdt_probe(struct platform_device *pdev) return -EINVAL; } + /* + * If watchdog is running at 32k clock, it is not accurate. + * Adjust frequency down in this case so that we don't pet + * the watchdog too often. + */ + if (wdt->freq < 32768) + wdt->freq = wdt->freq * 9 / 10; + pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret) { @@ -185,11 +244,8 @@ static int rti_wdt_probe(struct platform_device *pdev) wdd->min_timeout = 1; wdd->max_hw_heartbeat_ms = (WDT_PRELOAD_MAX << WDT_PRELOAD_SHIFT) / wdt->freq * 1000; - wdd->timeout = DEFAULT_HEARTBEAT; wdd->parent = dev; - watchdog_init_timeout(wdd, heartbeat, dev); - watchdog_set_drvdata(wdd, wdt); watchdog_set_nowayout(wdd, 1); watchdog_set_restart_priority(wdd, 128); @@ -201,12 +257,48 @@ static int rti_wdt_probe(struct platform_device *pdev) goto err_iomap; } + if (readl(wdt->base + RTIDWDCTRL) == WDENABLE_KEY) { + u32 time_left_ms; + u64 heartbeat_ms; + u32 wsize; + + set_bit(WDOG_HW_RUNNING, &wdd->status); + time_left_ms = rti_wdt_get_timeleft_ms(wdd); + heartbeat_ms = readl(wdt->base + RTIDWDPRLD); + heartbeat_ms <<= WDT_PRELOAD_SHIFT; + heartbeat_ms *= 1000; + do_div(heartbeat_ms, wdt->freq); + if (heartbeat_ms != heartbeat * 1000) + dev_warn(dev, "watchdog already running, ignoring heartbeat config!\n"); + + heartbeat = heartbeat_ms; + heartbeat /= 1000; + + wsize = readl(wdt->base + RTIWWDSIZECTRL); + ret = rti_wdt_setup_hw_hb(wdd, wsize); + if (ret) { + dev_err(dev, "bad window size.\n"); + goto err_iomap; + } + + last_ping = heartbeat_ms - time_left_ms; + if (time_left_ms > heartbeat_ms) { + dev_warn(dev, "time_left > heartbeat? Assuming last ping just before now.\n"); + last_ping = 0; + } + } + + watchdog_init_timeout(wdd, heartbeat, dev); + ret = watchdog_register_device(wdd); if (ret) { dev_err(dev, "cannot register watchdog device\n"); goto err_iomap; } + if (last_ping) + watchdog_set_last_hw_keepalive(wdd, last_ping); + return 0; err_iomap: -- cgit From d5b29c2c5ba2bd5bbdb5b744659984185d17d079 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 17 Jul 2020 16:29:58 +0300 Subject: watchdog: rti-wdt: balance pm runtime enable calls PM runtime should be disabled in the fail path of probe and when the driver is removed. Fixes: 2d63908bdbfb ("watchdog: Add K3 RTI watchdog support") Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717132958.14304-5-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/rti_wdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 7cbdc178ffe8..705e8f7523e8 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -303,6 +303,7 @@ static int rti_wdt_probe(struct platform_device *pdev) err_iomap: pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } @@ -313,6 +314,7 @@ static int rti_wdt_remove(struct platform_device *pdev) watchdog_unregister_device(&wdt->wdd); pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); return 0; } -- cgit From 045838bc7f47f142df82a5a95a1ced73d80705e1 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 08:40:57 +0900 Subject: openrisc: io: Fixup defines and move include to the end This didn't seem to cause any issues, but while working on fixing up sparse annotations for OpenRISC I noticed this. This patch moves the include of asm-generic/io.h to the end of the file. Also, we add defines of ioremap and iounmap, that way we don't get duplicate definitions from asm-generic/io.h. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/io.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index db02fb2077d9..7d6b4a77b379 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -14,6 +14,8 @@ #ifndef __ASM_OPENRISC_IO_H #define __ASM_OPENRISC_IO_H +#include + /* * PCI: can we really do 0 here if we have no port IO? */ @@ -25,9 +27,12 @@ #define PIO_OFFSET 0 #define PIO_MASK 0 -#include - +#define ioremap ioremap void __iomem *ioremap(phys_addr_t offset, unsigned long size); + +#define iounmap iounmap extern void iounmap(void *addr); +#include + #endif -- cgit From 8426fe70cfa4e4545c6a3709918638a7f613d528 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:19 +0800 Subject: i2c: mediatek: Add apdma sync in i2c driver With the apdma remove hand-shake signal, it need to keep i2c and apdma in sync manually. Reviewed-by: Yingjoe Chen Reviewed-by: Matthias Brugger Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index deef69e56906..e6b984aebe74 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -48,6 +48,9 @@ #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 +#define I2C_DMA_ASYNC_MODE 0x0004 +#define I2C_DMA_SKIP_CONFIG 0x0010 +#define I2C_DMA_DIR_CHANGE 0x0200 #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 @@ -205,6 +208,7 @@ struct mtk_i2c_compatible { unsigned char timing_adjust: 1; unsigned char dma_sync: 1; unsigned char ltiming_adjust: 1; + unsigned char apdma_sync: 1; }; struct mtk_i2c_ac_timing { @@ -311,6 +315,7 @@ static const struct mtk_i2c_compatible mt2712_compat = { .timing_adjust = 1, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt6577_compat = { @@ -324,6 +329,7 @@ static const struct mtk_i2c_compatible mt6577_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt6589_compat = { @@ -337,6 +343,7 @@ static const struct mtk_i2c_compatible mt6589_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt7622_compat = { @@ -350,6 +357,7 @@ static const struct mtk_i2c_compatible mt7622_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt8173_compat = { @@ -362,6 +370,7 @@ static const struct mtk_i2c_compatible mt8173_compat = { .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, }; static const struct mtk_i2c_compatible mt8183_compat = { @@ -375,6 +384,7 @@ static const struct mtk_i2c_compatible mt8183_compat = { .timing_adjust = 1, .dma_sync = 1, .ltiming_adjust = 1, + .apdma_sync = 0, }; static const struct of_device_id mtk_i2c_of_match[] = { @@ -798,6 +808,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, u16 start_reg; u16 control_reg; u16 restart_flag = 0; + u16 dma_sync = 0; u32 reg_4g_mode; u8 *dma_rd_buf = NULL; u8 *dma_wr_buf = NULL; @@ -851,10 +862,16 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, mtk_i2c_writew(i2c, num, OFFSET_TRANSAC_LEN); } + if (i2c->dev_comp->apdma_sync) { + dma_sync = I2C_DMA_SKIP_CONFIG | I2C_DMA_ASYNC_MODE; + if (i2c->op == I2C_MASTER_WRRD) + dma_sync |= I2C_DMA_DIR_CHANGE; + } + /* Prepare buffer data to start transfer */ if (i2c->op == I2C_MASTER_RD) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_RX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_RX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_rd_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_rd_buf) @@ -877,7 +894,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_RX_LEN); } else if (i2c->op == I2C_MASTER_WR) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_TX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_TX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) @@ -900,7 +917,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_TX_LEN); } else { writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CLR_FLAG | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) -- cgit From 908d984336daeb0cad65ba015ab0ade132f2e315 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:20 +0800 Subject: i2c: mediatek: Add access to more than 8GB dram in i2c driver Newer MTK chip support more than 8GB of dram. Replace support_33bits with more general dma_max_support and remove mtk_i2c_set_4g_mode. Reviewed-by: Yingjoe Chen Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index e6b984aebe74..463860e1d54b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -55,7 +55,6 @@ #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 #define I2C_DMA_HARD_RST 0x0002 -#define I2C_DMA_4G_MODE 0x0001 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -204,11 +203,11 @@ struct mtk_i2c_compatible { unsigned char dcm: 1; unsigned char auto_restart: 1; unsigned char aux_len_reg: 1; - unsigned char support_33bits: 1; unsigned char timing_adjust: 1; unsigned char dma_sync: 1; unsigned char ltiming_adjust: 1; unsigned char apdma_sync: 1; + unsigned char max_dma_support; }; struct mtk_i2c_ac_timing { @@ -311,11 +310,11 @@ static const struct mtk_i2c_compatible mt2712_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt6577_compat = { @@ -325,11 +324,11 @@ static const struct mtk_i2c_compatible mt6577_compat = { .dcm = 1, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt6589_compat = { @@ -339,11 +338,11 @@ static const struct mtk_i2c_compatible mt6589_compat = { .dcm = 0, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt7622_compat = { @@ -353,11 +352,11 @@ static const struct mtk_i2c_compatible mt7622_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt8173_compat = { @@ -366,11 +365,11 @@ static const struct mtk_i2c_compatible mt8173_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt8183_compat = { @@ -380,11 +379,11 @@ static const struct mtk_i2c_compatible mt8183_compat = { .dcm = 0, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 1, .ltiming_adjust = 1, .apdma_sync = 0, + .max_dma_support = 33, }; static const struct of_device_id mtk_i2c_of_match[] = { @@ -796,11 +795,6 @@ static int mtk_i2c_set_speed(struct mtk_i2c *i2c, unsigned int parent_clk) return 0; } -static inline u32 mtk_i2c_set_4g_mode(dma_addr_t addr) -{ - return (addr & BIT_ULL(32)) ? I2C_DMA_4G_MODE : I2C_DMA_CLR_FLAG; -} - static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, int num, int left_num) { @@ -885,8 +879,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -908,8 +902,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); } @@ -954,11 +948,11 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -1232,8 +1226,9 @@ static int mtk_i2c_probe(struct platform_device *pdev) return -EINVAL; } - if (i2c->dev_comp->support_33bits) { - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(33)); + if (i2c->dev_comp->max_dma_support > 32) { + ret = dma_set_mask(&pdev->dev, + DMA_BIT_MASK(i2c->dev_comp->max_dma_support)); if (ret) { dev_err(&pdev->dev, "dma_set_mask return error.\n"); return ret; -- cgit From 311df9d580306cbfd98b6f7c860175a2471e2130 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:21 +0800 Subject: dt-bindings: i2c: update bindings for MT8192 SoC Add a DT binding documentation for the MT8192 soc. Acked-by: Rob Herring Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt index 88b71c1b32c9..7f0194fdd0cc 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt @@ -14,6 +14,7 @@ Required properties: "mediatek,mt7629-i2c", "mediatek,mt2712-i2c": for MediaTek MT7629 "mediatek,mt8173-i2c": for MediaTek MT8173 "mediatek,mt8183-i2c": for MediaTek MT8183 + "mediatek,mt8192-i2c": for MediaTek MT8192 "mediatek,mt8516-i2c", "mediatek,mt2712-i2c": for MediaTek MT8516 - reg: physical base address of the controller and dma base, length of memory mapped region. -- cgit From 789e67ba5454c0fa78471a6cc5ae143794fb8033 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Wed, 5 Aug 2020 18:52:22 +0800 Subject: i2c: mediatek: Add i2c compatible for MediaTek MT8192 Add i2c compatible for MT8192. Compare to MT8183 i2c controller, MT8192 support more then 8GB DMA mode. Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 463860e1d54b..e889f74703e4 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -386,6 +386,20 @@ static const struct mtk_i2c_compatible mt8183_compat = { .max_dma_support = 33, }; +static const struct mtk_i2c_compatible mt8192_compat = { + .quirks = &mt8183_i2c_quirks, + .regs = mt_i2c_regs_v2, + .pmic_i2c = 0, + .dcm = 0, + .auto_restart = 1, + .aux_len_reg = 1, + .timing_adjust = 1, + .dma_sync = 1, + .ltiming_adjust = 1, + .apdma_sync = 1, + .max_dma_support = 36, +}; + static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt2712-i2c", .data = &mt2712_compat }, { .compatible = "mediatek,mt6577-i2c", .data = &mt6577_compat }, @@ -393,6 +407,7 @@ static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat }, { .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat }, { .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat }, + { .compatible = "mediatek,mt8192-i2c", .data = &mt8192_compat }, {} }; MODULE_DEVICE_TABLE(of, mtk_i2c_of_match); -- cgit From e8efa9b88e3c20a20ff34bb33e2e94bf6896016a Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 4 Aug 2020 17:27:18 -0700 Subject: md: get sysfs entry after redundancy attr group create "sync_completed" and "degraded" belongs to redundancy attr group, it was not exist yet when md device was created. Reported-by: kernel test robot Fixes: e1a86dbbbd6a ("md: fix deadlock causing by sysfs_notify") Signed-off-by: Junxiao Bi Signed-off-by: Song Liu --- drivers/md/md.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9c69084cae73..6b511c9007d3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -876,7 +876,13 @@ void mddev_unlock(struct mddev *mddev) sysfs_remove_group(&mddev->kobj, &md_redundancy_group); if (mddev->sysfs_action) sysfs_put(mddev->sysfs_action); + if (mddev->sysfs_completed) + sysfs_put(mddev->sysfs_completed); + if (mddev->sysfs_degraded) + sysfs_put(mddev->sysfs_degraded); mddev->sysfs_action = NULL; + mddev->sysfs_completed = NULL; + mddev->sysfs_degraded = NULL; } } mddev->sysfs_active = 0; @@ -4094,6 +4100,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len) pr_warn("md: cannot register extra attributes for %s\n", mdname(mddev)); mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); + mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); + mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); } if (oldpers->sync_request != NULL && pers->sync_request == NULL) { @@ -5609,14 +5617,9 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_state) sysfs_put(mddev->sysfs_state); - if (mddev->sysfs_completed) - sysfs_put(mddev->sysfs_completed); - if (mddev->sysfs_degraded) - sysfs_put(mddev->sysfs_degraded); if (mddev->sysfs_level) sysfs_put(mddev->sysfs_level); - if (mddev->gendisk) del_gendisk(mddev->gendisk); if (mddev->queue) @@ -5783,8 +5786,6 @@ static int md_alloc(dev_t dev, char *name) if (!error && mddev->kobj.sd) { kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); - mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); - mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); } mddev_put(mddev); @@ -6064,6 +6065,8 @@ int md_run(struct mddev *mddev) pr_warn("md: cannot register extra attributes for %s\n", mdname(mddev)); mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); + mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); + mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; -- cgit From b35fd7422c2f8e04496f5a770bd4e1a205414b3f Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 6 Aug 2020 01:25:03 +0800 Subject: block: check queue's limits.discard_granularity in __blkdev_issue_discard() If create a loop device with a backing NVMe SSD, current loop device driver doesn't correctly set its queue's limits.discard_granularity and leaves it as 0. If a discard request at LBA 0 on this loop device, in __blkdev_issue_discard() the calculated req_sects will be 0, and a zero length discard request will trigger a BUG() panic in generic block layer code at block/blk-mq.c:563. [ 955.565006][ C39] ------------[ cut here ]------------ [ 955.559660][ C39] invalid opcode: 0000 [#1] SMP NOPTI [ 955.622171][ C39] CPU: 39 PID: 248 Comm: ksoftirqd/39 Tainted: G E 5.8.0-default+ #40 [ 955.622171][ C39] Hardware name: Lenovo ThinkSystem SR650 -[7X05CTO1WW]-/-[7X05CTO1WW]-, BIOS -[IVE160M-2.70]- 07/17/2020 [ 955.622175][ C39] RIP: 0010:blk_mq_end_request+0x107/0x110 [ 955.622177][ C39] Code: 48 8b 03 e9 59 ff ff ff 48 89 df 5b 5d 41 5c e9 9f ed ff ff 48 8b 35 98 3c f4 00 48 83 c7 10 48 83 c6 19 e8 cb 56 c9 ff eb cb <0f> 0b 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 56 41 54 [ 955.622179][ C39] RSP: 0018:ffffb1288701fe28 EFLAGS: 00010202 [ 955.749277][ C39] RAX: 0000000000000001 RBX: ffff956fffba5080 RCX: 0000000000004003 [ 955.749278][ C39] RDX: 0000000000000003 RSI: 0000000000000000 RDI: 0000000000000000 [ 955.749279][ C39] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 955.749279][ C39] R10: ffffb1288701fd28 R11: 0000000000000001 R12: ffffffffa8e05160 [ 955.749280][ C39] R13: 0000000000000004 R14: 0000000000000004 R15: ffffffffa7ad3a1e [ 955.749281][ C39] FS: 0000000000000000(0000) GS:ffff95bfbda00000(0000) knlGS:0000000000000000 [ 955.749282][ C39] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 955.749282][ C39] CR2: 00007f6f0ef766a8 CR3: 0000005a37012002 CR4: 00000000007606e0 [ 955.749283][ C39] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 955.749284][ C39] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 955.749284][ C39] PKRU: 55555554 [ 955.749285][ C39] Call Trace: [ 955.749290][ C39] blk_done_softirq+0x99/0xc0 [ 957.550669][ C39] __do_softirq+0xd3/0x45f [ 957.550677][ C39] ? smpboot_thread_fn+0x2f/0x1e0 [ 957.550679][ C39] ? smpboot_thread_fn+0x74/0x1e0 [ 957.550680][ C39] ? smpboot_thread_fn+0x14e/0x1e0 [ 957.550684][ C39] run_ksoftirqd+0x30/0x60 [ 957.550687][ C39] smpboot_thread_fn+0x149/0x1e0 [ 957.886225][ C39] ? sort_range+0x20/0x20 [ 957.886226][ C39] kthread+0x137/0x160 [ 957.886228][ C39] ? kthread_park+0x90/0x90 [ 957.886231][ C39] ret_from_fork+0x22/0x30 [ 959.117120][ C39] ---[ end trace 3dacdac97e2ed164 ]--- This is the procedure to reproduce the panic, # modprobe scsi_debug delay=0 dev_size_mb=2048 max_queue=1 # losetup -f /dev/nvme0n1 --direct-io=on # blkdiscard /dev/loop0 -o 0 -l 0x200 This patch fixes the issue by checking q->limits.discard_granularity in __blkdev_issue_discard() before composing the discard bio. If the value is 0, then prints a warning oops information and returns -EOPNOTSUPP to the caller to indicate that this buggy device driver doesn't support discard request. Fixes: 9b15d109a6b2 ("block: improve discard bio alignment in __blkdev_issue_discard()") Fixes: c52abf563049 ("loop: Better discard support for block devices") Reported-and-suggested-by: Ming Lei Signed-off-by: Coly Li Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Jack Wang Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: Enzo Matsumiya Cc: Evan Green Cc: Jens Axboe Cc: Martin K. Petersen Cc: Xiao Ni Signed-off-by: Jens Axboe --- block/blk-lib.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/blk-lib.c b/block/blk-lib.c index 019e09bb9c0e..0d1811e57ac7 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -47,6 +47,15 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, op = REQ_OP_DISCARD; } + /* In case the discard granularity isn't set by buggy device driver */ + if (WARN_ON_ONCE(!q->limits.discard_granularity)) { + char dev_name[BDEVNAME_SIZE]; + + bdevname(bdev, dev_name); + pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name); + return -EOPNOTSUPP; + } + bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; if ((sector | nr_sects) & bs_mask) return -EINVAL; -- cgit From e8abe1de43dac658dacbd04a4543e0c988a8d386 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Aug 2020 13:16:45 +0300 Subject: md-cluster: Fix potential error pointer dereference in resize_bitmaps() The error handling calls md_bitmap_free(bitmap) which checks for NULL but will Oops if we pass an error pointer. Let's set "bitmap" to NULL on this error path. Fixes: afd756286083 ("md-cluster/raid10: resize all the bitmaps before start reshape") Signed-off-by: Dan Carpenter Reviewed-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 73fd50e77975..d50737ec4039 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1139,6 +1139,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz bitmap = get_bitmap_from_slot(mddev, i); if (IS_ERR(bitmap)) { pr_err("can't get bitmap from slot %d\n", i); + bitmap = NULL; goto out; } counts = &bitmap->counts; -- cgit From a1bd06853ee478d37fae9435c5521e301de94c67 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Wed, 5 Aug 2020 16:31:38 -0400 Subject: sched: Fix use of count for nr_running tracepoint The count field is meant to tell if an update to nr_running is an add or a subtract. Make it do so by adding the missing minus sign. Fixes: 9d246053a691 ("sched: Add a tracepoint to track rq->nr_running") Signed-off-by: Phil Auld Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200805203138.1411-1-pauld@redhat.com --- kernel/sched/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3fd283892761..28709f6b0975 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1999,7 +1999,7 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; if (trace_sched_update_nr_running_tp_enabled()) { - call_trace_sched_update_nr_running(rq, count); + call_trace_sched_update_nr_running(rq, -count); } /* Check if we still need preemption */ -- cgit From 4c5a116ada953b86125ab7c70a57c57463a55a55 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Aug 2020 22:37:48 +0200 Subject: vdso/treewide: Add vdso_data pointer argument to __arch_get_hw_counter() MIPS already uses and S390 will need the vdso data pointer in __arch_get_hw_counter(). This works nicely as long as the architecture does not support time namespaces in the VDSO. With time namespaces enabled the regular accessor to the vdso data pointer __arch_get_vdso_data() will return the namespace specific VDSO data page for tasks which are part of a non-root time namespace. This would cause the architectures which need the vdso data pointer in __arch_get_hw_counter() to access the wrong vdso data page. Add a vdso_data pointer argument to __arch_get_hw_counter() and hand it in from the call sites in the core code. For architectures which do not need the data pointer in their counter accessor function the compiler will just optimize it out. Fix up all existing architecture implementations and make MIPS utilize the pointer instead of invoking the accessor function. No functional change and no change in the resulting object code (except MIPS). Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/draft-87wo2ekuzn.fsf@nanos.tec.linutronix.de --- arch/arm/include/asm/vdso/gettimeofday.h | 3 ++- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 3 ++- arch/arm64/include/asm/vdso/gettimeofday.h | 3 ++- arch/mips/include/asm/vdso/gettimeofday.h | 5 +++-- arch/riscv/include/asm/vdso/gettimeofday.h | 3 ++- arch/x86/include/asm/vdso/gettimeofday.h | 3 ++- lib/vdso/gettimeofday.c | 4 ++-- 7 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 1b207cf07697..2134cbd5469f 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -113,7 +113,8 @@ static inline bool arm_vdso_hres_capable(void) } #define __arch_vdso_hres_capable arm_vdso_hres_capable -static __always_inline u64 __arch_get_hw_counter(int clock_mode) +static __always_inline u64 __arch_get_hw_counter(int clock_mode, + const struct vdso_data *vd) { #ifdef CONFIG_ARM_ARCH_TIMER u64 cycle_now; diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 75cbae60455b..7508b0ac1d21 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -103,7 +103,8 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 9c29ad3049f8..631ab1281633 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -64,7 +64,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index c63ddcaea54c..2203e2d0ae2a 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -167,7 +167,8 @@ static __always_inline u64 read_gic_count(const struct vdso_data *data) #endif -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { #ifdef CONFIG_CSRC_R4K if (clock_mode == VDSO_CLOCKMODE_R4K) @@ -175,7 +176,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) #endif #ifdef CONFIG_CLKSRC_MIPS_GIC if (clock_mode == VDSO_CLOCKMODE_GIC) - return read_gic_count(get_vdso_data()); + return read_gic_count(vd); #endif /* * Core checks mode already. So this raced against a concurrent diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index 3099362d9f26..f839f16e0d2a 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -60,7 +60,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { /* * The purpose of csr_read(CSR_TIME) is to trap the system into diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index fb81fea99093..df01d7349d79 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -241,7 +241,8 @@ static u64 vread_hvclock(void) } #endif -static inline u64 __arch_get_hw_counter(s32 clock_mode) +static inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) return (u64)rdtsc_ordered(); diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index bcc9a98a0524..2919f1698140 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -68,7 +68,7 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, if (unlikely(!vdso_clocksource_ok(vd))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode); + cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; ns = vdso_ts->nsec; @@ -138,7 +138,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, if (unlikely(!vdso_clocksource_ok(vd))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode); + cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; ns = vdso_ts->nsec; -- cgit From 19d0070a2792181f79df01277fe00b83b9f7eda7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Aug 2020 17:01:23 +0200 Subject: timekeeping/vsyscall: Provide vdso_update_begin/end() Architectures can have the requirement to add additional architecture specific data to the VDSO data page which needs to be updated independent of the timekeeper updates. To protect these updates vs. concurrent readers and a conflicting update through timekeeping, provide helper functions to make such updates safe. vdso_update_begin() takes the timekeeper_lock to protect against a potential update from timekeeper code and increments the VDSO sequence count to signal data inconsistency to concurrent readers. vdso_update_end() makes the sequence count even again to signal data consistency and drops the timekeeper lock. [ Sven: Add interrupt disable handling to the functions ] Signed-off-by: Thomas Gleixner Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200804150124.41692-3-svens@linux.ibm.com --- include/vdso/vsyscall.h | 3 +++ kernel/time/timekeeping.c | 2 +- kernel/time/timekeeping_internal.h | 11 +++++++--- kernel/time/vsyscall.c | 41 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/include/vdso/vsyscall.h b/include/vdso/vsyscall.h index 2c6134e0c23d..b0fdc9c6bf43 100644 --- a/include/vdso/vsyscall.h +++ b/include/vdso/vsyscall.h @@ -6,6 +6,9 @@ #include +unsigned long vdso_update_begin(void); +void vdso_update_end(unsigned long flags); + #endif /* !__ASSEMBLY__ */ #endif /* __VDSO_VSYSCALL_H */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 63a632f9896c..4c7212f3c603 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -50,7 +50,7 @@ static struct { .seq = SEQCNT_ZERO(tk_core.seq), }; -static DEFINE_RAW_SPINLOCK(timekeeper_lock); +DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; /** diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index bcbb52db2256..4ca2787d1642 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TIMEKEEPING_INTERNAL_H #define _TIMEKEEPING_INTERNAL_H -/* - * timekeeping debug functions - */ + #include +#include #include +/* + * timekeeping debug functions + */ #ifdef CONFIG_DEBUG_FS extern void tk_debug_account_sleep_time(const struct timespec64 *t); #else @@ -31,4 +33,7 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) } #endif +/* Semi public for serialization of non timekeeper VDSO updates. */ +extern raw_spinlock_t timekeeper_lock; + #endif /* _TIMEKEEPING_INTERNAL_H */ diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 54ce6eb2ca36..88e6b8ed6ca5 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -13,6 +13,8 @@ #include #include +#include "timekeeping_internal.h" + static inline void update_vdso_data(struct vdso_data *vdata, struct timekeeper *tk) { @@ -127,3 +129,42 @@ void update_vsyscall_tz(void) __arch_sync_vdso_data(vdata); } + +/** + * vdso_update_begin - Start of a VDSO update section + * + * Allows architecture code to safely update the architecture specific VDSO + * data. Disables interrupts, acquires timekeeper lock to serialize against + * concurrent updates from timekeeping and invalidates the VDSO data + * sequence counter to prevent concurrent readers from accessing + * inconsistent data. + * + * Returns: Saved interrupt flags which need to be handed in to + * vdso_update_end(). + */ +unsigned long vdso_update_begin(void) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + unsigned long flags; + + raw_spin_lock_irqsave(&timekeeper_lock, flags); + vdso_write_begin(vdata); + return flags; +} + +/** + * vdso_update_end - End of a VDSO update section + * @flags: Interrupt flags as returned from vdso_update_begin() + * + * Pairs with vdso_update_begin(). Marks vdso data consistent, invokes data + * synchronization if the architecture requires it, drops timekeeper lock + * and restores interrupt flags. + */ +void vdso_update_end(unsigned long flags) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + + vdso_write_end(vdata); + __arch_sync_vdso_data(vdata); + raw_spin_unlock_irqrestore(&timekeeper_lock, flags); +} -- cgit From d60d7de3e16d7cea998bad17d87366a359625894 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 4 Aug 2020 17:01:22 +0200 Subject: lib/vdso: Allow to add architecture-specific vdso data The initial assumption that all VDSO related data can be completely generic does not hold. S390 needs architecture specific storage to access the clock steering information. Add struct arch_vdso_data to the vdso data struct. For architectures which do not need extra data this defaults to an empty struct. Architectures which require it, enable CONFIG_ARCH_HAS_VDSO_DATA and provide their specific struct in asm/vdso/data.h. Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200804150124.41692-2-svens@linux.ibm.com --- arch/Kconfig | 3 +++ include/vdso/datapage.h | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index a1124481d910..b44dd6b9e2bd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -975,6 +975,9 @@ config HAVE_SPARSE_SYSCALL_NR entries at 4000, 5000 and 6000 locations. This option turns on syscall related optimizations for a given architecture. +config ARCH_HAS_VDSO_DATA + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index ee810cae4e1e..73eb622e7663 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,6 +19,12 @@ #include #include +#ifdef CONFIG_ARCH_HAS_VDSO_DATA +#include +#else +struct arch_vdso_data {}; +#endif + #define VDSO_BASES (CLOCK_TAI + 1) #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ @@ -64,6 +70,8 @@ struct vdso_timestamp { * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution * @__unused: unused + * @arch_data: architecture specific data (optional, defaults + * to an empty struct) * * vdso_data will be accessed by 64 bit and compat code at the same time * so we should be careful before modifying this structure. @@ -97,6 +105,8 @@ struct vdso_data { s32 tz_dsttime; u32 hrtimer_res; u32 __unused; + + struct arch_vdso_data arch_data; }; /* -- cgit From 45fd22da97c6125d8d0d35bd1791e7c0c4175279 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Wed, 5 Aug 2020 10:56:56 +0300 Subject: perf/core: Take over CAP_SYS_PTRACE creds to CAP_PERFMON capability Open access to per-process monitoring for CAP_PERFMON only privileged processes [1]. Extend ptrace_may_access() check in perf_events subsystem with perfmon_capable() to simplify user experience and make monitoring more secure by reducing attack surface. [1] https://lore.kernel.org/lkml/7776fa40-6c65-2aa6-1322-eb3a01201000@linux.intel.com/ Signed-off-by: Alexey Budankov Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/6e8392ff-4732-0012-2949-e1587709f0f6@linux.intel.com --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 78e69e10482a..41e0cefb429b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11689,7 +11689,7 @@ SYSCALL_DEFINE5(perf_event_open, goto err_task; /* - * Reuse ptrace permission checks for now. + * Preserve ptrace permission check for backwards compatibility. * * We must hold exec_update_mutex across this and any potential * perf_install_in_context() call for this new event to @@ -11697,7 +11697,7 @@ SYSCALL_DEFINE5(perf_event_open, * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) goto err_cred; } -- cgit From bd74048108c179cea0ff52979506164c80f29da7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Aug 2020 12:58:23 -0600 Subject: io_uring: set ctx sq/cq entry count earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we hit an earlier error path in io_uring_create(), then we will have accounted memory, but not set ctx->{sq,cq}_entries yet. Then when the ring is torn down in error, we use those values to unaccount the memory. Ensure we set the ctx entries before we're able to hit a potential error path. Cc: stable@vger.kernel.org Reported-by: Tomáš Chaloupka Tested-by: Tomáš Chaloupka Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8f96566603f3..0d857f7ca507 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8193,6 +8193,10 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, struct io_rings *rings; size_t size, sq_array_offset; + /* make sure these are sane, as we already accounted them */ + ctx->sq_entries = p->sq_entries; + ctx->cq_entries = p->cq_entries; + size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); if (size == SIZE_MAX) return -EOVERFLOW; @@ -8209,8 +8213,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->cq_ring_entries = p->cq_entries; ctx->sq_mask = rings->sq_ring_mask; ctx->cq_mask = rings->cq_ring_mask; - ctx->sq_entries = rings->sq_ring_entries; - ctx->cq_entries = rings->cq_ring_entries; size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); if (size == SIZE_MAX) { -- cgit From 735d77d4fd28bd47db9998a3744346302f8c1ee9 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 30 Jul 2020 11:13:57 +0200 Subject: rnbd: remove rnbd_dev_submit_io The function only has one caller, so let's open code it in process_rdma. Another bonus is we can avoid push/pop stack, since we need to pass 8 arguments to rnbd_dev_submit_io. Signed-off-by: Guoqing Jiang Acked-by: Danil Kipnis Acked-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv-dev.c | 36 +++--------------------------------- drivers/block/rnbd/rnbd-srv-dev.h | 19 +++++-------------- drivers/block/rnbd/rnbd-srv.c | 32 +++++++++++++++++++++++--------- 3 files changed, 31 insertions(+), 56 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c index 5eddfd29ab64..49c62b506c9b 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.c +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -45,7 +45,7 @@ void rnbd_dev_close(struct rnbd_dev *dev) kfree(dev); } -static void rnbd_dev_bi_end_io(struct bio *bio) +void rnbd_dev_bi_end_io(struct bio *bio) { struct rnbd_dev_blk_io *io = bio->bi_private; @@ -63,8 +63,8 @@ static void rnbd_dev_bi_end_io(struct bio *bio) * Map the kernel address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ -static struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, - unsigned int len, gfp_t gfp_mask) +struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, + unsigned int len, gfp_t gfp_mask) { unsigned long kaddr = (unsigned long)data; unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -102,33 +102,3 @@ static struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, bio->bi_end_io = bio_put; return bio; } - -int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data, - size_t len, u32 bi_size, enum rnbd_io_flags flags, - short prio, void *priv) -{ - struct rnbd_dev_blk_io *io; - struct bio *bio; - - /* Generate bio with pages pointing to the rdma buffer */ - bio = rnbd_bio_map_kern(data, dev->ibd_bio_set, len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - io = container_of(bio, struct rnbd_dev_blk_io, bio); - - io->dev = dev; - io->priv = priv; - - bio->bi_end_io = rnbd_dev_bi_end_io; - bio->bi_private = io; - bio->bi_opf = rnbd_to_bio_flags(flags); - bio->bi_iter.bi_sector = sector; - bio->bi_iter.bi_size = bi_size; - bio_set_prio(bio, prio); - bio_set_dev(bio, dev->bdev); - - submit_bio(bio); - - return 0; -} diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 0f65b09a270e..0eb23850afb9 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -41,6 +41,11 @@ void rnbd_dev_close(struct rnbd_dev *dev); void rnbd_endio(void *priv, int error); +void rnbd_dev_bi_end_io(struct bio *bio); + +struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, + unsigned int len, gfp_t gfp_mask); + static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev) { return queue_max_segments(bdev_get_queue(dev->bdev)); @@ -75,18 +80,4 @@ static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev) return bdev_get_queue(dev->bdev)->limits.discard_alignment; } -/** - * rnbd_dev_submit_io() - Submit an I/O to the disk - * @dev: device to that the I/O is submitted - * @sector: address to read/write data to - * @data: I/O data to write or buffer to read I/O date into - * @len: length of @data - * @bi_size: Amount of data that will be read/written - * @prio: IO priority - * @priv: private data passed to @io_fn - */ -int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data, - size_t len, u32 bi_size, enum rnbd_io_flags flags, - short prio, void *priv); - #endif /* RNBD_SRV_DEV_H */ diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 86e61523907b..0fb94843a495 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -124,6 +124,9 @@ static int process_rdma(struct rtrs_srv *sess, struct rnbd_srv_sess_dev *sess_dev; u32 dev_id; int err; + struct rnbd_dev_blk_io *io; + struct bio *bio; + short prio; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -142,18 +145,29 @@ static int process_rdma(struct rtrs_srv *sess, priv->sess_dev = sess_dev; priv->id = id; - err = rnbd_dev_submit_io(sess_dev->rnbd_dev, le64_to_cpu(msg->sector), - data, datalen, le32_to_cpu(msg->bi_size), - le32_to_cpu(msg->rw), - srv_sess->ver < RNBD_PROTO_VER_MAJOR || - usrlen < sizeof(*msg) ? - 0 : le16_to_cpu(msg->prio), priv); - if (unlikely(err)) { - rnbd_srv_err(sess_dev, "Submitting I/O to device failed, err: %d\n", - err); + /* Generate bio with pages pointing to the rdma buffer */ + bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL); + if (IS_ERR(bio)) { + rnbd_srv_err(sess_dev, "Failed to generate bio, err: %ld\n", PTR_ERR(bio)); goto sess_dev_put; } + io = container_of(bio, struct rnbd_dev_blk_io, bio); + io->dev = sess_dev->rnbd_dev; + io->priv = priv; + + bio->bi_end_io = rnbd_dev_bi_end_io; + bio->bi_private = io; + bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); + bio->bi_iter.bi_sector = le64_to_cpu(msg->sector); + bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size); + prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR || + usrlen < sizeof(*msg) ? 0 : le16_to_cpu(msg->prio); + bio_set_prio(bio, prio); + bio_set_dev(bio, sess_dev->rnbd_dev->bdev); + + submit_bio(bio); + return 0; sess_dev_put: -- cgit From d7aaeef293404bc41ea9d51c20a518945eb38366 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 30 Jul 2020 11:13:58 +0200 Subject: rnbd: no need to set bi_end_io in rnbd_bio_map_kern Since we always set bi_end_io after call rnbd_bio_map_kern, so the setting in rnbd_bio_map_kern is redundant. Signed-off-by: Guoqing Jiang Acked-by: Danil Kipnis Acked-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv-dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c index 49c62b506c9b..b241a099aeae 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.c +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -99,6 +99,5 @@ struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, offset = 0; } - bio->bi_end_io = bio_put; return bio; } -- cgit From f74441e6311a28f0ee89b9c8e296a33730f812fc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Aug 2020 13:00:44 -0600 Subject: io_uring: account locked memory before potential error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tear down path will always unaccount the memory, so ensure that we have accounted it before hitting any of them. Reported-by: Tomáš Chaloupka Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0d857f7ca507..e9b27cdaa735 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8341,6 +8341,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->user = user; ctx->creds = get_current_cred(); + /* + * Account memory _before_ installing the file descriptor. Once + * the descriptor is installed, it can get closed at any time. Also + * do this before hitting the general error path, as ring freeing + * will un-account as well. + */ + io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries), + ACCT_LOCKED); + ctx->limit_mem = limit_mem; + ret = io_allocate_scq_urings(ctx, p); if (ret) goto err; @@ -8377,14 +8387,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } - /* - * Account memory _before_ installing the file descriptor. Once - * the descriptor is installed, it can get closed at any time. - */ - io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries), - ACCT_LOCKED); - ctx->limit_mem = limit_mem; - /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup -- cgit From 820903c784a01bf6e143253418508da4f5790cff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Jul 2020 12:14:05 +0200 Subject: posix-cpu-timers: Split run_posix_cpu_timers() Split it up as a preparatory step to move the heavy lifting out of interrupt context. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20200730102337.677439437@linutronix.de --- kernel/time/posix-cpu-timers.c | 43 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 165117996ea0..e5ad87320468 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1080,32 +1080,15 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return false; } -/* - * This is called from the timer interrupt handler. The irq handler has - * already updated our counts. We need to check if any timers fire now. - * Interrupts are disabled. - */ -void run_posix_cpu_timers(void) +static void __run_posix_cpu_timers(struct task_struct *tsk) { - struct task_struct *tsk = current; struct k_itimer *timer, *next; unsigned long flags; LIST_HEAD(firing); - lockdep_assert_irqs_disabled(); - - /* - * The fast path checks that there are no expired thread or thread - * group timers. If that's so, just return. - */ - if (!fastpath_timer_check(tsk)) + if (!lock_task_sighand(tsk, &flags)) return; - lockdep_posixtimer_enter(); - if (!lock_task_sighand(tsk, &flags)) { - lockdep_posixtimer_exit(); - return; - } /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and @@ -1147,6 +1130,28 @@ void run_posix_cpu_timers(void) cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } +} + +/* + * This is called from the timer interrupt handler. The irq handler has + * already updated our counts. We need to check if any timers fire now. + * Interrupts are disabled. + */ +void run_posix_cpu_timers(void) +{ + struct task_struct *tsk = current; + + lockdep_assert_irqs_disabled(); + + /* + * The fast path checks that there are no expired thread or thread + * group timers. If that's so, just return. + */ + if (!fastpath_timer_check(tsk)) + return; + + lockdep_posixtimer_enter(); + __run_posix_cpu_timers(tsk); lockdep_posixtimer_exit(); } -- cgit From 1fb497dd003009be95ce67689ac800c446b7acc5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Jul 2020 12:14:06 +0200 Subject: posix-cpu-timers: Provide mechanisms to defer timer handling to task_work Running posix CPU timers in hard interrupt context has a few downsides: - For PREEMPT_RT it cannot work as the expiry code needs to take sighand lock, which is a 'sleeping spinlock' in RT. The original RT approach of offloading the posix CPU timer handling into a high priority thread was clumsy and provided no real benefit in general. - For fine grained accounting it's just wrong to run this in context of the timer interrupt because that way a process specific CPU time is accounted to the timer interrupt. - Long running timer interrupts caused by a large amount of expiring timers which can be created and armed by unpriviledged user space. There is no hard requirement to expire them in interrupt context. If the signal is targeted at the task itself then it won't be delivered before the task returns to user space anyway. If the signal is targeted at a supervisor process then it might be slightly delayed, but posix CPU timers are inaccurate anyway due to the fact that they are tied to the tick. Provide infrastructure to schedule task work which allows splitting the posix CPU timer code into a quick check in interrupt context and a thread context expiry and signal delivery function. This has to be enabled by architectures as it requires that the architecture specific KVM implementation handles pending task work before exiting to guest mode. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20200730102337.783470146@linutronix.de --- include/linux/posix-timers.h | 17 ++++ include/linux/sched.h | 4 + kernel/time/Kconfig | 9 ++ kernel/time/posix-cpu-timers.c | 185 ++++++++++++++++++++++++++++++++++++++--- kernel/time/timer.c | 1 + 5 files changed, 204 insertions(+), 12 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e3f0f8585da4..896c16d2c5fb 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -6,6 +6,7 @@ #include #include #include +#include struct kernel_siginfo; struct task_struct; @@ -125,6 +126,16 @@ struct posix_cputimers { unsigned int expiry_active; }; +/** + * posix_cputimers_work - Container for task work based posix CPU timer expiry + * @work: The task work to be scheduled + * @scheduled: @work has been scheduled already, no further processing + */ +struct posix_cputimers_work { + struct callback_head work; + unsigned int scheduled; +}; + static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); @@ -165,6 +176,12 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } #endif +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +void posix_cputimers_init_work(void); +#else +static inline void posix_cputimers_init_work(void) { } +#endif + #define REQUEUE_PENDING 1 /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 06ec60462af0..e9942ce07ef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -889,6 +889,10 @@ struct task_struct { /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK + struct posix_cputimers_work posix_cputimers_work; +#endif + /* Process credentials: */ /* Tracer's credentials at attach: */ diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index fcc42353f125..a09b1d61df6a 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -52,6 +52,15 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST config GENERIC_CMOS_UPDATE bool +# Select to handle posix CPU timers from task_work +# and not from the timer interrupt context +config HAVE_POSIX_CPU_TIMERS_TASK_WORK + bool + +config POSIX_CPU_TIMERS_TASK_WORK + bool + default y if POSIX_TIMERS && HAVE_POSIX_CPU_TIMERS_TASK_WORK + if GENERIC_CLOCKEVENTS menu "Timers subsystem" diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index e5ad87320468..a71758e34e45 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) */ static int posix_cpu_timer_create(struct k_itimer *new_timer) { + static struct lock_class_key posix_cpu_timers_key; struct pid *pid; rcu_read_lock(); @@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) return -EINVAL; } + /* + * If posix timer expiry is handled in task work context then + * timer::it_lock can be taken without disabling interrupts as all + * other locking happens in task context. This requires a seperate + * lock class key otherwise regular posix timer expiry would record + * the lock class being taken in interrupt context and generate a + * false positive warning. + */ + if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) + lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key); + new_timer->kclock = &clock_posix_cpu; timerqueue_init(&new_timer->it.cpu.node); new_timer->it.cpu.pid = get_pid(pid); @@ -1080,26 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return false; } -static void __run_posix_cpu_timers(struct task_struct *tsk) +static void handle_posix_cpu_timers(struct task_struct *tsk); + +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +static void posix_cpu_timers_work(struct callback_head *work) +{ + handle_posix_cpu_timers(current); +} + +/* + * Initialize posix CPU timers task work in init task. Out of line to + * keep the callback static and to avoid header recursion hell. + */ +void __init posix_cputimers_init_work(void) +{ + init_task_work(¤t->posix_cputimers_work.work, + posix_cpu_timers_work); +} + +/* + * Note: All operations on tsk->posix_cputimer_work.scheduled happen either + * in hard interrupt context or in task context with interrupts + * disabled. Aside of that the writer/reader interaction is always in the + * context of the current task, which means they are strict per CPU. + */ +static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) +{ + return tsk->posix_cputimers_work.scheduled; +} + +static inline void __run_posix_cpu_timers(struct task_struct *tsk) +{ + if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled)) + return; + + /* Schedule task work to actually expire the timers */ + tsk->posix_cputimers_work.scheduled = true; + task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME); +} + +static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, + unsigned long start) +{ + bool ret = true; + + /* + * On !RT kernels interrupts are disabled while collecting expired + * timers, so no tick can happen and the fast path check can be + * reenabled without further checks. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + tsk->posix_cputimers_work.scheduled = false; + return true; + } + + /* + * On RT enabled kernels ticks can happen while the expired timers + * are collected under sighand lock. But any tick which observes + * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath + * checks. So reenabling the tick work has do be done carefully: + * + * Disable interrupts and run the fast path check if jiffies have + * advanced since the collecting of expired timers started. If + * jiffies have not advanced or the fast path check did not find + * newly expired timers, reenable the fast path check in the timer + * interrupt. If there are newly expired timers, return false and + * let the collection loop repeat. + */ + local_irq_disable(); + if (start != jiffies && fastpath_timer_check(tsk)) + ret = false; + else + tsk->posix_cputimers_work.scheduled = false; + local_irq_enable(); + + return ret; +} +#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ +static inline void __run_posix_cpu_timers(struct task_struct *tsk) +{ + lockdep_posixtimer_enter(); + handle_posix_cpu_timers(tsk); + lockdep_posixtimer_exit(); +} + +static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) +{ + return false; +} + +static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, + unsigned long start) +{ + return true; +} +#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ + +static void handle_posix_cpu_timers(struct task_struct *tsk) { struct k_itimer *timer, *next; - unsigned long flags; + unsigned long flags, start; LIST_HEAD(firing); if (!lock_task_sighand(tsk, &flags)) return; - /* - * Here we take off tsk->signal->cpu_timers[N] and - * tsk->cpu_timers[N] all the timers that are firing, and - * put them on the firing list. - */ - check_thread_timers(tsk, &firing); + do { + /* + * On RT locking sighand lock does not disable interrupts, + * so this needs to be careful vs. ticks. Store the current + * jiffies value. + */ + start = READ_ONCE(jiffies); + barrier(); - check_process_timers(tsk, &firing); + /* + * Here we take off tsk->signal->cpu_timers[N] and + * tsk->cpu_timers[N] all the timers that are firing, and + * put them on the firing list. + */ + check_thread_timers(tsk, &firing); + + check_process_timers(tsk, &firing); + + /* + * The above timer checks have updated the exipry cache and + * because nothing can have queued or modified timers after + * sighand lock was taken above it is guaranteed to be + * consistent. So the next timer interrupt fastpath check + * will find valid data. + * + * If timer expiry runs in the timer interrupt context then + * the loop is not relevant as timers will be directly + * expired in interrupt context. The stub function below + * returns always true which allows the compiler to + * optimize the loop out. + * + * If timer expiry is deferred to task work context then + * the following rules apply: + * + * - On !RT kernels no tick can have happened on this CPU + * after sighand lock was acquired because interrupts are + * disabled. So reenabling task work before dropping + * sighand lock and reenabling interrupts is race free. + * + * - On RT kernels ticks might have happened but the tick + * work ignored posix CPU timer handling because the + * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work + * must be done very carefully including a check whether + * ticks have happened since the start of the timer + * expiry checks. posix_cpu_timers_enable_work() takes + * care of that and eventually lets the expiry checks + * run again. + */ + } while (!posix_cpu_timers_enable_work(tsk, start)); /* - * We must release these locks before taking any timer's lock. + * We must release sighand lock before taking any timer's lock. * There is a potential race with timer deletion here, as the * siglock now protects our private firing list. We have set * the firing flag in each timer, so that a deletion attempt @@ -1117,6 +1266,13 @@ static void __run_posix_cpu_timers(struct task_struct *tsk) list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { int cpu_firing; + /* + * spin_lock() is sufficient here even independent of the + * expiry context. If expiry happens in hard interrupt + * context it's obvious. For task work context it's safe + * because all other operations on timer::it_lock happen in + * task context (syscall or exit). + */ spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.elist); cpu_firing = timer->it.cpu.firing; @@ -1143,6 +1299,13 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); + /* + * If the actual expiry is deferred to task work context and the + * work is already scheduled there is no point to do anything here. + */ + if (posix_cpu_timers_work_scheduled(tsk)) + return; + /* * The fast path checks that there are no expired thread or thread * group timers. If that's so, just return. @@ -1150,9 +1313,7 @@ void run_posix_cpu_timers(void) if (!fastpath_timer_check(tsk)) return; - lockdep_posixtimer_enter(); __run_posix_cpu_timers(tsk); - lockdep_posixtimer_exit(); } /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ae5029f984a8..a16764b0116e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2017,6 +2017,7 @@ static void __init init_timer_cpus(void) void __init init_timers(void) { init_timer_cpus(); + posix_cputimers_init_work(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit From 0099808553ad4f9c04ad7afd966f6d7f470f247f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Jul 2020 12:14:07 +0200 Subject: x86: Select POSIX_CPU_TIMERS_TASK_WORK Move POSIX CPU timer expiry and signal delivery into task context. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20200730102337.888613724@linutronix.de --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fd03cefabd34..a82e7150964e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -209,6 +209,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API -- cgit From 09fc67b500c7f0bb1b5ed774197ac7f2c5285655 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 17 Jul 2020 17:42:55 +0900 Subject: kprobes: Remove show_registers() function prototype Remove show_registers() function prototype because this function has been renamed by commit: 57da8b960b9a ("x86: Avoid double stack traces with show_regs()") and this commit has removed the caller in kprobes altogether: 80006dbee674 ("kprobes/x86: Remove jprobe implementation") So this doesn't exist anymore - remove the orphan prototype. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 45b8cdc9fad7..9be1bff4f586 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -227,7 +227,6 @@ extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); -extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -- cgit From b55b3fdce3e554a6bbe8f8ca6a01a892d720e64e Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Fri, 17 Jul 2020 13:01:00 +0530 Subject: hw_breakpoint: Remove unused __register_perf_hw_breakpoint() declaration Commit: b326e9560a28 ("hw-breakpoints: Use overflow handler instead of the event callback") removed __register_perf_hw_breakpoint() function usage and replaced it with register_perf_hw_breakpoint() function. Remove the left-over unused __register_perf_hw_breakpoint() declaration from as well. Signed-off-by: Bhupesh Sharma Signed-off-by: Ingo Molnar Acked-by: Mark Rutland Link: https://lore.kernel.org/r/1594971060-14180-1-git-send-email-bhsharma@redhat.com --- include/linux/hw_breakpoint.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index d7d4250cd1e4..78dd7035d1e5 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -72,7 +72,6 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, void *context); extern int register_perf_hw_breakpoint(struct perf_event *bp); -extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); @@ -119,8 +118,6 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, void *context) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } -static inline int -__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } -- cgit From be9b54abd444c5f880f79e624362c2a986b0bf24 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 11:51:34 +0100 Subject: ALSA: usb-audio: fix spelling mistake "buss" -> "bus" There is a spelling mistake in a usb_audio_dbg debug message. Also replace "param" with "parameter". Fix these. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200806105134.46447-1-colin.king@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 986145fd2ce0..a4d4d71db55b 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -329,7 +329,7 @@ static int snd_us16x08_bus_put(struct snd_kcontrol *kcontrol, elem->cached |= 1; elem->cache_val[0] = val; } else { - usb_audio_dbg(chip, "Failed to set buss param, err:%d\n", err); + usb_audio_dbg(chip, "Failed to set bus parameter, err:%d\n", err); } return err > 0 ? 1 : 0; -- cgit From fec9008828cde0076aae595ac031bfcf49d335a4 Mon Sep 17 00:00:00 2001 From: Mirko Dietrich Date: Thu, 6 Aug 2020 14:48:50 +0200 Subject: ALSA: usb-audio: Creative USB X-Fi Pro SB1095 volume knob support Adds an entry for Creative USB X-Fi to the rc_config array in mixer_quirks.c to allow use of volume knob on the device. Adds support for newer X-Fi Pro card, known as "Model No. SB1095" with USB ID "041e:3263" Signed-off-by: Mirko Dietrich Cc: Link: https://lore.kernel.org/r/20200806124850.20334-1-buzz@l4m1.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index cec1cfd7edb7..199cdbfdc761 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -185,6 +185,7 @@ static const struct rc_config { { USB_ID(0x041e, 0x3042), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 */ { USB_ID(0x041e, 0x30df), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ { USB_ID(0x041e, 0x3237), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ + { USB_ID(0x041e, 0x3263), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ { USB_ID(0x041e, 0x3048), 2, 2, 6, 6, 2, 0x6e91 }, /* Toshiba SB0500 */ }; -- cgit From 8912fd6a61d7474ea9b43be93f136034d28868d5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 12:42:56 +0100 Subject: net: hns3: fix spelling mistake "could'nt" -> "couldn't" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9162856de1b1..e972138a14ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1728,7 +1728,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) /* hardware completion status should be available by this time */ if (ret) { dev_err(&hdev->pdev->dev, - "could'nt get reset done status from h/w, timeout!\n"); + "couldn't get reset done status from h/w, timeout!\n"); return ret; } -- cgit From 2342ef4e6021f5f7f2b3d4daa1ed6d1140b1378b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 10 Jul 2020 09:37:58 +0100 Subject: drm/amdgpu: fix spelling mistake "Falied" -> "Failed" There is a spelling mistake in a DRM_ERROR error message. Fix it. This got lost in a merge, restore the fix. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher (cherry picked from commit 4afaa61db9cf5250b5734c2531b226e7b3a3d691) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index fe7d39bb975d..8034111acd9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1987,7 +1987,7 @@ static int psp_suspend(void *handle) ret = psp_tmr_terminate(psp); if (ret) { - DRM_ERROR("Falied to terminate tmr\n"); + DRM_ERROR("Failed to terminate tmr\n"); return ret; } -- cgit From ebfbd1c2ca02f1c1bc9f8f0a7783e71efb57e4cc Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 22 Jul 2020 09:40:11 +0800 Subject: drm/amdgpu: expand sienna chichlid reg access support Added dedicated 64bit reg read/write support Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index ea69ae76773e..74d02d270d34 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -97,6 +97,49 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -673,6 +716,8 @@ static int nv_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; adev->pcie_wreg = &nv_pcie_wreg; + adev->pcie_rreg64 = &nv_pcie_rreg64; + adev->pcie_wreg64 = &nv_pcie_wreg64; /* TODO: will add them during VCN v2 implementation */ adev->uvd_ctx_rreg = NULL; -- cgit From ee10e06eb00c3b371fa17a13ee2adaee4254dd54 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 20 Jul 2020 11:11:13 +0800 Subject: drm/amdgpu: add printing after executing page reservation to eeprom This will tell users if the faulty page has been written to external eeprom device in dmesg log. Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e10f02ed3f65..bcce4c0be462 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1618,7 +1618,7 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) data = con->eh_data; save_count = data->count - control->num_recs; /* only new entries are saved */ - if (save_count > 0) + if (save_count > 0) { if (amdgpu_ras_eeprom_process_recods(control, &data->bps[control->num_recs], true, @@ -1627,6 +1627,9 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) return -EIO; } + dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); + } + return 0; } -- cgit From ffa453f05be1513a2a68df0bce530db0d13480de Mon Sep 17 00:00:00 2001 From: Changfeng Date: Tue, 21 Jul 2020 10:38:19 +0800 Subject: Revert "drm/amd/powerplay: drop unnecessary message support check" The below 3 messages are not supported on Renoir SMU_MSG_PrepareMp1ForShutdown SMU_MSG_PrepareMp1ForUnload SMU_MSG_PrepareMp1ForReset It needs to revert patch: drm/amd/powerplay: drop unnecessary message support check to avoid set mp1 state fail during gpu reset on renoir. Signed-off-by: changfeng Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 9 +++++++++ drivers/gpu/drm/amd/powerplay/smu_cmn.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 838a369c9ec3..f778b00e49eb 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -34,6 +34,7 @@ #include "sienna_cichlid_ppt.h" #include "renoir_ppt.h" #include "amd_pcie.h" +#include "smu_cmn.h" /* * DO NOT use these for err/warn/info/debug messages. @@ -1589,6 +1590,14 @@ int smu_set_mp1_state(struct smu_context *smu, return 0; } + /* some asics may not support those messages */ + if (smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_MSG, + msg) < 0) { + mutex_unlock(&smu->mutex); + return 0; + } + ret = smu_send_smc_msg(smu, msg, NULL); if (ret) dev_err(smu->adev->dev, "[PrepareMp1] Failed!\n"); diff --git a/drivers/gpu/drm/amd/powerplay/smu_cmn.h b/drivers/gpu/drm/amd/powerplay/smu_cmn.h index 98face8c5fd6..f9e63f18b157 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_cmn.h +++ b/drivers/gpu/drm/amd/powerplay/smu_cmn.h @@ -25,7 +25,7 @@ #include "amdgpu_smu.h" -#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4) +#if defined(SWSMU_CODE_LAYER_L1) || defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4) int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, enum smu_message_type msg, uint32_t param, -- cgit From 3520b5e22809d1fe211a7375e7b220de85e383d7 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 22 Jul 2020 21:27:35 +0800 Subject: drm/amd/powerplay: remove the dpm checking in the boot sequence It's not necessary to retrieve the power features status when the asic is booted up the first time. This patch can have the features enablement status still checked in suspend/resume case and removed from the first boot up sequence. Signed-off-by: Kenneth Feng Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index f778b00e49eb..6b03f750e63b 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -735,7 +735,7 @@ static int smu_smc_hw_setup(struct smu_context *smu) uint32_t pcie_gen = 0, pcie_width = 0; int ret; - if (smu_is_dpm_running(smu) && adev->in_suspend) { + if (adev->in_suspend && smu_is_dpm_running(smu)) { dev_info(adev->dev, "dpm has been enabled\n"); return 0; } -- cgit From 2f32faec004319a609982859a2b47459ae838888 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Tue, 21 Jul 2020 14:01:53 +0800 Subject: drm/amd/powerplay: skip invalid msg when smu set mp1 state Some asic may not support for some message of set mp1 state. If the return value of smu_send_smc_msg is -EINVAL, that means it failed to send msg to smc as it can not map an valid message for the ASIC. And with that case, smu_set_mp1_state should be skipped as those ASIC was in fact do not support for that. Signed-off-by: Likun Gao Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 6b03f750e63b..727cb9fd4aee 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1599,6 +1599,9 @@ int smu_set_mp1_state(struct smu_context *smu, } ret = smu_send_smc_msg(smu, msg, NULL); + /* some asics may not support those messages */ + if (ret == -EINVAL) + ret = 0; if (ret) dev_err(smu->adev->dev, "[PrepareMp1] Failed!\n"); -- cgit From ef69ab6aed16094b3d840d5863ae665f21fedf58 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Tue, 21 Jul 2020 13:13:39 +0800 Subject: drm/amd/powerplay: add msg map for mode1 reset Mapping Mode1Reset message for sienna_cichlid. Signed-off-by: Likun Gao Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index 59da3ca2a4ca..f44fd33aeb93 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -121,6 +121,7 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME, 1), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 1), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 1), }; static struct cmn2asic_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = { -- cgit From 518dcf959c68a33e61d78fec47a1d2a674d0120c Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Tue, 21 Jul 2020 16:39:45 +0800 Subject: drm/amd/powerplay: correct smu message for vf mode Set valid_in_vf to false for the message not support in vf mode on sienna cichlid. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c | 70 +++++++++++----------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index f44fd33aeb93..dcc5d25a7894 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -74,10 +74,10 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(SetAllowedFeaturesMaskLow, PPSMC_MSG_SetAllowedFeaturesMaskLow, 1), - MSG_MAP(SetAllowedFeaturesMaskHigh, PPSMC_MSG_SetAllowedFeaturesMaskHigh, 1), - MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 1), - MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 1), + MSG_MAP(SetAllowedFeaturesMaskLow, PPSMC_MSG_SetAllowedFeaturesMaskLow, 0), + MSG_MAP(SetAllowedFeaturesMaskHigh, PPSMC_MSG_SetAllowedFeaturesMaskHigh, 0), + MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0), + MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), MSG_MAP(EnableSmuFeaturesLow, PPSMC_MSG_EnableSmuFeaturesLow, 1), MSG_MAP(EnableSmuFeaturesHigh, PPSMC_MSG_EnableSmuFeaturesHigh, 1), MSG_MAP(DisableSmuFeaturesLow, PPSMC_MSG_DisableSmuFeaturesLow, 1), @@ -85,43 +85,43 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetRunningSmuFeaturesLow, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetRunningSmuFeaturesHigh, 1), MSG_MAP(SetWorkloadMask, PPSMC_MSG_SetWorkloadMask, 1), - MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1), - MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), - MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1), - MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 1), - MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 1), - MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 1), - MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 1), - MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 1), - MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 1), - MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 1), - MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), + MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), + MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 0), + MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 0), + MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0), + MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 0), + MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), + MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), + MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 0), + MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 0), + MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 0), + MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 0), MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 1), - MSG_MAP(SetHardMaxByFreq, PPSMC_MSG_SetHardMaxByFreq, 1), + MSG_MAP(SetHardMaxByFreq, PPSMC_MSG_SetHardMaxByFreq, 0), MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), - MSG_MAP(SetGeminiMode, PPSMC_MSG_SetGeminiMode, 1), - MSG_MAP(SetGeminiApertureHigh, PPSMC_MSG_SetGeminiApertureHigh, 1), - MSG_MAP(SetGeminiApertureLow, PPSMC_MSG_SetGeminiApertureLow, 1), - MSG_MAP(OverridePcieParameters, PPSMC_MSG_OverridePcieParameters, 1), - MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 1), - MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 1), - MSG_MAP(SetUclkFastSwitch, PPSMC_MSG_SetUclkFastSwitch, 1), - MSG_MAP(SetVideoFps, PPSMC_MSG_SetVideoFps, 1), + MSG_MAP(SetGeminiMode, PPSMC_MSG_SetGeminiMode, 0), + MSG_MAP(SetGeminiApertureHigh, PPSMC_MSG_SetGeminiApertureHigh, 0), + MSG_MAP(SetGeminiApertureLow, PPSMC_MSG_SetGeminiApertureLow, 0), + MSG_MAP(OverridePcieParameters, PPSMC_MSG_OverridePcieParameters, 0), + MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), + MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), + MSG_MAP(SetUclkFastSwitch, PPSMC_MSG_SetUclkFastSwitch, 0), + MSG_MAP(SetVideoFps, PPSMC_MSG_SetVideoFps, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1), - MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), - MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), - MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), + MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 0), + MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0), + MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(GetDcModeMaxDpmFreq, PPSMC_MSG_GetDcModeMaxDpmFreq, 1), - MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 1), - MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1), - MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), - MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1), - MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), - MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME, 1), - MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 1), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 1), + MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 0), + MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 0), + MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 0), + MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 0), + MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 0), + MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), }; static struct cmn2asic_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = { -- cgit From 3da0a2dfbd3a7ada2d8388775796edf12b2c7219 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 23 Jul 2020 15:26:27 +0800 Subject: drm/amdgpu: update golden setting for sienna_cichlid Update golden setting for sienna_cichlid. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 61e89247faf3..8344c3b0b9b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3082,7 +3082,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), -- cgit From fa40a009001a30819097999d10cc3f51a0891564 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Thu, 23 Jul 2020 11:07:52 -0400 Subject: drm/amdgpu/jpeg3.0: remove extra asic type check jpeg ip block is already selected based on ASIC type during set_ip_blocks. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 42f1a516005e..c41e5590a701 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -49,12 +49,11 @@ static int jpeg_v3_0_set_powergating_state(void *handle, static int jpeg_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID) { - u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + return -ENOENT; - if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) - return -ENOENT; - } adev->jpeg.num_jpeg_inst = 1; jpeg_v3_0_set_dec_ring_funcs(adev); -- cgit From bbf16f530cdf0e6f95e9954ca4aa7915cd3f1d99 Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Thu, 23 Jul 2020 22:34:22 -0400 Subject: drm/amdgpu: update dec ring test for VCN 3.0 Signed-off-by: Boyuan Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 910a4a32ff78..63e5547cfb16 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1659,7 +1659,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .emit_ib = vcn_v2_0_dec_ring_emit_ib, .emit_fence = vcn_v2_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ring = vcn_v2_0_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v2_0_dec_ring_insert_nop, .insert_start = vcn_v2_0_dec_ring_insert_start, -- cgit From de273b73ca6aef31c64071b90b9c76cd9f47d5d1 Mon Sep 17 00:00:00 2001 From: Changfeng Date: Fri, 24 Jul 2020 13:15:10 +0800 Subject: drm/amd/powerplay: drop unnecessary message support check(v2) Take back patch:drop unnecessary message support check Because the gpu reset fail problem on renoir can be fixed by: drm/amd/powerplay: skip invalid msg when smu set mp1 state It needs to remove SWSMU_CODE_LAYER_L1 in smu_cmn.h to guard a clear code layer. Signed-off-by: changfeng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 9 --------- drivers/gpu/drm/amd/powerplay/smu_cmn.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 727cb9fd4aee..2da72f333ad9 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -34,7 +34,6 @@ #include "sienna_cichlid_ppt.h" #include "renoir_ppt.h" #include "amd_pcie.h" -#include "smu_cmn.h" /* * DO NOT use these for err/warn/info/debug messages. @@ -1590,14 +1589,6 @@ int smu_set_mp1_state(struct smu_context *smu, return 0; } - /* some asics may not support those messages */ - if (smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_MSG, - msg) < 0) { - mutex_unlock(&smu->mutex); - return 0; - } - ret = smu_send_smc_msg(smu, msg, NULL); /* some asics may not support those messages */ if (ret == -EINVAL) diff --git a/drivers/gpu/drm/amd/powerplay/smu_cmn.h b/drivers/gpu/drm/amd/powerplay/smu_cmn.h index f9e63f18b157..98face8c5fd6 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_cmn.h +++ b/drivers/gpu/drm/amd/powerplay/smu_cmn.h @@ -25,7 +25,7 @@ #include "amdgpu_smu.h" -#if defined(SWSMU_CODE_LAYER_L1) || defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4) +#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4) int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, enum smu_message_type msg, uint32_t param, -- cgit From a486bc3c50198a7e895eef5dc3d2a94cd6186e4f Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 24 Jul 2020 17:24:08 +0800 Subject: drm/amd/powerplay: update driver if file for sienna_cichlid Update sienna_cichlid driver if header and related files. Support new smu metrics for pre & postDS frequency. Signed-off-by: Likun Gao Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../powerplay/inc/smu11_driver_if_sienna_cichlid.h | 21 +++++++++++++++------ drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h | 2 +- drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c | 11 ++++++++--- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h index b2232e24d82f..aa2708fccb6d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU11_DRIVER_IF_VERSION 0x33 +#define SMU11_DRIVER_IF_VERSION 0x34 #define PPTABLE_Sienna_Cichlid_SMU_VERSION 5 @@ -968,9 +968,15 @@ typedef struct { typedef struct { uint32_t CurrClock[PPCLK_COUNT]; - uint16_t AverageGfxclkFrequency; - uint16_t AverageFclkFrequency; - uint16_t AverageUclkFrequency ; + + uint16_t AverageGfxclkFrequencyPreDs; + uint16_t AverageGfxclkFrequencyPostDs; + uint16_t AverageFclkFrequencyPreDs; + uint16_t AverageFclkFrequencyPostDs; + uint16_t AverageUclkFrequencyPreDs ; + uint16_t AverageUclkFrequencyPostDs ; + + uint16_t AverageGfxActivity ; uint16_t AverageUclkActivity ; uint8_t CurrSocVoltageOffset ; @@ -988,6 +994,7 @@ typedef struct { uint16_t TemperatureLiquid0 ; uint16_t TemperatureLiquid1 ; uint16_t TemperaturePlx ; + uint16_t Padding16 ; uint32_t ThrottlerStatus ; uint8_t LinkDpmLevel; @@ -1006,8 +1013,10 @@ typedef struct { uint16_t AverageDclk0Frequency ; uint16_t AverageVclk1Frequency ; uint16_t AverageDclk1Frequency ; - uint16_t VcnActivityPercentage ; //place holder, David N. to provide full sequence - uint16_t padding16_2; + uint16_t VcnActivityPercentage ; //place holder, David N. to provide full sequence + uint8_t PcieRate ; + uint8_t PcieWidth ; + } SmuMetrics_t; typedef struct { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index 429f5aa8924a..9504f9954fd3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -30,7 +30,7 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x33 +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x34 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x2 /* MP Apertures */ diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index dcc5d25a7894..f64a1be94cb8 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -70,6 +70,8 @@ FEATURE_MASK(FEATURE_DPM_FCLK_BIT) | \ FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT)) +#define SMU_11_0_7_GFX_BUSY_THRESHOLD 15 + static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -443,13 +445,16 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, *value = metrics->CurrClock[PPCLK_DCEFCLK]; break; case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequency; + if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) + *value = metrics->AverageGfxclkFrequencyPostDs; + else + *value = metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; + *value = metrics->AverageFclkFrequencyPostDs; break; case METRICS_AVERAGE_UCLK: - *value = metrics->AverageUclkFrequency; + *value = metrics->AverageUclkFrequencyPostDs; break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->AverageGfxActivity; -- cgit From 68bb3c3ff9d92645488382c7261de7d477f638e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Jul 2020 16:39:40 +0300 Subject: drm/amd/powerplay: off by one bugs in smu_cmn_to_asic_specific_index() These tables have _COUNT number of elements so the comparisons should be >= instead of > to prevent reading one element beyond the end of the array. Fixes: 8264ee69f0d8 ("drm/amd/powerplay: drop unused code") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_cmn.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_cmn.c b/drivers/gpu/drm/amd/powerplay/smu_cmn.c index be4b678d0e60..5c23c44c33bd 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_cmn.c +++ b/drivers/gpu/drm/amd/powerplay/smu_cmn.c @@ -166,7 +166,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, switch (type) { case CMN2ASIC_MAPPING_MSG: - if (index > SMU_MSG_MAX_COUNT || + if (index >= SMU_MSG_MAX_COUNT || !smu->message_map) return -EINVAL; @@ -181,7 +181,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return msg_mapping.map_to; case CMN2ASIC_MAPPING_CLK: - if (index > SMU_CLK_COUNT || + if (index >= SMU_CLK_COUNT || !smu->clock_map) return -EINVAL; @@ -192,7 +192,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_FEATURE: - if (index > SMU_FEATURE_COUNT || + if (index >= SMU_FEATURE_COUNT || !smu->feature_map) return -EINVAL; @@ -203,7 +203,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_TABLE: - if (index > SMU_TABLE_COUNT || + if (index >= SMU_TABLE_COUNT || !smu->table_map) return -EINVAL; @@ -214,7 +214,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_PWR: - if (index > SMU_POWER_SOURCE_COUNT || + if (index >= SMU_POWER_SOURCE_COUNT || !smu->pwr_src_map) return -EINVAL; -- cgit From c4e0dbcb201a79639e5e74f456d8313893a259f5 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 27 Jul 2020 09:08:22 +0800 Subject: drm/amd/swsmu: allow asic to handle sensor type by itself 1. allow asic to handle sensor type by itself. 2. if not, use smu common sensor to handle it. Signed-off-by: Kevin Wang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 2da72f333ad9..8c624f1f33ba 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1947,6 +1947,10 @@ int smu_read_sensor(struct smu_context *smu, mutex_lock(&smu->mutex); + if (smu->ppt_funcs->read_sensor) + if (!smu->ppt_funcs->read_sensor(smu, sensor, data, size)) + goto unlock; + switch (sensor) { case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK: *((uint32_t *)data) = pstate_table->gfxclk_pstate.standard * 100; @@ -1977,11 +1981,12 @@ int smu_read_sensor(struct smu_context *smu, *size = 4; break; default: - if (smu->ppt_funcs->read_sensor) - ret = smu->ppt_funcs->read_sensor(smu, sensor, data, size); + *size = 0; + ret = -EOPNOTSUPP; break; } +unlock: mutex_unlock(&smu->mutex); return ret; -- cgit From 7c146177b3368feda6dbb0e01b085c84d9774589 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 6 Jul 2020 10:54:37 -0400 Subject: drm/amd/display: Clean up global sync param retrieval [Why] This change replaces older looping code in favor of these functions. [How] There are built in functions for extracting global sync params during mode validation now. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 49 +++------------------- .../gpu/drm/amd/display/dc/dml/display_mode_vba.c | 14 +------ .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 7 +--- 3 files changed, 7 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 968a89bbcf24..2a5e7175926a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3069,8 +3069,7 @@ void dcn20_calculate_dlg_params( int pipe_cnt, int vlevel) { - int i, j, pipe_idx, pipe_idx_unsplit; - bool visited[MAX_PIPES] = { 0 }; + int i, pipe_idx; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3089,55 +3088,17 @@ void dcn20_calculate_dlg_params( if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - /* - * An artifact of dml pipe split/odm is that pipes get merged back together for - * calculation. Therefore we need to only extract for first pipe in ascending index order - * and copy into the other split half. - */ - for (i = 0, pipe_idx = 0, pipe_idx_unsplit = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (!visited[pipe_idx]) { - display_pipe_source_params_st *src = &pipes[pipe_idx].pipe.src; - display_pipe_dest_params_st *dst = &pipes[pipe_idx].pipe.dest; - - dst->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit]; - dst->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit]; - dst->vupdate_width = context->bw_ctx.dml.vba.VUpdateWidthPix[pipe_idx_unsplit]; - dst->vready_offset = context->bw_ctx.dml.vba.VReadyOffsetPix[pipe_idx_unsplit]; - /* - * j iterates inside pipes array, unlike i which iterates inside - * pipe_ctx array - */ - if (src->is_hsplit) - for (j = pipe_idx + 1; j < pipe_cnt; j++) { - display_pipe_source_params_st *src_j = &pipes[j].pipe.src; - display_pipe_dest_params_st *dst_j = &pipes[j].pipe.dest; - - if (src_j->is_hsplit && !visited[j] - && src->hsplit_grp == src_j->hsplit_grp) { - dst_j->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit]; - dst_j->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit]; - dst_j->vupdate_width = context->bw_ctx.dml.vba.VUpdateWidthPix[pipe_idx_unsplit]; - dst_j->vready_offset = context->bw_ctx.dml.vba.VReadyOffsetPix[pipe_idx_unsplit]; - visited[j] = true; - } - } - visited[pipe_idx] = true; - pipe_idx_unsplit++; - } - pipe_idx++; - } - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - ASSERT(visited[pipe_idx]); context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; pipe_idx++; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 7916a7ea9336..b0064087b9bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -154,23 +154,11 @@ dml_get_pipe_attr_func(refcyc_per_meta_chunk_vblank_c_in_us, mode_lib->vba.TimeP dml_get_pipe_attr_func(refcyc_per_meta_chunk_flip_l_in_us, mode_lib->vba.TimePerMetaChunkFlip); dml_get_pipe_attr_func(refcyc_per_meta_chunk_flip_c_in_us, mode_lib->vba.TimePerChromaMetaChunkFlip); +dml_get_pipe_attr_func(vstartup, mode_lib->vba.VStartup); dml_get_pipe_attr_func(vupdate_offset, mode_lib->vba.VUpdateOffsetPix); dml_get_pipe_attr_func(vupdate_width, mode_lib->vba.VUpdateWidthPix); dml_get_pipe_attr_func(vready_offset, mode_lib->vba.VReadyOffsetPix); -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe) -{ - unsigned int which_plane; - - recalculate_params(mode_lib, pipes, num_pipes); - which_plane = mode_lib->vba.pipe_plane[which_pipe]; - return mode_lib->vba.VStartup[which_plane]; -} - double get_total_immediate_flip_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 756d8eb1221c..21e5111ea7a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -98,16 +98,11 @@ dml_get_pipe_attr_decl(refcyc_per_meta_chunk_vblank_c_in_us); dml_get_pipe_attr_decl(refcyc_per_meta_chunk_flip_l_in_us); dml_get_pipe_attr_decl(refcyc_per_meta_chunk_flip_c_in_us); +dml_get_pipe_attr_decl(vstartup); dml_get_pipe_attr_decl(vupdate_offset); dml_get_pipe_attr_decl(vupdate_width); dml_get_pipe_attr_decl(vready_offset); -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe); - double get_total_immediate_flip_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, -- cgit From 5e061a4d020380566365e8aa8f629bbd200e9708 Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Tue, 16 Jun 2020 16:24:11 -0400 Subject: drm/amd/display: Rename bytes_pp to the correct bits_pp [Why] Struct dcn_dsc_state is used for reading current state and parameters of DSC on a pipe, the target rate parameter uses bytes per pixel even though its reading BITS_PER_PIXEL register. [How] Changing it to Bits Per Pixel for consistency. Signed-off-by: Eryk Brol Signed-off-by: Mikita Lipski Reviewed-by: Mikita Lipski Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 998f729976bf..1c177c2ad1dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1186,7 +1186,7 @@ static ssize_t dp_dsc_bytes_per_pixel_read(struct file *f, char __user *buf, snprintf(rd_buf_ptr, str_len, "%d\n", - dsc_state.dsc_bytes_per_pixel); + dsc_state.dsc_bits_per_pixel); rd_buf_ptr += str_len; while (size) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index da0897fe3b54..a643927e272b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -390,6 +390,8 @@ void dcn10_log_hw_state(struct dc *dc, } DTN_INFO("\n"); + // dcn_dsc_state struct field bytes_per_pixel was renamed to bits_per_pixel + // TODO: Update golden log header to reflect this name change DTN_INFO("DSC: CLOCK_EN SLICE_WIDTH Bytes_pp\n"); for (i = 0; i < pool->res_cap->num_dsc; i++) { struct display_stream_compressor *dsc = pool->dscs[i]; @@ -400,7 +402,7 @@ void dcn10_log_hw_state(struct dc *dc, dsc->inst, s.dsc_clock_en, s.dsc_slice_width, - s.dsc_bytes_per_pixel); + s.dsc_bits_per_pixel); DTN_INFO("\n"); } DTN_INFO("\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index ba50214d6c32..79b640e202eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -156,7 +156,7 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_ds REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en); REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width); - REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bytes_per_pixel); + REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel); REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height); REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size); REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index 5915994f9eb8..f520e13aee4c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -55,7 +55,7 @@ struct dsc_optc_config { struct dcn_dsc_state { uint32_t dsc_clock_en; uint32_t dsc_slice_width; - uint32_t dsc_bytes_per_pixel; + uint32_t dsc_bits_per_pixel; uint32_t dsc_slice_height; uint32_t dsc_pic_width; uint32_t dsc_pic_height; -- cgit From c6851841fc877d9e4361eeaa1e3851778fa440c4 Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Tue, 16 Jun 2020 16:29:19 -0400 Subject: drm/amd/display: Fix naming of DSC Debugfs entry [why] Fix naming and return bits rather than bytes per pixel for naming consistency. Because registers return Bytes per pixel, but DSC Config structure is expecting bits per pixel as input. So when returning the value convert from bytes into bits. Signed-off-by: Eryk Brol Signed-off-by: Mikita Lipski Reviewed-by: Mikita Lipski Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 1c177c2ad1dc..e5a6d9115949 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -35,6 +35,7 @@ #include "dmub/dmub_srv.h" #include "resource.h" #include "dsc.h" +#include "dc_link_dp.h" struct dmub_debugfs_trace_header { uint32_t entry_count; @@ -1150,7 +1151,7 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, return result; } -static ssize_t dp_dsc_bytes_per_pixel_read(struct file *f, char __user *buf, +static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; @@ -1460,9 +1461,9 @@ static const struct file_operations dp_dsc_slice_height_debugfs_fops = { .llseek = default_llseek }; -static const struct file_operations dp_dsc_bytes_per_pixel_debugfs_fops = { +static const struct file_operations dp_dsc_bits_per_pixel_debugfs_fops = { .owner = THIS_MODULE, - .read = dp_dsc_bytes_per_pixel_read, + .read = dp_dsc_bits_per_pixel_read, .llseek = default_llseek }; @@ -1552,7 +1553,7 @@ static const struct { {"dsc_clock_en", &dp_dsc_clock_en_debugfs_fops}, {"dsc_slice_width", &dp_dsc_slice_width_debugfs_fops}, {"dsc_slice_height", &dp_dsc_slice_height_debugfs_fops}, - {"dsc_bytes_per_pixel", &dp_dsc_bytes_per_pixel_debugfs_fops}, + {"dsc_bits_per_pixel", &dp_dsc_bits_per_pixel_debugfs_fops}, {"dsc_pic_width", &dp_dsc_pic_width_debugfs_fops}, {"dsc_pic_height", &dp_dsc_pic_height_debugfs_fops}, {"dsc_chunk_size", &dp_dsc_chunk_size_debugfs_fops}, -- cgit From bc0cd80783ff7c2201b44d384bab4629c9cb7db9 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 14 Jul 2020 15:06:40 -0400 Subject: drm/amd/display: Don't compare dppclk before updating DTO [Why] In dcn3_update_clocks there are situations where dppclk is not lowered (i.e. stays the same), but DTO still needs to be increased before we program pipe frontend (i.e. in prepare_bandwidth). If we don't program the new DTO value before we program the pipe, we will underflow as soon as the pipe lock is released until the next call to dcn3_update_clocks where the DTO is updated. [How] Remove dppclk check before programming new DTO value. Signed-off-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index d94fdc52be37..9133646f6d5f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -323,9 +323,10 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, /* if clock is being raised, increase refclk before lowering DTO */ if (update_dppclk || update_dispclk) dcn20_update_clocks_update_dentist(clk_mgr); - /* always update dtos unless clock is lowered and not safe to lower */ - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures + * that we do not lower dto when it is not safe to lower. We do not need to + * compare the current and new dppclk before calling this function.*/ + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } -- cgit From 3fd20292c2352660155bbc11736dd014b2fc6e98 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Wed, 15 Jul 2020 11:21:43 +0800 Subject: drm/amd/display: Check lane status again after link training done [Why] Some monitors could suffer symbol unlock but cannot send HPD IRQ to notic source device to handle link loss. This makes monitor stuck in abnormal status and causes black screen. [How] According to the suggestion from scalar vendor, to check lane status again after link training done. That can improve the comaptibility from current production monitors. Signed-off-by: Martin Tsai Reviewed-by: Aric Cyr Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 50 ++++++++++++++++++++++ .../drm/amd/display/include/link_service_types.h | 2 + 2 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5cb7b834e459..1a3dbed3becb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1133,6 +1133,45 @@ static inline enum link_training_result perform_link_training_int( return status; } +static enum link_training_result check_link_loss_status( + struct dc_link *link, + const struct link_training_settings *link_training_setting) +{ + enum link_training_result status = LINK_TRAINING_SUCCESS; + unsigned int lane01_status_address = DP_LANE0_1_STATUS; + union lane_status lane_status; + uint8_t dpcd_buf[4] = {0}; + uint32_t lane; + + core_link_read_dpcd( + link, + lane01_status_address, + (uint8_t *)(dpcd_buf), + sizeof(dpcd_buf)); + + /*parse lane status*/ + for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) { + /* + * check lanes status + */ + lane_status.raw = get_nibble_at_index(&dpcd_buf[0], lane); + + if (!lane_status.bits.CHANNEL_EQ_DONE_0 || + !lane_status.bits.CR_DONE_0 || + !lane_status.bits.SYMBOL_LOCKED_0) { + /* if one of the channel equalization, clock + * recovery or symbol lock is dropped + * consider it as (link has been + * dropped) dp sink status has changed + */ + status = LINK_TRAINING_LINK_LOSS; + break; + } + } + + return status; +} + static void initialize_training_settings( struct dc_link *link, const struct dc_link_settings *link_setting, @@ -1372,6 +1411,9 @@ static void print_status_message( case LINK_TRAINING_LQA_FAIL: lt_result = "LQA failed"; break; + case LINK_TRAINING_LINK_LOSS: + lt_result = "Link loss"; + break; default: break; } @@ -1531,6 +1573,14 @@ enum link_training_result dc_link_dp_perform_link_training( status); } + /* delay 5ms after Main Link output idle pattern and then check + * DPCD 0202h. + */ + if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) { + msleep(5); + status = check_link_loss_status(link, <_settings); + } + /* 6. print status message*/ print_status_message(link, <_settings, status); diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h index 4869d4562e4d..550f46e9b95f 100644 --- a/drivers/gpu/drm/amd/display/include/link_service_types.h +++ b/drivers/gpu/drm/amd/display/include/link_service_types.h @@ -66,6 +66,8 @@ enum link_training_result { /* other failure during EQ step */ LINK_TRAINING_EQ_FAIL_EQ, LINK_TRAINING_LQA_FAIL, + /* one of the CR,EQ or symbol lock is dropped */ + LINK_TRAINING_LINK_LOSS, }; struct link_training_settings { -- cgit From 5ce868fc474e8797bd16f19e435a538554c371ab Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 21 Jul 2020 13:59:52 -0400 Subject: drm/amd/display: Use seperate dmcub firmware for navy_flounder [Why] Currently navy_flounder is using sienna_cichlid_dmcub.bin. [How] Create a seperate define so navy_flounder will use its own firmware. Signed-off-by: Bhawanpreet Lakha Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 533913bf8410..3067f54b9891 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -97,6 +97,8 @@ MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB); #if defined(CONFIG_DRM_AMD_DC_DCN3_0) #define FIRMWARE_SIENNA_CICHLID_DMUB "amdgpu/sienna_cichlid_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID_DMUB); +#define FIRMWARE_NAVY_FLOUNDER_DMUB "amdgpu/navy_flounder_dmcub.bin" +MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER_DMUB); #endif #define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin" @@ -1185,10 +1187,13 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) break; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: dmub_asic = DMUB_ASIC_DCN30; fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB; break; + case CHIP_NAVY_FLOUNDER: + dmub_asic = DMUB_ASIC_DCN30; + fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB; + break; #endif default: -- cgit From 5eaec83a883b02c5e1d44d0537bb63444543d1e6 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 24 Jul 2020 16:36:07 -0400 Subject: drm/amd/display: Use proper abm/backlight functions for DCN3 Use DCN21 functions instead of DCE110 Signed-off-by: Bhawanpreet Lakha Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 1b354c219d0a..9afee7160490 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -26,6 +26,7 @@ #include "dce110/dce110_hw_sequencer.h" #include "dcn10/dcn10_hw_sequencer.h" #include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" #include "dcn30_hwseq.h" static const struct hw_sequencer_funcs dcn30_funcs = { @@ -87,8 +88,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, - .set_backlight_level = dce110_set_backlight_level, - .set_abm_immediate_disable = dce110_set_abm_immediate_disable, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dcn30_private_funcs = { -- cgit From c5892a10218214d729699ab61bad6fc109baf0ce Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Tue, 30 Jun 2020 17:55:29 +0800 Subject: drm/amd/display: Fix dmesg warning from setting abm level [Why] Setting abm level does not correctly update CRTC state. As a result no surface update is added to dc stream state and triggers warning. [How] Correctly update CRTC state when setting abm level property. CC: Stable Signed-off-by: Stylon Wang Reviewed-by: Nicholas Kazlauskas Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3067f54b9891..e6833a2d01f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8549,6 +8549,29 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (ret) goto fail; + /* Check connector changes */ + for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { + struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); + struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); + + /* Skip connectors that are disabled or part of modeset already. */ + if (!old_con_state->crtc && !new_con_state->crtc) + continue; + + if (!new_con_state->crtc) + continue; + + new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc); + if (IS_ERR(new_crtc_state)) { + ret = PTR_ERR(new_crtc_state); + goto fail; + } + + if (dm_old_con_state->abm_level != + dm_new_con_state->abm_level) + new_crtc_state->connectors_changed = true; + } + #if defined(CONFIG_DRM_AMD_DC_DCN) if (adev->asic_type >= CHIP_NAVI10) { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { -- cgit From 6b6352dd1f96ca2464d7373557cb913f00c9e6dd Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 25 Jun 2020 13:24:12 -0400 Subject: drm/amd/display: Disable idle optimizations before programming DCN [Why] Programming DCN is explicitly forbidden during idle optimzations allowed state. Existing implemenation relies on OS/DM, which is not robust. Instead DC should sequence this. Note that DC will not re-enter idle optimized state on its own, it is only responsible for catching out of sequence calls. It is still DM responsibility to sequence appropriate for optimized power, but this change removes the requirement for DM to cover the .1% case. [How] - elevate updates during idle optimized state to full updates - disable idle power optimizations prior to programming Signed-off-by: Jun Lei Reviewed-by: Jun Lei Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ef0b5941bc50..92eb1ca1634f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1250,6 +1250,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c int i, k, l; struct dc_stream_state *dc_streams[MAX_STREAMS] = {0}; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + dc_allow_idle_optimizations(dc, false); +#endif for (i = 0; i < context->stream_count; i++) dc_streams[i] = context->streams[i]; @@ -1838,6 +1841,11 @@ static enum surface_update_type check_update_surfaces_for_stream( int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + if (dc->idle_optimizations_allowed) + overall_type = UPDATE_TYPE_FULL; + +#endif if (stream_status == NULL || stream_status->plane_count != surface_count) overall_type = UPDATE_TYPE_FULL; @@ -2306,8 +2314,14 @@ static void commit_planes_for_stream(struct dc *dc, } } - if (update_type == UPDATE_TYPE_FULL && dc->optimize_seamless_boot_streams == 0) { - dc->hwss.prepare_bandwidth(dc, context); + if (update_type == UPDATE_TYPE_FULL) { +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + dc_allow_idle_optimizations(dc, false); + +#endif + if (dc->optimize_seamless_boot_streams == 0) + dc->hwss.prepare_bandwidth(dc, context); + context_clock_trace(dc, context); } -- cgit From 471c1dd9546df81d259664ac3e2ab0e99169f755 Mon Sep 17 00:00:00 2001 From: Reza Amini Date: Wed, 15 Jul 2020 11:33:23 -0400 Subject: drm/amd/display: Allow asic specific FSFT timing optimization [Why] Each asic can optimize best based on its capabilities [How] Optimizing timing for a new pixel clock Signed-off-by: Reza Amini Reviewed-by: Anthony Koo Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 18 +++++++-------- drivers/gpu/drm/amd/display/dc/dc_stream.h | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 27 ++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h | 5 ++++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 3 +++ drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h | 5 ++++ .../drm/amd/display/modules/freesync/freesync.c | 5 +++- 8 files changed, 57 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 10d69ada88e3..0257a900fe2b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -246,20 +246,18 @@ struct dc_stream_status *dc_stream_get_status( #ifndef TRIM_FSFT /** - * dc_optimize_timing() - dc to optimize timing + * dc_optimize_timing_for_fsft() - dc to optimize timing */ -bool dc_optimize_timing( - struct dc_crtc_timing *timing, +bool dc_optimize_timing_for_fsft( + struct dc_stream_state *pStream, unsigned int max_input_rate_in_khz) { - //optimization is expected to assing a value to these: - //timing->pix_clk_100hz - //timing->v_front_porch - //timing->v_total - //timing->fast_transport_output_rate_100hz; - timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz; + struct dc *dc; - return true; + dc = pStream->ctx->dc; + + return (dc->hwss.optimize_timing_for_fsft && + dc->hwss.optimize_timing_for_fsft(dc, &pStream->timing, max_input_rate_in_khz)); } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e4e85a159462..633442bc7ef2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -424,8 +424,8 @@ struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *dc_stream); #ifndef TRIM_FSFT -bool dc_optimize_timing( - struct dc_crtc_timing *timing, +bool dc_optimize_timing_for_fsft( + struct dc_stream_state *pStream, unsigned int max_input_rate_in_khz); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 7725a406c16e..66180b4332f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2498,3 +2498,30 @@ void dcn20_fpga_init_hw(struct dc *dc) tg->funcs->tg_init(tg); } } +#ifndef TRIM_FSFT +bool dcn20_optimize_timing_for_fsft(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz) +{ + unsigned int old_v_front_porch; + unsigned int old_v_total; + unsigned int max_input_rate_in_100hz; + unsigned long long new_v_total; + + max_input_rate_in_100hz = max_input_rate_in_khz * 10; + if (max_input_rate_in_100hz < timing->pix_clk_100hz) + return false; + + old_v_total = timing->v_total; + old_v_front_porch = timing->v_front_porch; + + timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz; + timing->pix_clk_100hz = max_input_rate_in_100hz; + + new_v_total = div_u64((unsigned long long)old_v_total * max_input_rate_in_100hz, timing->pix_clk_100hz); + + timing->v_total = new_v_total; + timing->v_front_porch = old_v_front_porch + (timing->v_total - old_v_total); + return true; +} +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h index 63ce763f148e..83220e34c1a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h @@ -132,5 +132,10 @@ int dcn20_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config); +#ifndef TRIM_FSFT +bool dcn20_optimize_timing_for_fsft(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz); +#endif #endif /* __DC_HWSS_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 2380392b916e..3dde6f26de47 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -88,6 +88,9 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, +#ifndef TRIM_FSFT + .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft, +#endif }; static const struct hwseq_private_funcs dcn20_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 177d0dc8927a..b187f71afa65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -92,6 +92,9 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, +#ifndef TRIM_FSFT + .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft, +#endif }; static const struct hwseq_private_funcs dcn21_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 720ce5e458d8..3c986717dcd5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -116,6 +116,11 @@ struct hw_sequencer_funcs { void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, const struct dc_static_screen_params *events); +#ifndef TRIM_FSFT + bool (*optimize_timing_for_fsft)(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz); +#endif /* Stream Related */ void (*enable_stream)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 7a2500fbf3f2..81820f3d6b3b 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -829,10 +829,13 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, switch (packet_type) { case PACKET_TYPE_FS_V3: #ifndef TRIM_FSFT + // always populate with pixel rate. build_vrr_infopacket_v3( stream->signal, vrr, stream->timing.flags.FAST_TRANSPORT, - stream->timing.fast_transport_output_rate_100hz, + (stream->timing.flags.FAST_TRANSPORT) ? + stream->timing.fast_transport_output_rate_100hz : + stream->timing.pix_clk_100hz, app_tf, infopacket); #else build_vrr_infopacket_v3(stream->signal, vrr, app_tf, infopacket); -- cgit From 8b0379a85762b516c7b46aed7dbf2a4947c00564 Mon Sep 17 00:00:00 2001 From: hersen wu Date: Sun, 19 Jul 2020 17:21:59 -0400 Subject: drm/amd/display: dchubbub p-state warning during surface planes switch [Why] ramp_up_dispclk_with_dpp is to change dispclk, dppclk and dprefclk according to bandwidth requirement. call stack: rv1_update_clocks --> update_clocks --> dcn10_prepare_bandwidth / dcn10_optimize_bandwidth --> prepare_bandwidth / optimize_bandwidth. before change dcn hw, prepare_bandwidth will be called first to allow enough clock, watermark for change, after end of dcn hw change, optimize_bandwidth is executed to lower clock to save power for new dcn hw settings. below is sequence of commit_planes_for_stream: step 1: prepare_bandwidth - raise clock to have enough bandwidth step 2: lock_doublebuffer_enable step 3: pipe_control_lock(true) - make dchubp register change will not take effect right way step 4: apply_ctx_for_surface - program dchubp step 5: pipe_control_lock(false) - dchubp register change take effect step 6: optimize_bandwidth --> dc_post_update_surfaces_to_stream for full_date, optimize clock to save power at end of step 1, dcn clocks (dprefclk, dispclk, dppclk) may be changed for new dchubp configuration. but real dcn hub dchubps are still running with old configuration until end of step 5. this need clocks settings at step 1 should not less than that before step 1. this is checked by two conditions: 1. if (should_set_clock(safe_to_lower , new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) || new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) 2. request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz the second condition is based on new dchubp configuration. dppclk for new dchubp may be different from dppclk before step 1. for example, before step 1, dchubps are as below: pipe 0: recout=(0,40,1920,980) viewport=(0,0,1920,979) pipe 1: recout=(0,0,1920,1080) viewport=(0,0,1920,1080) for dppclk for pipe0 need dppclk = dispclk new dchubp pipe split configuration: pipe 0: recout=(0,0,960,1080) viewport=(0,0,960,1080) pipe 1: recout=(960,0,960,1080) viewport=(960,0,960,1080) dppclk only needs dppclk = dispclk /2. dispclk, dppclk are not lock by otg master lock. they take effect after step 1. during this transition, dispclk are the same, but dppclk is changed to half of previous clock for old dchubp configuration between step 1 and step 6. This may cause p-state warning intermittently. [How] for new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz, we need make sure dppclk are not changed to less between step 1 and 6. for new_clocks->dispclk_khz > clk_mgr_base->clks.dispclk_khz, new display clock is raised, but we do not know ratio of new_clocks->dispclk_khz and clk_mgr_base->clks.dispclk_khz, new_clocks->dispclk_khz /2 does not guarantee equal or higher than old dppclk. we could ignore power saving different between dppclk = displck and dppclk = dispclk / 2 between step 1 and step 6. as long as safe_to_lower = false, set dpclk = dispclk to simplify condition check. CC: Stable Signed-off-by: Hersen Wu Reviewed-by: Aric Cyr Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c | 69 +++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index 3fab9296918a..e133edc587d3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -85,12 +85,77 @@ static int rv1_determine_dppclk_threshold(struct clk_mgr_internal *clk_mgr, stru return disp_clk_threshold; } -static void ramp_up_dispclk_with_dpp(struct clk_mgr_internal *clk_mgr, struct dc *dc, struct dc_clocks *new_clocks) +static void ramp_up_dispclk_with_dpp( + struct clk_mgr_internal *clk_mgr, + struct dc *dc, + struct dc_clocks *new_clocks, + bool safe_to_lower) { int i; int dispclk_to_dpp_threshold = rv1_determine_dppclk_threshold(clk_mgr, new_clocks); bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; + /* this function is to change dispclk, dppclk and dprefclk according to + * bandwidth requirement. Its call stack is rv1_update_clocks --> + * update_clocks --> dcn10_prepare_bandwidth / dcn10_optimize_bandwidth + * --> prepare_bandwidth / optimize_bandwidth. before change dcn hw, + * prepare_bandwidth will be called first to allow enough clock, + * watermark for change, after end of dcn hw change, optimize_bandwidth + * is executed to lower clock to save power for new dcn hw settings. + * + * below is sequence of commit_planes_for_stream: + * + * step 1: prepare_bandwidth - raise clock to have enough bandwidth + * step 2: lock_doublebuffer_enable + * step 3: pipe_control_lock(true) - make dchubp register change will + * not take effect right way + * step 4: apply_ctx_for_surface - program dchubp + * step 5: pipe_control_lock(false) - dchubp register change take effect + * step 6: optimize_bandwidth --> dc_post_update_surfaces_to_stream + * for full_date, optimize clock to save power + * + * at end of step 1, dcn clocks (dprefclk, dispclk, dppclk) may be + * changed for new dchubp configuration. but real dcn hub dchubps are + * still running with old configuration until end of step 5. this need + * clocks settings at step 1 should not less than that before step 1. + * this is checked by two conditions: 1. if (should_set_clock(safe_to_lower + * , new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) || + * new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) + * 2. request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz + * + * the second condition is based on new dchubp configuration. dppclk + * for new dchubp may be different from dppclk before step 1. + * for example, before step 1, dchubps are as below: + * pipe 0: recout=(0,40,1920,980) viewport=(0,0,1920,979) + * pipe 1: recout=(0,0,1920,1080) viewport=(0,0,1920,1080) + * for dppclk for pipe0 need dppclk = dispclk + * + * new dchubp pipe split configuration: + * pipe 0: recout=(0,0,960,1080) viewport=(0,0,960,1080) + * pipe 1: recout=(960,0,960,1080) viewport=(960,0,960,1080) + * dppclk only needs dppclk = dispclk /2. + * + * dispclk, dppclk are not lock by otg master lock. they take effect + * after step 1. during this transition, dispclk are the same, but + * dppclk is changed to half of previous clock for old dchubp + * configuration between step 1 and step 6. This may cause p-state + * warning intermittently. + * + * for new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz, we + * need make sure dppclk are not changed to less between step 1 and 6. + * for new_clocks->dispclk_khz > clk_mgr_base->clks.dispclk_khz, + * new display clock is raised, but we do not know ratio of + * new_clocks->dispclk_khz and clk_mgr_base->clks.dispclk_khz, + * new_clocks->dispclk_khz /2 does not guarantee equal or higher than + * old dppclk. we could ignore power saving different between + * dppclk = displck and dppclk = dispclk / 2 between step 1 and step 6. + * as long as safe_to_lower = false, set dpclk = dispclk to simplify + * condition check. + * todo: review this change for other asic. + **/ + if (!safe_to_lower) + request_dpp_div = false; + /* set disp clk to dpp clk threshold */ clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold); @@ -209,7 +274,7 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base, /* program dispclk on = as a w/a for sleep resume clock ramping issues */ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) || new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) { - ramp_up_dispclk_with_dpp(clk_mgr, dc, new_clocks); + ramp_up_dispclk_with_dpp(clk_mgr, dc, new_clocks, safe_to_lower); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; send_request_to_lower = true; } -- cgit From b5fe6aa2b02cdfee75f747c68f3d303fddd09af9 Mon Sep 17 00:00:00 2001 From: Wyatt Wood Date: Wed, 17 Jun 2020 11:29:27 -0400 Subject: drm/amd/display: Use hw lock mgr [Why] Feature requires synchronization of dig, pipe, and cursor locking between driver and fw. [How] Set flag to force psr to use hw lock mgr. Signed-off-by: Wyatt Wood Reviewed-by: Anthony Koo Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 82e67bd81f2d..5167d6b8a48d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -233,8 +233,8 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ? true : false; + copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->init_sdp_deadline = psr_context->sdpTransmitLineNumDeadline; - copy_settings_data->debug.bitfields.use_hw_lock_mgr = 0; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); -- cgit From 76d5ef4ff10bc64f84a4b301cf8c25229048edd8 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 22 Jul 2020 15:40:06 -0400 Subject: drm/amd/display: Fix DP Compliance tests 4.3.2.1 and 4.3.2.2 [Why] Test expects that we also read HPD_IRQ_VECTOR when checking for symbol loss as well lane status. [How] Read bytes 0x200-0x205 instead of just 0x202-0x205 Signed-off-by: Aric Cyr Reviewed-by: Jun Lei Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 1a3dbed3becb..d7d2dcd49c06 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1138,23 +1138,22 @@ static enum link_training_result check_link_loss_status( const struct link_training_settings *link_training_setting) { enum link_training_result status = LINK_TRAINING_SUCCESS; - unsigned int lane01_status_address = DP_LANE0_1_STATUS; union lane_status lane_status; - uint8_t dpcd_buf[4] = {0}; + uint8_t dpcd_buf[6] = {0}; uint32_t lane; core_link_read_dpcd( - link, - lane01_status_address, - (uint8_t *)(dpcd_buf), - sizeof(dpcd_buf)); + link, + DP_SINK_COUNT, + (uint8_t *)(dpcd_buf), + sizeof(dpcd_buf)); /*parse lane status*/ for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) { /* * check lanes status */ - lane_status.raw = get_nibble_at_index(&dpcd_buf[0], lane); + lane_status.raw = get_nibble_at_index(&dpcd_buf[2], lane); if (!lane_status.bits.CHANNEL_EQ_DONE_0 || !lane_status.bits.CR_DONE_0 || -- cgit From a676a97623d399f7a2174b347d688412864d385a Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 27 Jul 2020 16:19:45 +0800 Subject: drm/amdgpu: skip crit temperature values on APU (v2) It doesn't expose PPTable descriptor on APU platform. So max/min temperature values cannot be got from APU platform. v2: Stoney needs to skip crit temperature as well. Signed-off-by: Huang Rui Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 5f20cadee343..e4dbf14320b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -3212,6 +3212,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; + /* Skip crit temp on APU */ + if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ) && + (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + return 0; + /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || -- cgit From 0730344ee370012574fe73c2785557d0e9180430 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Tue, 28 Jul 2020 19:14:22 +0800 Subject: drm/amd/powerplay: update driver if version for navy_flounder It's in accordance with pmfw 65.5.0 for navy_flounder. Signed-off-by: Jiansong Chen Reviewed-by: Tao Zhou Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index 9504f9954fd3..6a42331aba8a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -31,7 +31,7 @@ #define SMU11_DRIVER_IF_VERSION_NV12 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 #define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x34 -#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x2 +#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x3 /* MP Apertures */ #define MP0_Public 0x03800000 -- cgit From 278a4b5f62be85fa62d4439b5fbdf15027e99606 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Wed, 29 Jul 2020 11:58:21 +0800 Subject: drm/amdgpu: update GC golden setting for navy_flounder Update GC golden setting for navy_flounder. Signed-off-by: Jiansong Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8344c3b0b9b5..43cf3487c98a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3127,7 +3127,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), @@ -3158,7 +3158,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xffffffff, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) }; -- cgit From 6b6124bb4a0b05fa9ac052e14dd7c37c42ab9297 Mon Sep 17 00:00:00 2001 From: Liu ChengZhe Date: Fri, 24 Jul 2020 17:22:15 +0800 Subject: drm/amdgpu: fix PSP autoload twice in FLR Assigning false to block->status.hw overwrites PSP's previous hardware status, which causes the PSP to Resume operation after hardware init. Remove this assignment and let the PSP execute Resume operation when it is told to. v2: Remove the braces. v3: Modify the description. Signed-off-by: Liu ChengZhe Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aa5b54e5a1d7..eb7cfe87042e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2574,6 +2574,9 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; + for (i = 0; i < adev->num_ip_blocks; i++) + adev->ip_blocks[i].status.hw = false; + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { int j; struct amdgpu_ip_block *block; @@ -2581,7 +2584,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; - block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; -- cgit From d392aa02db1b3ecbec49bc8bd28a132298174539 Mon Sep 17 00:00:00 2001 From: Liu ChengZhe Date: Fri, 24 Jul 2020 15:55:33 +0800 Subject: drm amdgpu: Skip tmr load for SRIOV 1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation; 2. Check pointer before release firmware. v2: use CHIP_SIENNA_CICHLID instead v3: remove local "bool ret"; fix grammer issue v4: use my name instead of "root" v5: fix grammer issue and indent issue Signed-off-by: Liu ChengZhe Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 35 +++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8034111acd9a..7fe564275457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; psp_memory_training_fini(&adev->psp); - release_firmware(adev->psp.sos_fw); - adev->psp.sos_fw = NULL; - release_firmware(adev->psp.asd_fw); - adev->psp.asd_fw = NULL; - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (adev->psp.sos_fw) { + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + } + if (adev->psp.asd_fw) { + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + if (adev->psp.ta_fw) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + } if (adev->asic_type == CHIP_NAVI10) psp_sysfs_fini(adev); @@ -409,11 +415,28 @@ static int psp_clear_vf_fw(struct psp_context *psp) return ret; } +static bool psp_skip_tmr(struct psp_context *psp) +{ + switch (psp->adev->asic_type) { + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + return true; + default: + return false; + } +} + static int psp_tmr_load(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR. + * Already set up by host driver. + */ + if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; -- cgit From a15383893f798c0962ab87f7e159abba9cbe3822 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Thu, 30 Jul 2020 18:09:47 +0800 Subject: drm/amdgpu: enable GFXOFF for navy_flounder Enable GFXOFF for navy_flounder. Signed-off-by: Jiansong Chen Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 43cf3487c98a..65997ffaed45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7529,6 +7529,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: amdgpu_gfx_off_ctrl(adev, enable); break; default: diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index fd82402065e6..7b950a582a28 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1029,6 +1029,7 @@ int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; if (enable) -- cgit From c06f670f4701d3a19534eb7ca4f606b9771a94a3 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Mon, 20 Jul 2020 19:18:43 -0400 Subject: drm/amd/display: Use parameter for call to set output mux Signed-off-by: Eric Bernstein Reviewed-by: Chris Park Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 02742cca4d84..9f8ab679616c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3102,6 +3102,9 @@ void core_link_enable_stream( struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; enum dc_status status; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; +#endif DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); if (!IS_DIAG_DC(dc->ctx->dce_environment) && @@ -3136,8 +3139,8 @@ void core_link_enable_stream( pipe_ctx->stream->link->link_state_valid = true; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) - if (pipe_ctx->stream_res.tg->funcs->set_out_mux) - pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, OUT_MUX_DIO); + if (pipe_ctx->stream_res.tg->funcs->set_out_mux) + pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); #endif if (dc_is_dvi_signal(pipe_ctx->stream->signal)) -- cgit From 098214999c8f8eea8dffddb61be5742fac2c829e Mon Sep 17 00:00:00 2001 From: Igor Kravchenko Date: Sun, 19 Jul 2020 20:45:28 -0400 Subject: drm/amd/display: Read VBIOS Golden Settings Tbl [Why] For ver.4.4 and higher VBIOS contains default setting table. {How] Read Golden Settings Table from VBIOS, apply Aux tuning parameters. Signed-off-by: Igor Kravchenko Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 2 + drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 81 ++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 + drivers/gpu/drm/amd/display/dc/dc_bios_types.h | 4 ++ drivers/gpu/drm/amd/display/dc/dc_types.h | 14 ++++ .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h | 4 +- .../drm/amd/display/dc/dcn10/dcn10_link_encoder.h | 10 ++- .../drm/amd/display/dc/dcn20/dcn20_link_encoder.c | 14 +++- .../drm/amd/display/dc/dcn20/dcn20_link_encoder.h | 5 +- drivers/gpu/drm/amd/include/atomfirmware.h | 54 ++++++++++++++- 10 files changed, 182 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 008d4d11339d..ad394aefa5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2834,6 +2834,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .bios_parser_destroy = bios_parser_destroy, .get_board_layout_info = bios_get_board_layout_info, + + .get_atom_dc_golden_table = NULL }; static bool bios_parser_construct( diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index b8684131151d..f8db92fed9cf 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2079,6 +2079,85 @@ static uint16_t bios_parser_pack_data_tables( return 0; } +static struct atom_dc_golden_table_v1 *bios_get_golden_table( + struct bios_parser *bp, + uint32_t rev_major, + uint32_t rev_minor, + uint16_t *dc_golden_table_ver) +{ + struct atom_display_controller_info_v4_4 *disp_cntl_tbl_4_4 = NULL; + uint32_t dc_golden_offset = 0; + *dc_golden_table_ver = 0; + + if (!DATA_TABLES(dce_info)) + return NULL; + + /* ver.4.4 or higher */ + switch (rev_major) { + case 4: + switch (rev_minor) { + case 4: + disp_cntl_tbl_4_4 = GET_IMAGE(struct atom_display_controller_info_v4_4, + DATA_TABLES(dce_info)); + if (!disp_cntl_tbl_4_4) + return NULL; + dc_golden_offset = disp_cntl_tbl_4_4->dc_golden_table_offset; + *dc_golden_table_ver = disp_cntl_tbl_4_4->dc_golden_table_ver; + break; + } + break; + } + + if (!dc_golden_offset) + return NULL; + + if (*dc_golden_table_ver != 1) + return NULL; + + return GET_IMAGE(struct atom_dc_golden_table_v1, + dc_golden_offset); +} + +static enum bp_result bios_get_atom_dc_golden_table( + struct dc_bios *dcb) +{ + struct bios_parser *bp = BP_FROM_DCB(dcb); + enum bp_result result = BP_RESULT_OK; + struct atom_dc_golden_table_v1 *atom_dc_golden_table = NULL; + struct atom_common_table_header *header; + struct atom_data_revision tbl_revision; + uint16_t dc_golden_table_ver = 0; + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(dce_info)); + if (!header) + return BP_RESULT_UNSUPPORTED; + + get_atom_data_table_revision(header, &tbl_revision); + + atom_dc_golden_table = bios_get_golden_table(bp, + tbl_revision.major, + tbl_revision.minor, + &dc_golden_table_ver); + + if (!atom_dc_golden_table) + return BP_RESULT_UNSUPPORTED; + + dcb->golden_table.dc_golden_table_ver = dc_golden_table_ver; + dcb->golden_table.aux_dphy_rx_control0_val = atom_dc_golden_table->aux_dphy_rx_control0_val; + dcb->golden_table.aux_dphy_rx_control1_val = atom_dc_golden_table->aux_dphy_rx_control1_val; + dcb->golden_table.aux_dphy_tx_control_val = atom_dc_golden_table->aux_dphy_tx_control_val; + dcb->golden_table.dc_gpio_aux_ctrl_0_val = atom_dc_golden_table->dc_gpio_aux_ctrl_0_val; + dcb->golden_table.dc_gpio_aux_ctrl_1_val = atom_dc_golden_table->dc_gpio_aux_ctrl_1_val; + dcb->golden_table.dc_gpio_aux_ctrl_2_val = atom_dc_golden_table->dc_gpio_aux_ctrl_2_val; + dcb->golden_table.dc_gpio_aux_ctrl_3_val = atom_dc_golden_table->dc_gpio_aux_ctrl_3_val; + dcb->golden_table.dc_gpio_aux_ctrl_4_val = atom_dc_golden_table->dc_gpio_aux_ctrl_4_val; + dcb->golden_table.dc_gpio_aux_ctrl_5_val = atom_dc_golden_table->dc_gpio_aux_ctrl_5_val; + + return result; +} + + static const struct dc_vbios_funcs vbios_funcs = { .get_connectors_number = bios_parser_get_connectors_number, @@ -2128,6 +2207,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .get_board_layout_info = bios_get_board_layout_info, .pack_data_tables = bios_parser_pack_data_tables, + + .get_atom_dc_golden_table = bios_get_atom_dc_golden_table }; static bool bios_parser2_construct( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9f8ab679616c..071c7b32b282 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1540,6 +1540,9 @@ static bool dc_link_construct(struct dc_link *link, } } + if (bios->funcs->get_atom_dc_golden_table) + bios->funcs->get_atom_dc_golden_table(bios); + /* * TODO check if GPIO programmed correctly * diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 845a3054f21f..d06d07042a12 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -133,6 +133,9 @@ struct dc_vbios_funcs { uint16_t (*pack_data_tables)( struct dc_bios *dcb, void *dst); + + enum bp_result (*get_atom_dc_golden_table)( + struct dc_bios *dcb); }; struct bios_registers { @@ -154,6 +157,7 @@ struct dc_bios { struct dc_firmware_info fw_info; bool fw_info_valid; struct dc_vram_info vram_info; + struct dc_golden_table golden_table; }; #endif /* DC_BIOS_TYPES_H */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 29fe5389f973..946ba929c6f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -890,6 +890,20 @@ struct dsc_dec_dpcd_caps { uint32_t branch_max_line_width; }; +struct dc_golden_table { + uint16_t dc_golden_table_ver; + uint32_t aux_dphy_rx_control0_val; + uint32_t aux_dphy_tx_control_val; + uint32_t aux_dphy_rx_control1_val; + uint32_t dc_gpio_aux_ctrl_0_val; + uint32_t dc_gpio_aux_ctrl_1_val; + uint32_t dc_gpio_aux_ctrl_2_val; + uint32_t dc_gpio_aux_ctrl_3_val; + uint32_t dc_gpio_aux_ctrl_4_val; + uint32_t dc_gpio_aux_ctrl_5_val; +}; + + #if defined(CONFIG_DRM_AMD_DC_DCN3_0) enum dc_gpu_mem_alloc_type { DC_MEM_ALLOC_TYPE_GART, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 384389f0e2c3..66027d496778 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -38,7 +38,8 @@ #define AUX_REG_LIST(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ - SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id) + SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ + SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id) #define HPD_REG_LIST(id)\ SRI(DC_HPD_CONTROL, HPD, id) @@ -107,6 +108,7 @@ struct dce110_link_enc_aux_registers { uint32_t AUX_CONTROL; uint32_t AUX_DPHY_RX_CONTROL0; + uint32_t AUX_DPHY_RX_CONTROL1; }; struct dce110_link_enc_hpd_registers { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index cf59ab0034dc..04dabed5f1c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -31,10 +31,10 @@ #define TO_DCN10_LINK_ENC(link_encoder)\ container_of(link_encoder, struct dcn10_link_encoder, base) - #define AUX_REG_LIST(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ - SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id) + SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ + SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id) #define HPD_REG_LIST(id)\ SRI(DC_HPD_CONTROL, HPD, id) @@ -73,6 +73,7 @@ struct dcn10_link_enc_aux_registers { uint32_t AUX_CONTROL; uint32_t AUX_DPHY_RX_CONTROL0; uint32_t AUX_DPHY_TX_CONTROL; + uint32_t AUX_DPHY_RX_CONTROL1; }; struct dcn10_link_enc_hpd_registers { @@ -443,7 +444,10 @@ struct dcn10_link_enc_registers { type AUX_TX_PRECHARGE_LEN; \ type AUX_TX_PRECHARGE_SYMBOLS; \ type AUX_MODE_DET_CHECK_DELAY;\ - type DPCS_DBG_CBUS_DIS + type DPCS_DBG_CBUS_DIS;\ + type AUX_RX_PRECHARGE_SKIP;\ + type AUX_RX_TIMEOUT_LEN;\ + type AUX_RX_TIMEOUT_LEN_MUL struct dcn10_link_enc_shift { DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c index 8d209dae66e6..15c2ff264ff6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c @@ -309,7 +309,6 @@ bool dcn20_link_encoder_is_in_alt_mode(struct link_encoder *enc) void enc2_hw_init(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); - /* 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4 @@ -333,9 +332,18 @@ void enc2_hw_init(struct link_encoder *enc) AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 AUX_RX_DETECTION_THRESHOLD [30:28] = 1 */ - AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + if (enc->ctx->dc_bios->golden_table.dc_golden_table_ver > 0) { + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, enc->ctx->dc_bios->golden_table.aux_dphy_rx_control0_val); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, enc->ctx->dc_bios->golden_table.aux_dphy_tx_control_val); + + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL1, enc->ctx->dc_bios->golden_table.aux_dphy_rx_control1_val); + } else { + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c4d); - AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + } //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h index db09f40075c2..bf0044f7417e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h @@ -191,7 +191,10 @@ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\ - LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh) + LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh) #define UNIPHY_DCN2_REG_LIST(id) \ SRI(CLOCK_ENABLE, SYMCLK, id), \ diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index c2544c81dfb2..3e526c394f6c 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -941,7 +941,6 @@ struct atom_display_controller_info_v4_1 uint8_t reserved3[8]; }; - struct atom_display_controller_info_v4_2 { struct atom_common_table_header table_header; @@ -976,6 +975,59 @@ struct atom_display_controller_info_v4_2 uint8_t reserved3[8]; }; +struct atom_display_controller_info_v4_4 { + struct atom_common_table_header table_header; + uint32_t display_caps; + uint32_t bootup_dispclk_10khz; + uint16_t dce_refclk_10khz; + uint16_t i2c_engine_refclk_10khz; + uint16_t dvi_ss_percentage; // in unit of 0.001% + uint16_t dvi_ss_rate_10hz; + uint16_t hdmi_ss_percentage; // in unit of 0.001% + uint16_t hdmi_ss_rate_10hz; + uint16_t dp_ss_percentage; // in unit of 0.001% + uint16_t dp_ss_rate_10hz; + uint8_t dvi_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t hdmi_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t dp_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t ss_reserved; + uint8_t dfp_hardcode_mode_num; // DFP hardcode mode number defined in StandardVESA_TimingTable when EDID is not available + uint8_t dfp_hardcode_refreshrate;// DFP hardcode mode refreshrate defined in StandardVESA_TimingTable when EDID is not available + uint8_t vga_hardcode_mode_num; // VGA hardcode mode number defined in StandardVESA_TimingTable when EDID is not avablable + uint8_t vga_hardcode_refreshrate;// VGA hardcode mode number defined in StandardVESA_TimingTable when EDID is not avablable + uint16_t dpphy_refclk_10khz; + uint16_t hw_chip_id; + uint8_t dcnip_min_ver; + uint8_t dcnip_max_ver; + uint8_t max_disp_pipe_num; + uint8_t max_vbios_active_disp_pipum; + uint8_t max_ppll_num; + uint8_t max_disp_phy_num; + uint8_t max_aux_pairs; + uint8_t remotedisplayconfig; + uint32_t dispclk_pll_vco_freq; + uint32_t dp_ref_clk_freq; + uint32_t max_mclk_chg_lat; // Worst case blackout duration for a memory clock frequency (p-state) change, units of 100s of ns (0.1 us) + uint32_t max_sr_exit_lat; // Worst case memory self refresh exit time, units of 100ns of ns (0.1us) + uint32_t max_sr_enter_exit_lat; // Worst case memory self refresh entry followed by immediate exit time, units of 100ns of ns (0.1us) + uint16_t dc_golden_table_offset; // point of struct of atom_dc_golden_table_vxx + uint16_t dc_golden_table_ver; + uint32_t reserved3[3]; +}; + +struct atom_dc_golden_table_v1 +{ + uint32_t aux_dphy_rx_control0_val; + uint32_t aux_dphy_tx_control_val; + uint32_t aux_dphy_rx_control1_val; + uint32_t dc_gpio_aux_ctrl_0_val; + uint32_t dc_gpio_aux_ctrl_1_val; + uint32_t dc_gpio_aux_ctrl_2_val; + uint32_t dc_gpio_aux_ctrl_3_val; + uint32_t dc_gpio_aux_ctrl_4_val; + uint32_t dc_gpio_aux_ctrl_5_val; + uint32_t reserved[23]; +}; enum dce_info_caps_def { -- cgit From 7c6981e7ca74be39a16a770315606d495a860275 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 26 Jun 2020 14:30:29 -0400 Subject: drm/amd/display: populate new dml variable Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index b0064087b9bb..afdd4f0d9d71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -467,7 +467,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.AudioSampleLayout[mode_lib->vba.NumberOfActivePlanes] = 1; mode_lib->vba.DRAMClockChangeLatencyOverride = 0.0; - mode_lib->vba.DSCEnabled[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable; + mode_lib->vba.DSCEnabled[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable;; + mode_lib->vba.DSCEnable[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable; mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_slices; mode_lib->vba.DSCInputBitPerComponent[mode_lib->vba.NumberOfActivePlanes] = -- cgit From 7edac0d3fbf58db6a807bad5c5e264bb830906aa Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 30 Jun 2020 11:16:05 -0400 Subject: drm/amd/display: Fix logger context [Why&How] use correct logger context Signed-off-by: Harry Wentland Reviewed-by: Roman Li Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 4e6e18bbef5d..72743058836d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -71,8 +71,9 @@ enum dentist_divider_range { #define CTX \ clk_mgr->base.ctx + #define DC_LOGGER \ - clk_mgr->ctx->logger + clk_mgr->base.ctx->logger -- cgit From e106c96e2e0a0b9c1305d54480b60cb670d7fd1b Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 23 Jul 2020 13:06:23 -0400 Subject: drm/amd/display: AMD OUI (DPCD 0x00300) skipped on some sink [Why] Sink OUI supported cap is not set so driver skips programming it. [How] Revert the change the skips OUI programming if the cap is not set Signed-off-by: Aric Cyr Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index d7d2dcd49c06..9bc03f26efda 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4339,22 +4339,6 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) void dpcd_set_source_specific_data(struct dc_link *link) { - uint8_t dspc = 0; - enum dc_status ret; - - ret = core_link_read_dpcd(link, DP_DOWN_STREAM_PORT_COUNT, &dspc, - sizeof(dspc)); - - if (ret != DC_OK) { - DC_LOG_ERROR("Error in DP aux read transaction," - " not writing source specific data\n"); - return; - } - - /* Return if OUI unsupported */ - if (!(dspc & DP_OUI_SUPPORT)) - return; - if (!link->dc->vendor_signature.is_valid) { struct dpcd_amd_signature amd_signature; amd_signature.AMD_IEEE_TxSignature_byte1 = 0x0; -- cgit From bd42538f1f40a6de16eead6ab01a1c214e6894e4 Mon Sep 17 00:00:00 2001 From: "JinZe.Xu" Date: Tue, 21 Jul 2020 17:52:41 +0800 Subject: drm/amd/display: Use helper function to check for HDMI signal [How] Use dc_is_hdmi_signal to determine signal type. Signed-off-by: JinZe.Xu Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 071c7b32b282..4bd6e03a7ef3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3282,7 +3282,7 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx) dc_is_virtual_signal(pipe_ctx->stream->signal)) return; - if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { core_link_set_avmute(pipe_ctx, true); } -- cgit From ab2cf4c86b24821b6a4164a20d46961da1686634 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 17 Jul 2020 13:19:27 -0400 Subject: drm/amd/display: Change null plane state swizzle mode to 4kb_s [Why] During SetPathMode and UpdatePlanes, the plane state can be null. We default to linear swizzle mode when plane state is null. This resulted in bandwidth validation failing when trying to set 8K60 mode (which previously passed validation during rebuild timing list). [How] Change the default swizzle mode from linear to 4kb_s and update pitch accordingly. Signed-off-by: George Shen Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2a5e7175926a..790baf552695 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2223,7 +2223,7 @@ int dcn20_populate_dml_pipes_from_context( if (!res_ctx->pipe_ctx[i].plane_state) { pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled; pipes[pipe_cnt].pipe.src.source_scan = dm_horz; - pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_linear; + pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s; pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile; pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable; if (pipes[pipe_cnt].pipe.src.viewport_width > 1920) @@ -2235,7 +2235,7 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; - pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */ + pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; pipes[pipe_cnt].pipe.src.source_format = dm_444_32; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ -- cgit From d6a4e5e9fed072702a03f45584df7e74a8ba79ee Mon Sep 17 00:00:00 2001 From: Igor Kravchenko Date: Fri, 24 Jul 2020 11:10:40 -0400 Subject: drm/amd/display: Display goes blank after inst [why] Display goes blank after driver installation. Aux tuning parameters must be used for 2.x only. Wrong dc_golden_table offset was used. [How] Implement a new enc3_hw_init function without VBIOS constants usage to be called for 3.x Calculate dc_golden_table offset using sum of base dce_info offset and golden table offset Signed-off-by: Igor Kravchenko Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 +- .../amd/display/dc/dcn30/dcn30_dio_link_encoder.c | 53 +++++++++++++++++++++- .../amd/display/dc/dcn30/dcn30_dio_link_encoder.h | 2 + 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index f8db92fed9cf..078b7e344185 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2101,7 +2101,7 @@ static struct atom_dc_golden_table_v1 *bios_get_golden_table( DATA_TABLES(dce_info)); if (!disp_cntl_tbl_4_4) return NULL; - dc_golden_offset = disp_cntl_tbl_4_4->dc_golden_table_offset; + dc_golden_offset = DATA_TABLES(dce_info) + disp_cntl_tbl_4_4->dc_golden_table_offset; *dc_golden_table_ver = disp_cntl_tbl_4_4->dc_golden_table_ver; break; } @@ -2115,7 +2115,7 @@ static struct atom_dc_golden_table_v1 *bios_get_golden_table( return NULL; return GET_IMAGE(struct atom_dc_golden_table_v1, - dc_golden_offset); + dc_golden_offset); } static enum bp_result bios_get_atom_dc_golden_table( diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c index c29326e9856a..2ae159e2dd6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c @@ -62,7 +62,7 @@ static const struct link_encoder_funcs dcn30_link_enc_funcs = { .read_state = link_enc2_read_state, .validate_output_with_stream = dcn30_link_encoder_validate_output_with_stream, - .hw_init = enc2_hw_init, + .hw_init = enc3_hw_init, .setup = dcn10_link_encoder_setup, .enable_tmds_output = dcn10_link_encoder_enable_tmds_output, .enable_dp_output = dcn20_link_encoder_enable_dp_output, @@ -203,3 +203,54 @@ void dcn30_link_encoder_construct( enc10->base.features.flags.bits.HDMI_6GB_EN = 0; } } + +#define AUX_REG(reg)\ + (enc10->aux_regs->reg) + +#define AUX_REG_READ(reg_name) \ + dm_read_reg(CTX, AUX_REG(reg_name)) + +#define AUX_REG_WRITE(reg_name, val) \ + dm_write_reg(CTX, AUX_REG(reg_name), val) +void enc3_hw_init(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + +/* + 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2 + 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4 + 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8 + 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16 + 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32 + 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64 + 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128 + 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256 +*/ + +/* + AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0, + AUX_RX_START_WINDOW = 1 [6:4] + AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8] + AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1 + AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0 + AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1 + AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 + AUX_RX_DETECTION_THRESHOLD [30:28] = 1 +*/ + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + + //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; + // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk + // 27MHz -> 0xd + // 100MHz -> 0x32 + // 48MHz -> 0x18 + + // Set TMDS_CTL0 to 1. This is a legacy setting. + REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1); + + dcn10_aux_initialize(enc10); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h index 585d1ce63db1..8e9fd59ccde8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h @@ -73,4 +73,6 @@ void dcn30_link_encoder_construct( const struct dcn10_link_enc_shift *link_shift, const struct dcn10_link_enc_mask *link_mask); +void enc3_hw_init(struct link_encoder *enc); + #endif /* __DC_LINK_ENCODER__DCN30_H__ */ -- cgit From 95a2687687f832a7f8fa32bdfe42a530796b71a2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Jul 2020 15:21:33 -0400 Subject: drm/amdgpu/smu: rework i2c adpater registration The i2c init/fini functions just register the i2c adapter. There is no need to call them during hw init/fini. They only need to be called once per driver init/fini. The previous behavior broke runtime pm because we unregistered the i2c adapter during suspend. Tested-by: Tom St Denis Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 12 ++++++------ drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 14 -------------- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 14 -------------- drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c | 14 -------------- 4 files changed, 6 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 8c624f1f33ba..ceef149d3410 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -579,6 +579,10 @@ static int smu_smc_table_sw_init(struct smu_context *smu) if (ret) return ret; + ret = smu_i2c_init(smu, &smu->adev->pm.smu_i2c); + if (ret) + return ret; + return 0; } @@ -586,6 +590,8 @@ static int smu_smc_table_sw_fini(struct smu_context *smu) { int ret; + smu_i2c_fini(smu, &smu->adev->pm.smu_i2c); + ret = smu_free_memory_pool(smu); if (ret) return ret; @@ -844,10 +850,6 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } - ret = smu_i2c_init(smu, &adev->pm.smu_i2c); - if (ret) - return ret; - ret = smu_disable_umc_cdr_12gbps_workaround(smu); if (ret) { dev_err(adev->dev, "Workaround failed to disable UMC CDR feature on 12Gbps SKU!\n"); @@ -1046,8 +1048,6 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; - smu_i2c_fini(smu, &adev->pm.smu_i2c); - cancel_work_sync(&smu->throttling_logging_work); ret = smu_disable_thermal_alert(smu); diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3b9182c8c53f..3b2ecb52a36f 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -2080,22 +2080,11 @@ static const struct i2c_algorithm arcturus_i2c_algo = { .functionality = arcturus_i2c_func, }; -static bool arcturus_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int arcturus_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - if (arcturus_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; @@ -2111,9 +2100,6 @@ static int arcturus_i2c_control_init(struct smu_context *smu, struct i2c_adapter static void arcturus_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!arcturus_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 6aaf483858a0..c33bdc6747f2 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -2457,22 +2457,11 @@ static const struct i2c_algorithm navi10_i2c_algo = { .functionality = navi10_i2c_func, }; -static bool navi10_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int navi10_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - if (navi10_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; @@ -2488,9 +2477,6 @@ static int navi10_i2c_control_init(struct smu_context *smu, struct i2c_adapter * static void navi10_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!navi10_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index f64a1be94cb8..f373e2d0d31c 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -2630,22 +2630,11 @@ static const struct i2c_algorithm sienna_cichlid_i2c_algo = { .functionality = sienna_cichlid_i2c_func, }; -static bool sienna_cichlid_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int sienna_cichlid_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - if (sienna_cichlid_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; @@ -2661,9 +2650,6 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu, struct i2c_a static void sienna_cichlid_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!sienna_cichlid_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } -- cgit From 86d709ce30eaa65706090865662a08d7bdd30c54 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 6 Aug 2020 19:38:02 +0800 Subject: x86/acrn: Allow ACRN guest to use X2APIC mode The ACRN Hypervisor did not support x2APIC and thus x2APIC support was disabled by always returning false when VM checked for x2APIC support. ACRN received full support of x2APIC and exports the capability through CPUID feature bits. Let VM decide if it needs to switch to x2APIC mode according to CPUID features. Originally-by: Yakui Zhao Signed-off-by: Shuo Liu Signed-off-by: Ingo Molnar Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20200806113802.9325-1-shuo.a.liu@intel.com --- arch/x86/kernel/cpu/acrn.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index 1da9b1c9a2db..3b08cdfc6514 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -29,12 +30,7 @@ static void __init acrn_init_platform(void) static bool acrn_x2apic_available(void) { - /* - * x2apic is not supported for now. Future enablement will have to check - * X86_FEATURE_X2APIC to determine whether x2apic is supported in the - * guest. - */ - return false; + return boot_cpu_has(X86_FEATURE_X2APIC); } static void (*acrn_intr_handler)(void); -- cgit From 4c7bfa383efd837d4ab8f86ef05886959ed8bfe5 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 6 Aug 2020 19:41:11 +0800 Subject: x86/acrn: Remove redundant chars from ACRN signature hypervisor_cpuid_base() only handles 12 chars of the hypervisor signature string but is provided with 14 chars. Remove the redundancy. Additionally, replace the user space uint32_t with preferred kernel type u32. Signed-off-by: Shuo Liu Signed-off-by: Ingo Molnar Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20200806114111.9448-1-shuo.a.liu@intel.com --- arch/x86/kernel/cpu/acrn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index 3b08cdfc6514..0b2c03943ac6 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -17,9 +17,9 @@ #include #include -static uint32_t __init acrn_detect(void) +static u32 __init acrn_detect(void) { - return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0); + return hypervisor_cpuid_base("ACRNACRNACRN", 0); } static void __init acrn_init_platform(void) -- cgit From a3e1c3bb24e2ff2927af5e30c2bebe669bb84196 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 4 Aug 2020 12:49:31 +0800 Subject: x86/crash: Correct the address boundary of function parameters Let's carefully handle the boundary of the function parameter to make sure that the arguments passed doesn't exceed the address range. Signed-off-by: Lianbo Jiang Signed-off-by: Ingo Molnar Acked-by: Dave Young Link: https://lore.kernel.org/r/20200804044933.1973-2-lijiang@redhat.com --- arch/x86/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index fd87b59452a3..a8f3af257e26 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -230,7 +230,7 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) int ret = 0; /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, 1<<20); + ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1); if (ret) return ret; -- cgit From a2e9a95d2190ef55bf0724ecdf8a466d393a86b6 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 4 Aug 2020 12:49:32 +0800 Subject: kexec: Improve & fix crash_exclude_mem_range() to handle overlapping ranges The crash_exclude_mem_range() function can only handle one memory region a time. It will fail in the case in which the passed in area covers several memory regions. In this case, it will only exclude the first region, then return, but leave the later regions unsolved. E.g in a NEC system with two usable RAM regions inside the low 1M: ... BIOS-e820: [mem 0x0000000000000000-0x000000000003efff] usable BIOS-e820: [mem 0x000000000003f000-0x000000000003ffff] reserved BIOS-e820: [mem 0x0000000000040000-0x000000000009ffff] usable It will only exclude the memory region [0, 0x3efff], the memory region [0x40000, 0x9ffff] will still be added into /proc/vmcore, which may cause the following failure when dumping vmcore: ioremap on RAM at 0x0000000000040000 - 0x0000000000040fff WARNING: CPU: 0 PID: 665 at arch/x86/mm/ioremap.c:186 __ioremap_caller+0x2c7/0x2e0 ... RIP: 0010:__ioremap_caller+0x2c7/0x2e0 ... cp: error reading '/proc/vmcore': Cannot allocate memory kdump: saving vmcore failed In order to fix this bug, let's extend the crash_exclude_mem_range() to handle the overlapping ranges. [ mingo: Amended the changelog. ] Signed-off-by: Lianbo Jiang Signed-off-by: Ingo Molnar Acked-by: Dave Young Link: https://lore.kernel.org/r/20200804044933.1973-3-lijiang@redhat.com --- kernel/kexec_file.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 94661d2d13ad..97fa68267197 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -1157,24 +1157,26 @@ int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend) { int i, j; - unsigned long long start, end; + unsigned long long start, end, p_start, p_end; struct crash_mem_range temp_range = {0, 0}; for (i = 0; i < mem->nr_ranges; i++) { start = mem->ranges[i].start; end = mem->ranges[i].end; + p_start = mstart; + p_end = mend; if (mstart > end || mend < start) continue; /* Truncate any area outside of range */ if (mstart < start) - mstart = start; + p_start = start; if (mend > end) - mend = end; + p_end = end; /* Found completely overlapping range */ - if (mstart == start && mend == end) { + if (p_start == start && p_end == end) { mem->ranges[i].start = 0; mem->ranges[i].end = 0; if (i < mem->nr_ranges - 1) { @@ -1185,20 +1187,29 @@ int crash_exclude_mem_range(struct crash_mem *mem, mem->ranges[j].end = mem->ranges[j+1].end; } + + /* + * Continue to check if there are another overlapping ranges + * from the current position because of shifting the above + * mem ranges. + */ + i--; + mem->nr_ranges--; + continue; } mem->nr_ranges--; return 0; } - if (mstart > start && mend < end) { + if (p_start > start && p_end < end) { /* Split original range */ - mem->ranges[i].end = mstart - 1; - temp_range.start = mend + 1; + mem->ranges[i].end = p_start - 1; + temp_range.start = p_end + 1; temp_range.end = end; - } else if (mstart != start) - mem->ranges[i].end = mstart - 1; + } else if (p_start != start) + mem->ranges[i].end = p_start - 1; else - mem->ranges[i].start = mend + 1; + mem->ranges[i].start = p_end + 1; break; } @@ -1243,7 +1254,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). * I think this is required by tools like gdb. So same physical - * memory will be mapped in two elf headers. One will contain kernel + * memory will be mapped in two elf headers. One will contain kernel * text virtual addresses and other will have __va(physical) addresses. */ @@ -1270,7 +1281,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, ehdr->e_ehsize = sizeof(Elf64_Ehdr); ehdr->e_phentsize = sizeof(Elf64_Phdr); - /* Prepare one phdr of type PT_NOTE for each present cpu */ + /* Prepare one phdr of type PT_NOTE for each present CPU */ for_each_present_cpu(cpu) { phdr->p_type = PT_NOTE; notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); -- cgit From 475f63ae63b5102ae6423d1712333929d04d6ecc Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 4 Aug 2020 12:49:33 +0800 Subject: kexec_file: Correctly output debugging information for the PT_LOAD ELF header Currently, when we enable the debugging switch to debug kexec_file, we always get the following incorrect results: kexec_file: Crash PT_LOAD elf header. phdr=00000000c988639b vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=51 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=000000003cca69a0 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=52 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000c584cb9f vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=53 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000cf85d57f vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=54 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000a4a8f847 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=55 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000272ec49f vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=56 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000ea0b65de vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=57 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=000000001f5e490c vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=58 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000dfe4109e vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=59 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000480ed2b6 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=60 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=0000000080b65151 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=61 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=0000000024e31c5e vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=62 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000332e0385 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=63 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=000000002754d5da vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=64 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=00000000783320dd vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=65 p_offset=0x0 kexec_file: Crash PT_LOAD elf header. phdr=0000000076fe5b64 vaddr=0x0, paddr=0x0, sz=0x0 e_phnum=66 p_offset=0x0 The reason is that kernel always prints the values of the next PT_LOAD instead of the current PT_LOAD. Change it to ensure that we can get the correct debugging information. [ mingo: Amended changelog, capitalized "ELF". ] Signed-off-by: Lianbo Jiang Signed-off-by: Ingo Molnar Acked-by: Dave Young Link: https://lore.kernel.org/r/20200804044933.1973-4-lijiang@redhat.com --- kernel/kexec_file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 97fa68267197..3f7867c1820f 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -1246,7 +1246,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, unsigned long long notes_addr; unsigned long mstart, mend; - /* extra phdr for vmcoreinfo elf note */ + /* extra phdr for vmcoreinfo ELF note */ nr_phdr = nr_cpus + 1; nr_phdr += mem->nr_ranges; @@ -1254,7 +1254,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). * I think this is required by tools like gdb. So same physical - * memory will be mapped in two elf headers. One will contain kernel + * memory will be mapped in two ELF headers. One will contain kernel * text virtual addresses and other will have __va(physical) addresses. */ @@ -1323,10 +1323,10 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; phdr->p_align = 0; ehdr->e_phnum++; - phdr++; - pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", + pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, ehdr->e_phnum, phdr->p_offset); + phdr++; } *addr = buf; -- cgit From 7d98585860d845e36ee612832a5ff021f201dbaf Mon Sep 17 00:00:00 2001 From: Dilip Kota Date: Mon, 3 Aug 2020 15:56:36 +0800 Subject: x86/tsr: Fix tsc frequency enumeration bug on Lightning Mountain SoC Frequency descriptor of Lightning Mountain SoC doesn't have all the frequency entries so resulting in the below failure causing a kernel hang: Error MSR_FSB_FREQ index 15 is unknown tsc: Fast TSC calibration failed So, add all the frequency entries in the Lightning Mountain SoC frequency descriptor. Fixes: 0cc5359d8fd45 ("x86/cpu: Update init data for new Airmont CPU model") Fixes: 812c2d7506fd ("x86/tsc_msr: Use named struct initializers") Signed-off-by: Dilip Kota Signed-off-by: Ingo Molnar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/211c643ae217604b46cbec43a2c0423946dc7d2d.1596440057.git.eswara.kota@linux.intel.com --- arch/x86/kernel/tsc_msr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4fec6f3a1858..a654a9b4b77c 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -133,10 +133,15 @@ static const struct freq_desc freq_desc_ann = { .mask = 0x0f, }; -/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */ +/* + * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz + * Frequency step for Lightning Mountain SoC is fixed to 78 MHz, + * so all the frequency entries are 78000. + */ static const struct freq_desc freq_desc_lgm = { .use_msr_plat = true, - .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, + .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000, + 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, .mask = 0x0f, }; -- cgit From 52416ffcf823ee11aa19792715664ab94757f111 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Mon, 3 Aug 2020 13:49:48 +0800 Subject: x86/purgatory: Don't generate debug info for purgatory.ro Purgatory.ro is a standalone binary that is not linked against the rest of the kernel. Its image is copied into an array that is linked to the kernel, and from there kexec relocates it wherever it desires. Unlike the debug info for vmlinux, which can be used for analyzing crash such info is useless in purgatory.ro. And discarding them can save about 200K space. Original: 259080 kexec-purgatory.o Stripped debug info: 29152 kexec-purgatory.o Signed-off-by: Pingfan Liu Signed-off-by: Ingo Molnar Reviewed-by: Nick Desaulniers Reviewed-by: Steve Wahl Acked-by: Dave Young Link: https://lore.kernel.org/r/1596433788-3784-1-git-send-email-kernelfans@gmail.com --- arch/x86/purgatory/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 088bd764e0b7..d24b43a4451a 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -32,7 +32,7 @@ KCOV_INSTRUMENT := n # make up the standalone purgatory.ro PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel -PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss +PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0 PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector) @@ -64,6 +64,9 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_string.o += $(PURGATORY_CFLAGS) +AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) -- cgit From 76d10256a97a7cab72b123d54b766a3c17da658c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 20 Jul 2020 06:50:51 -0700 Subject: x86/fpu/xstate: Fix an xstate size check warning with architectural LBRs An xstate size check warning is triggered on machines which support Architectural LBRs. XSAVE consistency problem, dumping leaves WARNING: CPU: 0 PID: 0 at arch/x86/kernel/fpu/xstate.c:649 fpu__init_system_xstate+0x4d4/0xd0e Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted intel-arch_lbr+ RIP: 0010:fpu__init_system_xstate+0x4d4/0xd0e The xstate size check routine, init_xstate_size(), compares the size retrieved from the hardware with the size of task->fpu, which is calculated by the software. The size from the hardware is the total size of the enabled xstates in XCR0 | IA32_XSS. Architectural LBR state is a dynamic supervisor feature, which sets the corresponding bit in the IA32_XSS at boot time. The size from the hardware includes the size of the Architectural LBR state. However, a dynamic supervisor feature doesn't allocate a buffer in the task->fpu. The size of task->fpu doesn't include the size of the Architectural LBR state. The mismatch will trigger the warning. Three options as below were considered to fix the issue: - Correct the size from the hardware by subtracting the size of the dynamic supervisor features. The purpose of the check is to compare the size CPU told with the size of the XSAVE buffer, which is calculated by the software. If the software mucks with the number from hardware, it removes the value of the check. This option is not a good option. - Prevent the hardware from counting the size of the dynamic supervisor feature by temporarily removing the corresponding bits in IA32_XSS. Two extra MSR writes are required to flip the IA32_XSS. The option is not pretty, but it is workable. The check is only called once at early boot time. The synchronization or context-switching doesn't need to be worried. This option is implemented here. - Remove the check entirely, because the check hasn't found any real problems. The option may be an alternative as option 2. This option is not implemented here. Add a new function, get_xsaves_size_no_dynamic(), which retrieves the total size without the dynamic supervisor features from the hardware. The size will be used to compare with the size of task->fpu. Fixes: f0dccc9da4c0 ("x86/fpu/xstate: Support dynamic supervisor feature for LBR") Reported-by: Chang S. Bae Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Dave Hansen Link: https://lore.kernel.org/r/1595253051-75374-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/kernel/fpu/xstate.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index be2a68a09d19..6073e342a1ed 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -611,6 +611,10 @@ static void check_xstate_against_struct(int nr) * This essentially double-checks what the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. + * + * Dynamic XSAVE features allocate their own buffers and are not + * covered by these checks. Only the size of the buffer for task->fpu + * is checked here. */ static void do_extra_xstate_size_checks(void) { @@ -673,6 +677,33 @@ static unsigned int __init get_xsaves_size(void) return ebx; } +/* + * Get the total size of the enabled xstates without the dynamic supervisor + * features. + */ +static unsigned int __init get_xsaves_size_no_dynamic(void) +{ + u64 mask = xfeatures_mask_dynamic(); + unsigned int size; + + if (!mask) + return get_xsaves_size(); + + /* Disable dynamic features. */ + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); + + /* + * Ask the hardware what size is required of the buffer. + * This is the size required for the task->fpu buffer. + */ + size = get_xsaves_size(); + + /* Re-enable dynamic features so XSAVES will work on them again. */ + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); + + return size; +} + static unsigned int __init get_xsave_size(void) { unsigned int eax, ebx, ecx, edx; @@ -710,7 +741,7 @@ static int __init init_xstate_size(void) xsave_size = get_xsave_size(); if (boot_cpu_has(X86_FEATURE_XSAVES)) - possible_xstate_size = get_xsaves_size(); + possible_xstate_size = get_xsaves_size_no_dynamic(); else possible_xstate_size = xsave_size; -- cgit From f29dfa53cc8ae6ad93bae619bcc0bf45cab344f7 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 16 Jul 2020 12:23:59 -0700 Subject: x86/bugs/multihit: Fix mitigation reporting when VMX is not in use On systems that have virtualization disabled or unsupported, sysfs mitigation for X86_BUG_ITLB_MULTIHIT is reported incorrectly as: $ cat /sys/devices/system/cpu/vulnerabilities/itlb_multihit KVM: Vulnerable System is not vulnerable to DoS attack from a rogue guest when virtualization is disabled or unsupported in the hardware. Change the mitigation reporting for these cases. Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Reported-by: Nelson Dsouza Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/0ba029932a816179b9d14a30db38f0f11ef1f166.1594925782.git.pawan.kumar.gupta@linux.intel.com --- Documentation/admin-guide/hw-vuln/multihit.rst | 4 ++++ arch/x86/kernel/cpu/bugs.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst index ba9988d8bce5..140e4cec38c3 100644 --- a/Documentation/admin-guide/hw-vuln/multihit.rst +++ b/Documentation/admin-guide/hw-vuln/multihit.rst @@ -80,6 +80,10 @@ The possible values in this file are: - The processor is not vulnerable. * - KVM: Mitigation: Split huge pages - Software changes mitigate this issue. + * - KVM: Mitigation: VMX unsupported + - KVM is not vulnerable because Virtual Machine Extensions (VMX) is not supported. + * - KVM: Mitigation: VMX disabled + - KVM is not vulnerable because Virtual Machine Extensions (VMX) is disabled. * - KVM: Vulnerable - The processor is vulnerable, but no mitigation enabled diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f0b743a2fe9c..d3f0db463f96 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "cpu.h" @@ -1549,7 +1550,12 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t itlb_multihit_show_state(char *buf) { - if (itlb_multihit_kvm_mitigation) + if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !boot_cpu_has(X86_FEATURE_VMX)) + return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + else if (!(cr4_read_shadow() & X86_CR4_VMXE)) + return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + else if (itlb_multihit_kvm_mitigation) return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); else return sprintf(buf, "KVM: Vulnerable\n"); -- cgit From 6bcaf41f9613278cd5897fc80ab93033bda8efaa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Aug 2020 17:47:57 -0700 Subject: selftests/bpf: Prevent runqslower from racing on building bpftool runqslower's Makefile is building/installing bpftool into $(OUTPUT)/sbin/bpftool, which coincides with $(DEFAULT_BPFTOOL). In practice this means that often when building selftests from scratch (after `make clean`), selftests are racing with runqslower to simultaneously build bpftool and one of the two processes fail due to file being busy. Prevent this race by explicitly order-depending on $(BPFTOOL_DEFAULT). Fixes: a2c9652f751e ("selftests: Refactor build to remove tools/lib/bpf from include path") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805004757.2960750-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48..48425f9251b5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -142,7 +142,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +166,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool -- cgit From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/linux/bpf.h | 10 ++++---- include/uapi/linux/bpf.h | 15 ++++++------ kernel/bpf/bpf_iter.c | 58 +++++++++++++++++++++++------------------------ kernel/bpf/map_iter.c | 37 +++++++++++++++++++++++------- kernel/bpf/syscall.c | 2 +- net/core/bpf_sk_storage.c | 37 +++++++++++++++++++++++------- 6 files changed, 102 insertions(+), 57 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 363b9cafc2d8..b6715964b685 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link) struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); - if (iter_link->aux.map) - bpf_map_put_with_uref(iter_link->aux.map); + if (iter_link->tinfo->reg_info->detach_target) + iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link) int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + union bpf_iter_link_info __user *ulinfo; struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; - struct bpf_iter_aux_info aux = {}; + union bpf_iter_link_info linfo; struct bpf_iter_link *link; - u32 prog_btf_id, target_fd; + u32 prog_btf_id, linfo_len; bool existed = false; - struct bpf_map *map; int err; + if (attr->link_create.target_fd || attr->link_create.flags) + return -EINVAL; + + memset(&linfo, 0, sizeof(union bpf_iter_link_info)); + + ulinfo = u64_to_user_ptr(attr->link_create.iter_info); + linfo_len = attr->link_create.iter_info_len; + if (!ulinfo ^ !linfo_len) + return -EINVAL; + + if (ulinfo) { + err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), + linfo_len); + if (err) + return err; + linfo_len = min_t(u32, linfo_len, sizeof(linfo)); + if (copy_from_user(&linfo, ulinfo, linfo_len)) + return -EFAULT; + } + prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; - /* Make sure user supplied flags are target expected. */ - target_fd = attr->link_create.target_fd; - if (attr->link_create.flags != tinfo->reg_info->req_linfo) - return -EINVAL; - if (!attr->link_create.flags && target_fd) - return -EINVAL; - link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } - if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { - map = bpf_map_get_with_uref(target_fd); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto cleanup_link; - } - - aux.map = map; - err = tinfo->reg_info->check_target(prog, &aux); + if (tinfo->reg_info->attach_target) { + err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { - bpf_map_put_with_uref(map); - goto cleanup_link; + bpf_link_cleanup(&link_primer); + return err; } - - link->aux.map = map; } return bpf_link_settle(&link_primer); - -cleanup_link: - bpf_link_cleanup(&link_primer); - return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index fbe1f557cb88..af86048e5afd 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = { .seq_info = &bpf_map_seq_info, }; -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { u32 key_acc_size, value_acc_size, key_size, value_size; - struct bpf_map *map = aux->map; + struct bpf_map *map; bool is_percpu = false; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) - return -EINVAL; + goto put_map; key_acc_size = prog->aux->max_rdonly_access; value_acc_size = prog->aux->max_rdwr_access; @@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else value_size = round_up(map->value_size, 8) * num_possible_cpus(); - if (key_acc_size > key_size || value_acc_size > value_size) - return -EACCES; + if (key_acc_size > key_size || value_acc_size > value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, @@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, static const struct bpf_iter_reg bpf_map_elem_reg_info = { .target = "bpf_map_elem", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map_elem, key), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2f343ce15747..86299a292214 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog * return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.flags +#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len static int link_create(union bpf_attr *attr) { enum bpf_prog_type ptype; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d3377c90a291..b988f48153a4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data, return 0; } -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { - struct bpf_map *map = aux->map; + struct bpf_map *map; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) - return -EINVAL; + goto put_map; - if (prog->aux->max_rdonly_access > map->value_size) - return -EACCES; + if (prog->aux->max_rdonly_access > map->value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } static const struct seq_operations bpf_sk_storage_map_seq_ops = { @@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { .target = "bpf_sk_storage_map", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), -- cgit From 74fc097de327b37e8fe3ff580ce7ffaa7c1740dd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:58 -0700 Subject: tools/bpf: Support new uapi for map element bpf iterator Previous commit adjusted kernel uapi for map element bpf iterator. This patch adjusted libbpf API due to uapi change. bpftool and bpf_iter selftests are also changed accordingly. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055058.1457623-1-yhs@fb.com --- tools/bpf/bpftool/iter.c | 9 +++-- tools/include/uapi/linux/bpf.h | 15 +++++---- tools/lib/bpf/bpf.c | 3 ++ tools/lib/bpf/bpf.h | 5 ++- tools/lib/bpf/libbpf.c | 6 ++-- tools/lib/bpf/libbpf.h | 5 +-- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 40 ++++++++++++++++++----- 7 files changed, 58 insertions(+), 25 deletions(-) diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba..3b1aad7535dd 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d..0750681057c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f4..015d13f25fcc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29c..0a06124f7999 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc..5ecb4069a9f0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f..7375d9a6d242 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map *skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); -- cgit From 932ac54a3e59335a847f7682b5124a788ab3c798 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Thu, 6 Aug 2020 07:33:59 +0900 Subject: libbf: Fix uninitialized pointer at btf__parse_raw() Recently, from commit 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs"), new API has been added to libbpf that allows to parse BTF from raw data file (btf__parse_raw()). The commit derives build failure of samples/bpf due to improper access of uninitialized pointer at btf_parse_raw(). btf.c: In function btf__parse_raw: btf.c:625:28: error: btf may be used uninitialized in this function 625 | return err ? ERR_PTR(err) : btf; | ~~~~~~~~~~~~~~~~~~~^~~~~ This commit fixes the build failure of samples/bpf by adding code of initializing btf pointer as NULL. Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805223359.32109-1-danieltimlee@gmail.com --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 856b09a04563..4843e44916f7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -564,8 +564,8 @@ done: struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; -- cgit From d48556f45608921fa07cb882f9dd15a8a1203427 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 08:52:25 -0700 Subject: bpf: Add missing return to resolve_btfids int sets_patch(struct object *obj) doesn't have a 'return 0' at the end. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806155225.637202-1-sdf@google.com --- tools/bpf/resolve_btfids/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a23..4d9ecb975862 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -566,6 +566,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) -- cgit From 0d360d64b01231cdb36e1936de889f308fd9317f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 11:26:12 -0700 Subject: bpf: Remove inline from bpf_do_trace_printk I get the following error during compilation on my side: kernel/trace/bpf_trace.c: In function 'bpf_do_trace_printk': kernel/trace/bpf_trace.c:386:34: error: function 'bpf_do_trace_printk' can never be inlined because it uses variable argument lists static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) ^ Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806182612.1390883-1-sdf@google.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb91ef902cc4..a8d4f253ed77 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -383,7 +383,7 @@ static DEFINE_RAW_SPINLOCK(trace_printk_lock); #define BPF_TRACE_PRINTK_SIZE 1024 -static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) { static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; -- cgit From 6fc5916cc256b8e92cc7ad3b34cc4d2a7efa1d5c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Aug 2020 20:39:59 +0200 Subject: selftests: bpf: Switch off timeout Several bpf tests are interrupted by the default timeout of 45 seconds added by commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test"). In my case it was test_progs, test_tunnel.sh, test_lwt_ip_encap.sh and test_xdping.sh. There's not much value in having a timeout for bpf tests, switch it off. Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/7a9198ed10917f4ecab4a3dd74bcda1200791efd.1596739059.git.jbenc@redhat.com --- tools/testing/selftests/bpf/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/bpf/settings diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 -- cgit From 929e54a989680c6f134b02293732030b897475dc Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Thu, 6 Aug 2020 18:42:24 +0800 Subject: bpf: Fix compilation warning of selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang compiler version: 12.0.0 The following warning appears during the selftests/bpf compilation: prog_tests/send_signal.c:51:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] 51 | write(pipe_c2p[1], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ prog_tests/send_signal.c:54:3: warning: ignoring return value of ‘read’, declared with attribute warn_unused_result [-Wunused-result] 54 | read(pipe_p2c[0], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~ ...... prog_tests/stacktrace_build_id_nmi.c:13:2: warning: ignoring return value of ‘fscanf’,declared with attribute warn_unused_result [-Wunused-resul] 13 | fscanf(f, "%llu", &sample_freq); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:133:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 133 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:138:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 138 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:143:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 143 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ Add code that fix compilation warning about ignoring return value and handles any errors; Check return value of library`s API make the code more secure. Signed-off-by: Jianlin Lv Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806104224.95306-1-Jianlin.Lv@arm.com --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 18 ++++++++---------- .../selftests/bpf/prog_tests/stacktrace_build_id_nmi.c | 4 +++- tools/testing/selftests/bpf/test_tcpnotify_user.c | 13 ++++++++++--- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb95..7043e6ded0e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92..11a769e18f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab..73da7fe8c152 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { -- cgit From c7ca03c216acb14466a713fedf1b9f2c24994ef2 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 5 Aug 2020 18:50:40 -0700 Subject: drivers/net/wan/lapbether: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Changed to use needed_headroom instead of hard_header_len to request necessary headroom to be allocated In net/packet/af_packet.c, the function packet_snd first reserves a headroom of length (dev->hard_header_len + dev->needed_headroom). Then if the socket is a SOCK_DGRAM socket, it calls dev_hard_header, which calls dev->header_ops->create, to create the link layer header. If the socket is a SOCK_RAW socket, it "un-reserves" a headroom of length (dev->hard_header_len), and assumes the user to provide the appropriate link layer header. So according to the logic of af_packet.c, dev->hard_header_len should be the length of the header that would be created by dev->header_ops->create. However, this driver doesn't provide dev->header_ops, so logically dev->hard_header_len should be 0. So we should use dev->needed_headroom instead of dev->hard_header_len to request necessary headroom to be allocated. This change fixes kernel panic when this driver is used with AF_PACKET SOCK_RAW sockets. Call stack when panic: [ 168.399197] skbuff: skb_under_panic: text:ffffffff819d95fb len:20 put:14 head:ffff8882704c0a00 data:ffff8882704c09fd tail:0x11 end:0xc0 dev:veth0 ... [ 168.399255] Call Trace: [ 168.399259] skb_push.cold+0x14/0x24 [ 168.399262] eth_header+0x2b/0xc0 [ 168.399267] lapbeth_data_transmit+0x9a/0xb0 [lapbether] [ 168.399275] lapb_data_transmit+0x22/0x2c [lapb] [ 168.399277] lapb_transmit_buffer+0x71/0xb0 [lapb] [ 168.399279] lapb_kick+0xe3/0x1c0 [lapb] [ 168.399281] lapb_data_request+0x76/0xc0 [lapb] [ 168.399283] lapbeth_xmit+0x56/0x90 [lapbether] [ 168.399286] dev_hard_start_xmit+0x91/0x1f0 [ 168.399289] ? irq_init_percpu_irqstack+0xc0/0x100 [ 168.399291] __dev_queue_xmit+0x721/0x8e0 [ 168.399295] ? packet_parse_headers.isra.0+0xd2/0x110 [ 168.399297] dev_queue_xmit+0x10/0x20 [ 168.399298] packet_sendmsg+0xbf0/0x19b0 ...... Cc: Willem de Bruijn Cc: Martin Schiller Cc: Brian Norris Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index b2868433718f..1ea15f2123ed 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -157,6 +157,12 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb, if (!netif_running(dev)) goto drop; + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) + goto drop; + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -305,6 +311,7 @@ static void lapbeth_setup(struct net_device *dev) dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; + dev->hard_header_len = 0; dev->mtu = 1000; dev->addr_len = 0; } @@ -331,7 +338,8 @@ static int lapbeth_new_device(struct net_device *dev) * then this driver prepends a length field of 2 bytes, * then the underlying Ethernet device prepends its own header. */ - ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len; + ndev->needed_headroom = -1 + 3 + 2 + dev->hard_header_len + + dev->needed_headroom; lapbeth = netdev_priv(ndev); lapbeth->axdev = ndev; -- cgit From 20b135e48cdda7a7dfccef7286def66e5c9686c3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Aug 2020 12:52:07 +0300 Subject: cifs: Fix an error pointer dereference in cifs_mount() The error handling calls kfree(full_path) so we can't let it be a NULL pointer. There used to be a NULL assignment here but we accidentally deleted it. Add it back. Fixes: 7efd08158261 ("cifs: document and cleanup dfs mount") Signed-off-by: Dan Carpenter Signed-off-by: Steve French Reviewed-by: Paulo Alcantara (SUSE) --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7e3e5e2098eb..2fe6b1ab75c3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4886,6 +4886,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) full_path = build_unc_path_to_root(vol, cifs_sb, !!count); if (IS_ERR(full_path)) { rc = PTR_ERR(full_path); + full_path = NULL; break; } /* Chase referral */ -- cgit From e2d2fded6bdf3f7bb40718a208140dba8b4ec574 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 7 Aug 2020 16:05:12 +0800 Subject: ALSA: hda/realtek: Fix pin default on Intel NUC 8 Rugged The jack on Intel NUC 8 Rugged rear panel doesn't work. The spec [1] states that the jack supports both headphone and microphone, so override a Pin Complex which has both Amp-In and Amp-Out to make the jack work. Node 0x1b fits the requirement, and user confirmed the jack now works with new pin config. [1] https://www.intel.com/content/dam/support/us/en/documents/mini-pcs/NUC8CCH_TechProdSpec.pdf BugLink: https://bugs.launchpad.net/bugs/1875199 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200807080514.15293-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2477f3ed7237..daedcc0adc21 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6159,6 +6159,7 @@ enum { ALC269_FIXUP_CZC_L101, ALC269_FIXUP_LEMOTE_A1802, ALC269_FIXUP_LEMOTE_A190X, + ALC256_FIXUP_INTEL_NUC8_RUGGED, }; static const struct hda_fixup alc269_fixups[] = { @@ -7480,6 +7481,15 @@ static const struct hda_fixup alc269_fixups[] = { }, .chain_id = ALC269_FIXUP_DMIC, }, + [ALC256_FIXUP_INTEL_NUC8_RUGGED] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7777,6 +7787,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), #if 0 /* Below is a quirk table taken from the old code. -- cgit From 608a973bd52ef1705941b18f55e39655fcfe9cdb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 6 Aug 2020 09:22:15 -0700 Subject: Drivers: hv: vmbus: Only notify Hyper-V for die events that are oops Hyper-V currently may be notified of a panic for any die event. But this results in false panic notifications for various user space traps that are die events. Fix this by ignoring die events that aren't oops. Fixes: 510f7aef65bb ("Drivers: hv: vmbus: prefer 'die' notification chain to 'panic'") Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/1596730935-11564-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d69f4efa3719..45c954d75e70 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -87,6 +87,10 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, struct die_args *die = (struct die_args *)args; struct pt_regs *regs = die->regs; + /* Don't notify Hyper-V if the die event is other than oops */ + if (val != DIE_OOPS) + return NOTIFY_DONE; + /* * Hyper-V should be notified only once about a panic. If we will be * doing hyperv_report_panic_msg() later with kmsg data, don't do -- cgit From 7fb20f9e901e6df2f7dc032d88da5abff4117d37 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 7 Aug 2020 12:50:29 +0100 Subject: bpf, doc: Remove references to warning message when using bpf_trace_printk() The BPF helper bpf_trace_printk() no longer uses trace_printk(); it is now triggers a dedicated trace event. Hence the described warning is no longer present, so remove the discussion of it as it may confuse people. Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1596801029-32395-1-git-send-email-alan.maguire@oracle.com --- Documentation/bpf/bpf_design_QA.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index 12a246fcf6cb..2df7b067ab93 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -246,17 +246,6 @@ program is loaded the kernel will print warning message, so this helper is only useful for experiments and prototypes. Tracing BPF programs are root only. -Q: bpf_trace_printk() helper warning ------------------------------------- -Q: When bpf_trace_printk() helper is used the kernel prints nasty -warning message. Why is that? - -A: This is done to nudge program authors into better interfaces when -programs need to pass data to user space. Like bpf_perf_event_output() -can be used to efficiently stream data via perf ring buffer. -BPF maps can be used for asynchronous data sharing between kernel -and user space. bpf_trace_printk() should only be used for debugging. - Q: New functionality via kernel modules? ---------------------------------------- Q: Can BPF functionality such as new program or map types, new -- cgit From d5ca590525cfbd87ca307dcf498a566e2e7c1767 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Aug 2020 20:30:57 -0700 Subject: selftests/bpf: Fix silent Makefile output 99aacebecb75 ("selftests: do not use .ONESHELL") removed .ONESHELL, which changes how Makefile "silences" multi-command target recipes. selftests/bpf's Makefile relied (a somewhat unknowingly) on .ONESHELL behavior of silencing all commands within the recipe if the first command contains @ symbol. Removing .ONESHELL exposed this hack. This patch fixes the issue by explicitly silencing each command with $(Q). Also explicitly define fallback rule for building *.o from *.c, instead of relying on non-silent inherited rule. This was causing a non-silent output for bench.o object file. Fixes: 92f7440ecc93 ("selftests/bpf: More succinct Makefile output") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033058.848677-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 48 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 48425f9251b5..a83b5827532f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -181,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -238,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -301,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -321,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once @@ -345,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -353,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -367,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -421,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -444,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ -- cgit From b8c1a3090741f349322ad855d2b66d6e9752a60d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Aug 2020 20:31:41 -0700 Subject: bpf: Delete repeated words in comments Drop repeated words in kernel/bpf/: {has, the} Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033141.10437-1-rdunlap@infradead.org --- kernel/bpf/core.c | 2 +- kernel/bpf/verifier.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bde93344164d..ed0b3578867c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1966,7 +1966,7 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array, * @index: the index of the program to replace * * Skips over dummy programs, by not counting them, when calculating - * the the position of the program to replace. + * the position of the program to replace. * * Return: * * 0 - Success diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6ccfce3bf4c..ef938f17b944 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8294,7 +8294,7 @@ static bool stacksafe(struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> + * this stack slot, but current has STACK_MISC -> * this verifier states are not equivalent, * return false to continue verification of this path */ -- cgit From 384b02d6b83bd4c5df96de00658623b4e04e2472 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Fri, 7 Aug 2020 17:35:29 +0800 Subject: i2c: designware: Add device HID for Hygon I2C controller Add device HID HYGO0010 to match the Hygon ACPI Vendor ID (HYGO) that was registered in http://www.uefi.org/acpi_id_list, and the I2C controller on Hygon paltform will use the HID. Signed-off-by: Pu Wen Acked-by: Andy Shevchenko Acked-by: Wolfram Sang Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 1 + drivers/i2c/busses/i2c-designware-platdrv.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index ba2612e9a0eb..f24f6d3f1fa5 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -240,6 +240,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = { { "AMDI0020", APD_ADDR(cz_uart_desc) }, { "AMD0030", }, { "AMD0040", APD_ADDR(st_misc_desc)}, + { "HYGO0010", APD_ADDR(wt_i2c_desc) }, #endif #ifdef CONFIG_ARM64 { "APMC0D0F", APD_ADDR(xgene_i2c_desc) }, diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index a71bc58fc03c..0dfeb2d11603 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -55,6 +55,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = { { "HISI02A1", 0 }, { "HISI02A2", 0 }, { "HISI02A3", 0 }, + { "HYGO0010", ACCESS_INTR_MASK }, { } }; MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match); -- cgit From d58669b093997e4e5f98c38a54f99761657c19d2 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:01 +0530 Subject: ACPI: APD: Change name from ST to FCH AMD SoC general pupose clk is present in new platforms with same MMIO mappings. We can reuse the same clk handler support for other platforms. Hence, changing name from ST(SoC) to FCH(IP) Signed-off-by: Akshu Agrawal Acked-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 14 +++++++------- drivers/clk/x86/clk-st.c | 4 ++-- include/linux/platform_data/clk-fch.h | 17 +++++++++++++++++ include/linux/platform_data/clk-st.h | 17 ----------------- 4 files changed, 26 insertions(+), 26 deletions(-) create mode 100644 include/linux/platform_data/clk-fch.h delete mode 100644 include/linux/platform_data/clk-st.h diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index f24f6d3f1fa5..36319deb24d9 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include @@ -79,11 +79,11 @@ static int misc_check_res(struct acpi_resource *ares, void *data) return !acpi_dev_resource_memory(ares, &res); } -static int st_misc_setup(struct apd_private_data *pdata) +static int fch_misc_setup(struct apd_private_data *pdata) { struct acpi_device *adev = pdata->adev; struct platform_device *clkdev; - struct st_clk_data *clk_data; + struct fch_clk_data *clk_data; struct resource_entry *rentry; struct list_head resource_list; int ret; @@ -106,7 +106,7 @@ static int st_misc_setup(struct apd_private_data *pdata) acpi_dev_free_resource_list(&resource_list); - clkdev = platform_device_register_data(&adev->dev, "clk-st", + clkdev = platform_device_register_data(&adev->dev, "clk-fch", PLATFORM_DEVID_NONE, clk_data, sizeof(*clk_data)); return PTR_ERR_OR_ZERO(clkdev); @@ -135,8 +135,8 @@ static const struct apd_device_desc cz_uart_desc = { .properties = uart_properties, }; -static const struct apd_device_desc st_misc_desc = { - .setup = st_misc_setup, +static const struct apd_device_desc fch_misc_desc = { + .setup = fch_misc_setup, }; #endif @@ -239,7 +239,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = { { "AMD0020", APD_ADDR(cz_uart_desc) }, { "AMDI0020", APD_ADDR(cz_uart_desc) }, { "AMD0030", }, - { "AMD0040", APD_ADDR(st_misc_desc)}, + { "AMD0040", APD_ADDR(fch_misc_desc)}, { "HYGO0010", APD_ADDR(wt_i2c_desc) }, #endif #ifdef CONFIG_ARM64 diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c index 25d4b97aff9b..c2438874d9f2 100644 --- a/drivers/clk/x86/clk-st.c +++ b/drivers/clk/x86/clk-st.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include /* Clock Driving Strength 2 register */ @@ -31,7 +31,7 @@ static struct clk_hw *hws[ST_MAX_CLKS]; static int st_clk_probe(struct platform_device *pdev) { - struct st_clk_data *st_data; + struct fch_clk_data *st_data; st_data = dev_get_platdata(&pdev->dev); if (!st_data || !st_data->base) diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h new file mode 100644 index 000000000000..850ca776156d --- /dev/null +++ b/include/linux/platform_data/clk-fch.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * clock framework for AMD misc clocks + * + * Copyright 2018 Advanced Micro Devices, Inc. + */ + +#ifndef __CLK_FCH_H +#define __CLK_FCH_H + +#include + +struct fch_clk_data { + void __iomem *base; +}; + +#endif /* __CLK_FCH_H */ diff --git a/include/linux/platform_data/clk-st.h b/include/linux/platform_data/clk-st.h deleted file mode 100644 index 7cdb6a402b35..000000000000 --- a/include/linux/platform_data/clk-st.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * clock framework for AMD Stoney based clock - * - * Copyright 2018 Advanced Micro Devices, Inc. - */ - -#ifndef __CLK_ST_H -#define __CLK_ST_H - -#include - -struct st_clk_data { - void __iomem *base; -}; - -#endif /* __CLK_ST_H */ -- cgit From d9b77361c1a5155e3d803d4ce9d7010269d301d3 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:02 +0530 Subject: clk: x86: Change name from ST to FCH AMD SoC general pupose clk is present in new platforms with minor differences. We can reuse the same clk driver for other platforms. Hence, changing name from ST(SoC) to FCH(IP) Signed-off-by: Akshu Agrawal Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/clk/x86/Makefile | 2 +- drivers/clk/x86/clk-fch.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/clk/x86/clk-st.c | 78 ----------------------------------------------- 3 files changed, 79 insertions(+), 79 deletions(-) create mode 100644 drivers/clk/x86/clk-fch.c delete mode 100644 drivers/clk/x86/clk-st.c diff --git a/drivers/clk/x86/Makefile b/drivers/clk/x86/Makefile index 7c774ea7ddeb..18564efdc651 100644 --- a/drivers/clk/x86/Makefile +++ b/drivers/clk/x86/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_PMC_ATOM) += clk-pmc-atom.o -obj-$(CONFIG_X86_AMD_PLATFORM_DEVICE) += clk-st.o +obj-$(CONFIG_X86_AMD_PLATFORM_DEVICE) += clk-fch.o clk-x86-lpss-objs := clk-lpt.o obj-$(CONFIG_X86_INTEL_LPSS) += clk-x86-lpss.o obj-$(CONFIG_CLK_LGM_CGU) += clk-cgu.o clk-cgu-pll.o clk-lgm.o diff --git a/drivers/clk/x86/clk-fch.c b/drivers/clk/x86/clk-fch.c new file mode 100644 index 000000000000..b252f0cf0628 --- /dev/null +++ b/drivers/clk/x86/clk-fch.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MIT +/* + * clock framework for AMD Stoney based clocks + * + * Copyright 2018 Advanced Micro Devices, Inc. + */ + +#include +#include +#include +#include +#include + +/* Clock Driving Strength 2 register */ +#define CLKDRVSTR2 0x28 +/* Clock Control 1 register */ +#define MISCCLKCNTL1 0x40 +/* Auxiliary clock1 enable bit */ +#define OSCCLKENB 2 +/* 25Mhz auxiliary output clock freq bit */ +#define OSCOUT1CLK25MHZ 16 + +#define ST_CLK_48M 0 +#define ST_CLK_25M 1 +#define ST_CLK_MUX 2 +#define ST_CLK_GATE 3 +#define ST_MAX_CLKS 4 + +static const char * const clk_oscout1_parents[] = { "clk48MHz", "clk25MHz" }; +static struct clk_hw *hws[ST_MAX_CLKS]; + +static int fch_clk_probe(struct platform_device *pdev) +{ + struct fch_clk_data *fch_data; + + fch_data = dev_get_platdata(&pdev->dev); + if (!fch_data || !fch_data->base) + return -EINVAL; + + hws[ST_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", NULL, 0, + 48000000); + hws[ST_CLK_25M] = clk_hw_register_fixed_rate(NULL, "clk25MHz", NULL, 0, + 25000000); + + hws[ST_CLK_MUX] = clk_hw_register_mux(NULL, "oscout1_mux", + clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), + 0, fch_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL); + + clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); + + hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux", + 0, fch_data->base + MISCCLKCNTL1, OSCCLKENB, + CLK_GATE_SET_TO_DISABLE, NULL); + + devm_clk_hw_register_clkdev(&pdev->dev, hws[ST_CLK_GATE], "oscout1", + NULL); + + return 0; +} + +static int fch_clk_remove(struct platform_device *pdev) +{ + int i; + + for (i = 0; i < ST_MAX_CLKS; i++) + clk_hw_unregister(hws[i]); + return 0; +} + +static struct platform_driver fch_clk_driver = { + .driver = { + .name = "clk-fch", + .suppress_bind_attrs = true, + }, + .probe = fch_clk_probe, + .remove = fch_clk_remove, +}; +builtin_platform_driver(fch_clk_driver); diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c deleted file mode 100644 index c2438874d9f2..000000000000 --- a/drivers/clk/x86/clk-st.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * clock framework for AMD Stoney based clocks - * - * Copyright 2018 Advanced Micro Devices, Inc. - */ - -#include -#include -#include -#include -#include - -/* Clock Driving Strength 2 register */ -#define CLKDRVSTR2 0x28 -/* Clock Control 1 register */ -#define MISCCLKCNTL1 0x40 -/* Auxiliary clock1 enable bit */ -#define OSCCLKENB 2 -/* 25Mhz auxiliary output clock freq bit */ -#define OSCOUT1CLK25MHZ 16 - -#define ST_CLK_48M 0 -#define ST_CLK_25M 1 -#define ST_CLK_MUX 2 -#define ST_CLK_GATE 3 -#define ST_MAX_CLKS 4 - -static const char * const clk_oscout1_parents[] = { "clk48MHz", "clk25MHz" }; -static struct clk_hw *hws[ST_MAX_CLKS]; - -static int st_clk_probe(struct platform_device *pdev) -{ - struct fch_clk_data *st_data; - - st_data = dev_get_platdata(&pdev->dev); - if (!st_data || !st_data->base) - return -EINVAL; - - hws[ST_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", NULL, 0, - 48000000); - hws[ST_CLK_25M] = clk_hw_register_fixed_rate(NULL, "clk25MHz", NULL, 0, - 25000000); - - hws[ST_CLK_MUX] = clk_hw_register_mux(NULL, "oscout1_mux", - clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), - 0, st_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL); - - clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); - - hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux", - 0, st_data->base + MISCCLKCNTL1, OSCCLKENB, - CLK_GATE_SET_TO_DISABLE, NULL); - - devm_clk_hw_register_clkdev(&pdev->dev, hws[ST_CLK_GATE], "oscout1", - NULL); - - return 0; -} - -static int st_clk_remove(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < ST_MAX_CLKS; i++) - clk_hw_unregister(hws[i]); - return 0; -} - -static struct platform_driver st_clk_driver = { - .driver = { - .name = "clk-st", - .suppress_bind_attrs = true, - }, - .probe = st_clk_probe, - .remove = st_clk_remove, -}; -builtin_platform_driver(st_clk_driver); -- cgit From 7f8802f2d2ed67ffbac9264f946a52507f749e19 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:03 +0530 Subject: ACPI: APD: Add a fmw property is_raven Since there is slight difference in AMD RV based soc in misc clk architecture. The fmw property will help in differentiating the SoCs. Signed-off-by: Akshu Agrawal Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 4 ++++ include/linux/platform_data/clk-fch.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 36319deb24d9..4c348377a39d 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -82,6 +82,7 @@ static int misc_check_res(struct acpi_resource *ares, void *data) static int fch_misc_setup(struct apd_private_data *pdata) { struct acpi_device *adev = pdata->adev; + const union acpi_object *obj; struct platform_device *clkdev; struct fch_clk_data *clk_data; struct resource_entry *rentry; @@ -98,6 +99,9 @@ static int fch_misc_setup(struct apd_private_data *pdata) if (ret < 0) return -ENOENT; + acpi_dev_get_property(adev, "is-rv", ACPI_TYPE_INTEGER, &obj); + clk_data->is_rv = obj->integer.value; + list_for_each_entry(rentry, &resource_list, node) { clk_data->base = devm_ioremap(&adev->dev, rentry->res->start, resource_size(rentry->res)); diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h index 850ca776156d..b9f682459f08 100644 --- a/include/linux/platform_data/clk-fch.h +++ b/include/linux/platform_data/clk-fch.h @@ -12,6 +12,7 @@ struct fch_clk_data { void __iomem *base; + u32 is_rv; }; #endif /* __CLK_FCH_H */ -- cgit From 19fe87fd854a92c746ac73cb91a0bebac07a4618 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:04 +0530 Subject: clk: x86: Support RV architecture There is minor difference between previous family of SoC and the current one. Which is the there is only 48Mh fixed clk. There is no mux and no option to select another freq as there in previous. Signed-off-by: Akshu Agrawal Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/clk/x86/clk-fch.c | 53 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/clk/x86/clk-fch.c b/drivers/clk/x86/clk-fch.c index b252f0cf0628..8f7c5142b0f0 100644 --- a/drivers/clk/x86/clk-fch.c +++ b/drivers/clk/x86/clk-fch.c @@ -26,6 +26,10 @@ #define ST_CLK_GATE 3 #define ST_MAX_CLKS 4 +#define RV_CLK_48M 0 +#define RV_CLK_GATE 1 +#define RV_MAX_CLKS 2 + static const char * const clk_oscout1_parents[] = { "clk48MHz", "clk25MHz" }; static struct clk_hw *hws[ST_MAX_CLKS]; @@ -37,33 +41,52 @@ static int fch_clk_probe(struct platform_device *pdev) if (!fch_data || !fch_data->base) return -EINVAL; - hws[ST_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", NULL, 0, - 48000000); - hws[ST_CLK_25M] = clk_hw_register_fixed_rate(NULL, "clk25MHz", NULL, 0, - 25000000); + if (!fch_data->is_rv) { + hws[ST_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", + NULL, 0, 48000000); + hws[ST_CLK_25M] = clk_hw_register_fixed_rate(NULL, "clk25MHz", + NULL, 0, 25000000); + + hws[ST_CLK_MUX] = clk_hw_register_mux(NULL, "oscout1_mux", + clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), + 0, fch_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, + NULL); - hws[ST_CLK_MUX] = clk_hw_register_mux(NULL, "oscout1_mux", - clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), - 0, fch_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL); + clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); - clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); + hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", + "oscout1_mux", 0, fch_data->base + MISCCLKCNTL1, + OSCCLKENB, CLK_GATE_SET_TO_DISABLE, NULL); - hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux", - 0, fch_data->base + MISCCLKCNTL1, OSCCLKENB, - CLK_GATE_SET_TO_DISABLE, NULL); + devm_clk_hw_register_clkdev(&pdev->dev, hws[ST_CLK_GATE], + "oscout1", NULL); + } else { + hws[RV_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", + NULL, 0, 48000000); - devm_clk_hw_register_clkdev(&pdev->dev, hws[ST_CLK_GATE], "oscout1", - NULL); + hws[RV_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", + "clk48MHz", 0, fch_data->base + MISCCLKCNTL1, + OSCCLKENB, CLK_GATE_SET_TO_DISABLE, NULL); + + devm_clk_hw_register_clkdev(&pdev->dev, hws[RV_CLK_GATE], + "oscout1", NULL); + } return 0; } static int fch_clk_remove(struct platform_device *pdev) { - int i; + int i, clks; + struct fch_clk_data *fch_data; - for (i = 0; i < ST_MAX_CLKS; i++) + fch_data = dev_get_platdata(&pdev->dev); + + clks = fch_data->is_rv ? RV_MAX_CLKS : ST_MAX_CLKS; + + for (i = 0; i < clks; i++) clk_hw_unregister(hws[i]); + return 0; } -- cgit From 96cf2a2c75567ff56195fe3126d497a2e7e4379f Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Thu, 6 Aug 2020 15:18:48 -0700 Subject: xfs: Fix UBSAN null-ptr-deref in xfs_sysfs_init If xfs_sysfs_init is called with parent_kobj == NULL, UBSAN shows the following warning: UBSAN: null-ptr-deref in ./fs/xfs/xfs_sysfs.h:37:23 member access within null pointer of type 'struct xfs_kobj' Call Trace: dump_stack+0x10e/0x195 ubsan_type_mismatch_common+0x241/0x280 __ubsan_handle_type_mismatch_v1+0x32/0x40 init_xfs_fs+0x12b/0x28f do_one_initcall+0xdd/0x1d0 do_initcall_level+0x151/0x1b6 do_initcalls+0x50/0x8f do_basic_setup+0x29/0x2b kernel_init_freeable+0x19f/0x20b kernel_init+0x11/0x1e0 ret_from_fork+0x22/0x30 Fix it by checking parent_kobj before the code accesses its member. Signed-off-by: Eiichi Tsukata Reviewed-by: Darrick J. Wong [darrick: minor whitespace edits] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_sysfs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index e9f810fc6731..43585850f154 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -32,9 +32,11 @@ xfs_sysfs_init( struct xfs_kobj *parent_kobj, const char *name) { + struct kobject *parent; + + parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, - &parent_kobj->kobject, "%s", name); + return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); } static inline void -- cgit From 16c24801122e2961c588562f04707b98f9c84bbb Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 6 Aug 2020 13:54:31 +0100 Subject: rtc: max77686: Fix wake-ups for max77620 Following commit d8f090dbeafd ("rtc: max77686: Do not allow interrupt to fire before system resume"), RTC wake-ups stopped working on Jetson TX2 and Jetson Xavier platforms. The Jetson TX2 uses the max77620 PMIC and the Jetson Xavier uses max20024 PMIC. Both of these PMICs have the same max77620 RTC controller. For the max77620 RTC, the variable 'rtc_irq_from_platform' is defined as true in the max77686 driver and because of this the IRQ passed to the max77686 driver for RTC is the PMIC IRQ and not the parent. Hence, following commit d8f090dbeafd ("rtc: max77686: Do not allow interrupt to fire before system resume"), for the max77620 the RTC IRQ within the PMIC is now getting disabled on entry to suspend and unable to wake the system up. Fix this by only disabling interrupts on entry to suspend in the max77686 RTC driver, if the interrupt is the parent interrupt. Fixes: d8f090dbeafd ("rtc: max77686: Do not allow interrupt to fire before system resume") Signed-off-by: Jon Hunter Signed-off-by: Alexandre Belloni Tested-by: Thierry Reding Reviewed-by: Thierry Reding Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200806125431.699339-1-jonathanh@nvidia.com --- drivers/rtc/rtc-max77686.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 645de5af707b..d51cc12114cb 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -815,13 +815,15 @@ static int max77686_rtc_suspend(struct device *dev) } /* - * Main IRQ (not virtual) must be disabled during suspend because if it - * happens while suspended it will be handled before resuming I2C. + * If the main IRQ (not virtual) is the parent IRQ, then it must be + * disabled during suspend because if it happens while suspended it + * will be handled before resuming I2C. * * Since Main IRQ is shared, all its users should disable it to be sure * it won't fire while one of them is still suspended. */ - disable_irq(info->rtc_irq); + if (!info->drv_data->rtc_irq_from_platform) + disable_irq(info->rtc_irq); return ret; } @@ -830,7 +832,8 @@ static int max77686_rtc_resume(struct device *dev) { struct max77686_rtc_info *info = dev_get_drvdata(dev); - enable_irq(info->rtc_irq); + if (!info->drv_data->rtc_irq_from_platform) + enable_irq(info->rtc_irq); if (device_may_wakeup(dev)) { struct max77686_rtc_info *info = dev_get_drvdata(dev); -- cgit From f2e2573c0823c07dc8aac4a8e0947881af2340bd Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 6 Aug 2020 17:37:28 +0800 Subject: drm/amdgpu: use mode1 reset by default for sienna_cichlid Swith default gpu reset method for sienna_cichlid to MODE1 reset. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 74d02d270d34..da8024c2826e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -362,10 +362,15 @@ nv_asic_reset_method(struct amdgpu_device *adev) dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", amdgpu_reset_method); - if (smu_baco_is_support(smu)) - return AMD_RESET_METHOD_BACO; - else + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: return AMD_RESET_METHOD_MODE1; + default: + if (smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; + } } static int nv_asic_reset(struct amdgpu_device *adev) -- cgit From 2c34c960cee0dfdfe33ddf1b3835c250a9503ef6 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 3 Aug 2020 11:15:14 +0800 Subject: drm/amd/powerplay: update swSMU VCN/JPEG PG logics Add lock protections and avoid unnecessary actions if the PG state is already the same as required. Signed-off-by: Evan Quan Tested-by: Matt Coffin Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 57 +++++++++++++++++++++- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 4 -- drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 6 ++- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 8 --- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 8 --- drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c | 9 ---- drivers/gpu/drm/amd/powerplay/smu_internal.h | 2 - 7 files changed, 60 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index ceef149d3410..70a4e6dab166 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -133,6 +133,56 @@ int smu_get_dpm_freq_range(struct smu_context *smu, return ret; } +static int smu_dpm_set_vcn_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + if (!smu->ppt_funcs->dpm_set_vcn_enable) + return 0; + + mutex_lock(&power_gate->vcn_gate_lock); + + if (atomic_read(&power_gate->vcn_gated) ^ enable) + goto out; + + ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable); + if (!ret) + atomic_set(&power_gate->vcn_gated, !enable); + +out: + mutex_unlock(&power_gate->vcn_gate_lock); + + return ret; +} + +static int smu_dpm_set_jpeg_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + if (!smu->ppt_funcs->dpm_set_jpeg_enable) + return 0; + + mutex_lock(&power_gate->jpeg_gate_lock); + + if (atomic_read(&power_gate->jpeg_gated) ^ enable) + goto out; + + ret = smu->ppt_funcs->dpm_set_jpeg_enable(smu, enable); + if (!ret) + atomic_set(&power_gate->jpeg_gated, !enable); + +out: + mutex_unlock(&power_gate->jpeg_gate_lock); + + return ret; +} + /** * smu_dpm_set_power_gate - power gate/ungate the specific IP block * @@ -649,6 +699,11 @@ static int smu_sw_init(void *handle) smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); + mutex_init(&smu->smu_power.power_gate.vcn_gate_lock); + mutex_init(&smu->smu_power.power_gate.jpeg_gate_lock); + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; @@ -1973,7 +2028,7 @@ int smu_read_sensor(struct smu_context *smu, *size = 4; break; case AMDGPU_PP_SENSOR_VCN_POWER_STATE: - *(uint32_t *)data = smu->smu_power.power_gate.vcn_gated ? 0 : 1; + *(uint32_t *)data = atomic_read(&smu->smu_power.power_gate.vcn_gated) ? 0: 1; *size = 4; break; case AMDGPU_PP_SENSOR_MIN_FAN_RPM: diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3b2ecb52a36f..6c991de8f371 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1849,8 +1849,6 @@ static bool arcturus_is_dpm_running(struct smu_context *smu) static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -1861,7 +1859,6 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 0); @@ -1870,7 +1867,6 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } } - power_gate->vcn_gated = true; } return ret; diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 28312d6dc187..074458eb5407 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -292,8 +292,10 @@ struct smu_dpm_context { struct smu_power_gate { bool uvd_gated; bool vce_gated; - bool vcn_gated; - bool jpeg_gated; + atomic_t vcn_gated; + atomic_t jpeg_gated; + struct mutex vcn_gate_lock; + struct mutex jpeg_gate_lock; }; struct smu_power_context { diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index c33bdc6747f2..9f62af9abd23 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -785,8 +785,6 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -796,14 +794,12 @@ static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownVcn, NULL); if (ret) return ret; } - power_gate->vcn_gated = true; } return ret; @@ -811,8 +807,6 @@ static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) static int navi10_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -821,14 +815,12 @@ static int navi10_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownJpeg, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 575ae4be98a2..dbb676c482fd 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -459,8 +459,6 @@ static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -470,14 +468,12 @@ static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownVcn, NULL); if (ret) return ret; } - power_gate->vcn_gated = true; } return ret; @@ -485,8 +481,6 @@ static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -495,14 +489,12 @@ static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index f373e2d0d31c..3865dbed5f93 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -766,10 +766,7 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; struct amdgpu_device *adev = smu->adev; - int ret = 0; if (enable) { @@ -785,7 +782,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl return ret; } } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, 0, NULL); @@ -798,7 +794,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl return ret; } } - power_gate->vcn_gated = true; } return ret; @@ -806,8 +801,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl static int sienna_cichlid_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -816,14 +809,12 @@ static int sienna_cichlid_dpm_set_jpeg_enable(struct smu_context *smu, bool enab if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index d0deaefd3feb..f1d8f247e589 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -77,8 +77,6 @@ #define smu_get_dal_power_level(smu, clocks) smu_ppt_funcs(get_dal_power_level, 0, smu, clocks) #define smu_get_perf_level(smu, designation, level) smu_ppt_funcs(get_perf_level, 0, smu, designation, level) #define smu_get_current_shallow_sleep_clocks(smu, clocks) smu_ppt_funcs(get_current_shallow_sleep_clocks, 0, smu, clocks) -#define smu_dpm_set_vcn_enable(smu, enable) smu_ppt_funcs(dpm_set_vcn_enable, 0, smu, enable) -#define smu_dpm_set_jpeg_enable(smu, enable) smu_ppt_funcs(dpm_set_jpeg_enable, 0, smu, enable) #define smu_set_watermarks_table(smu, clock_ranges) smu_ppt_funcs(set_watermarks_table, 0, smu, clock_ranges) #define smu_thermal_temperature_range_update(smu, range, rw) smu_ppt_funcs(thermal_temperature_range_update, 0, smu, range, rw) #define smu_register_irq_handler(smu) smu_ppt_funcs(register_irq_handler, 0, smu) -- cgit From 7fb133cf65558955b5fdac2458f9690dfd12a794 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 5 Aug 2020 17:24:41 +0800 Subject: drm/amd/powerplay: put VCN/JPEG into PG ungate state before dpm table setup(V3) As VCN related dpm table setup needs VCN be in PG ungate state. Same logics applies to JPEG. V2: fix paste typo V3: code cosmetic Signed-off-by: Evan Quan Tested-by: Matt Coffin Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 85 ++++++++++++++++++++++++---- drivers/gpu/drm/amd/powerplay/smu_internal.h | 1 - 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 70a4e6dab166..0826625573dc 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -133,8 +133,8 @@ int smu_get_dpm_freq_range(struct smu_context *smu, return ret; } -static int smu_dpm_set_vcn_enable(struct smu_context *smu, - bool enable) +static int smu_dpm_set_vcn_enable_locked(struct smu_context *smu, + bool enable) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; @@ -143,23 +143,34 @@ static int smu_dpm_set_vcn_enable(struct smu_context *smu, if (!smu->ppt_funcs->dpm_set_vcn_enable) return 0; - mutex_lock(&power_gate->vcn_gate_lock); - if (atomic_read(&power_gate->vcn_gated) ^ enable) - goto out; + return 0; ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable); if (!ret) atomic_set(&power_gate->vcn_gated, !enable); -out: + return ret; +} + +static int smu_dpm_set_vcn_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + mutex_lock(&power_gate->vcn_gate_lock); + + ret = smu_dpm_set_vcn_enable_locked(smu, enable); + mutex_unlock(&power_gate->vcn_gate_lock); return ret; } -static int smu_dpm_set_jpeg_enable(struct smu_context *smu, - bool enable) +static int smu_dpm_set_jpeg_enable_locked(struct smu_context *smu, + bool enable) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; @@ -168,16 +179,27 @@ static int smu_dpm_set_jpeg_enable(struct smu_context *smu, if (!smu->ppt_funcs->dpm_set_jpeg_enable) return 0; - mutex_lock(&power_gate->jpeg_gate_lock); - if (atomic_read(&power_gate->jpeg_gated) ^ enable) - goto out; + return 0; ret = smu->ppt_funcs->dpm_set_jpeg_enable(smu, enable); if (!ret) atomic_set(&power_gate->jpeg_gated, !enable); -out: + return ret; +} + +static int smu_dpm_set_jpeg_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + mutex_lock(&power_gate->jpeg_gate_lock); + + ret = smu_dpm_set_jpeg_enable_locked(smu, enable); + mutex_unlock(&power_gate->jpeg_gate_lock); return ret; @@ -403,6 +425,45 @@ static int smu_early_init(void *handle) return smu_set_funcs(adev); } +static int smu_set_default_dpm_table(struct smu_context *smu) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int vcn_gate, jpeg_gate; + int ret = 0; + + if (!smu->ppt_funcs->set_default_dpm_table) + return 0; + + mutex_lock(&power_gate->vcn_gate_lock); + mutex_lock(&power_gate->jpeg_gate_lock); + + vcn_gate = atomic_read(&power_gate->vcn_gated); + jpeg_gate = atomic_read(&power_gate->jpeg_gated); + + ret = smu_dpm_set_vcn_enable_locked(smu, true); + if (ret) + goto err0_out; + + ret = smu_dpm_set_jpeg_enable_locked(smu, true); + if (ret) + goto err1_out; + + ret = smu->ppt_funcs->set_default_dpm_table(smu); + if (ret) + dev_err(smu->adev->dev, + "Failed to setup default dpm clock tables!\n"); + + smu_dpm_set_jpeg_enable_locked(smu, !jpeg_gate); +err1_out: + smu_dpm_set_vcn_enable_locked(smu, !vcn_gate); +err0_out: + mutex_unlock(&power_gate->jpeg_gate_lock); + mutex_unlock(&power_gate->vcn_gate_lock); + + return ret; +} + static int smu_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index f1d8f247e589..264073d4e263 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -60,7 +60,6 @@ #define smu_disable_all_features_with_exception(smu, mask) smu_ppt_funcs(disable_all_features_with_exception, 0, smu, mask) #define smu_is_dpm_running(smu) smu_ppt_funcs(is_dpm_running, 0 , smu) #define smu_notify_display_change(smu) smu_ppt_funcs(notify_display_change, 0, smu) -#define smu_set_default_dpm_table(smu) smu_ppt_funcs(set_default_dpm_table, 0, smu) #define smu_populate_umd_state_clk(smu) smu_ppt_funcs(populate_umd_state_clk, 0, smu) #define smu_set_default_od8_settings(smu) smu_ppt_funcs(set_default_od8_settings, 0, smu) #define smu_enable_thermal_alert(smu) smu_ppt_funcs(enable_thermal_alert, 0, smu) -- cgit From f87812284172a9809820d10143b573d833cd3f75 Mon Sep 17 00:00:00 2001 From: Sandeep Raghuraman Date: Thu, 6 Aug 2020 22:52:20 +0530 Subject: drm/amdgpu: Fix bug where DPM is not enabled after hibernate and resume Reproducing bug report here: After hibernating and resuming, DPM is not enabled. This remains the case even if you test hibernate using the steps here: https://www.kernel.org/doc/html/latest/power/basic-pm-debugging.html I debugged the problem, and figured out that in the file hardwaremanager.c, in the function, phm_enable_dynamic_state_management(), the check 'if (!hwmgr->pp_one_vf && smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev) && adev->in_suspend)' returns true for the hibernate case, and false for the suspend case. This means that for the hibernate case, the AMDGPU driver doesn't enable DPM (even though it should) and simply returns from that function. In the suspend case, it goes ahead and enables DPM, even though it doesn't need to. I debugged further, and found out that in the case of suspend, for the CIK/Hawaii GPUs, smum_is_dpm_running(hwmgr) returns false, while in the case of hibernate, smum_is_dpm_running(hwmgr) returns true. For CIK, the ci_is_dpm_running() function calls the ci_is_smc_ram_running() function, which is ultimately used to determine if DPM is currently enabled or not, and this seems to provide the wrong answer. I've changed the ci_is_dpm_running() function to instead use the same method that some other AMD GPU chips do (e.g Fiji), which seems to read the voltage controller. I've tested on my R9 390 and it seems to work correctly for both suspend and hibernate use cases, and has been stable so far. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=208839 Signed-off-by: Sandeep Raghuraman Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 02159ca29fa2..c18169aa59ce 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2725,7 +2725,10 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return ci_is_smc_ram_running(hwmgr); + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, + VOLTAGE_CONTROLLER_ON)) + ? true : false; } static int ci_smu_init(struct pp_hwmgr *hwmgr) -- cgit From b9b40ee4db6cb186341b97bca4f0d7aa2a042a66 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 7 Aug 2020 09:36:32 +0200 Subject: r8152: Use MAC address from correct device tree node Query the USB device's device tree node when looking for a MAC address. The struct device embedded into the struct net_device does not have a device tree node attached at all. The reason why this went unnoticed is because the system where this was tested was one of the few development units that had its OTP programmed, as opposed to production systems where the MAC address is stored in a separate EEPROM and is passed via device tree by the firmware. Reported-by: EJ Hsu Fixes: acb6d3771a03 ("r8152: Use MAC address from device tree if available") Signed-off-by: Thierry Reding Reviewed-by: EJ Hsu Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7d39f998535d..2b02fefd094d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1504,7 +1504,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) sa->sa_family = dev->type; - ret = eth_platform_get_mac_address(&dev->dev, sa->sa_data); + ret = eth_platform_get_mac_address(&tp->udev->dev, sa->sa_data); if (ret < 0) { if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data); -- cgit From 158b47a65aa6b1e70424c3c9c0eda8577b831ea8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 11:32:04 +0200 Subject: selftests: mptcp: fix dependecies Since commit df62f2ec3df6 ("selftests/mptcp: add diag interface tests") the MPTCP selftests relies on the MPTCP diag interface which is enabled by a specific kconfig knob: be sure to include it. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..8df5cb8f71ff 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m -- cgit From 6bdb6211a61eaf91a22e9160b16795ee6c40f90d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 18:31:00 +0200 Subject: mptcp: more stable diag self-tests During diag self-tests we introduce long wait in the mptcp test program to give the script enough time to access the sockets dump. Such wait is introduced after shutting down one sockets end. Since commit 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") if both sides shutdown the socket is correctly transitioned into CLOSED status. As a side effect some sockets are not dumped via the diag interface, because the socket state (CLOSED) does not match the default filter, and this cause self-tests instability. Address the issue moving the above mentioned wait before shutting down the socket. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/68 Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Tested-and-acked-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd0..090620c3e10c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); -- cgit From 7ee2492635d862fac39bc5ef234ffd3d8e9f833b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 19:03:53 +0200 Subject: mptcp: fix warn at shutdown time for unaccepted msk sockets With commit b93df08ccda3 ("mptcp: explicitly track the fully established status"), the status of unaccepted mptcp closed in mptcp_sock_destruct() changes from TCP_SYN_RECV to TCP_ESTABLISHED. As a result mptcp_sock_destruct() does not perform the proper cleanup and inet_sock_destruct() will later emit a warn. Address the issue updating the condition tested in mptcp_sock_destruct(). Also update the related comment. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/66 Reported-and-tested-by: Christoph Paasch Fixes: b93df08ccda3 ("mptcp: explicitly track the fully established status") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 96f4f2fe50ad..e8cac2655c82 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -423,12 +423,12 @@ static void mptcp_sock_destruct(struct sock *sk) * also remove the mptcp socket, via * sock_put(ctx->conn). * - * Problem is that the mptcp socket will not be in - * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Problem is that the mptcp socket will be in + * ESTABLISHED state and will not have the SOCK_DEAD flag. * Both result in warnings from inet_sock_destruct. */ - if (sk->sk_state == TCP_SYN_RECV) { + if (sk->sk_state == TCP_ESTABLISHED) { sk->sk_state = TCP_CLOSE; WARN_ON_ONCE(sk->sk_socket); sock_orphan(sk); -- cgit From 1c3b63f155f637594268cd1add8335461691b314 Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Thu, 6 Aug 2020 08:49:06 +0200 Subject: net/tls: allow MSG_CMSG_COMPAT in sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to use ktls on a system with 32-bit userspace and 64-bit kernel results in a EOPNOTSUPP message during sendmsg: setsockopt(3, SOL_TLS, TLS_TX, …, 40) = 0 sendmsg(3, …, msg_flags=0}, 0) = -1 EOPNOTSUPP (Operation not supported) The tls_sw implementation does strict flag checking and does not allow the MSG_CMSG_COMPAT flag, which is set if the message comes in through the compat syscall. This patch adds MSG_CMSG_COMPAT to the flag check to allow the usage of the TLS SW implementation on systems using the compat syscall path. Note that the same check is present in the sendmsg path for the TLS device implementation, however the flag hasn't been added there for lack of testing hardware. Signed-off-by: Rouven Czerwinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 710bd44eaa49..9a3d9fedd7aa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -935,7 +935,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int ret = 0; int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT)) return -EOPNOTSUPP; mutex_lock(&tls_ctx->tx_lock); -- cgit From d02cbc46136105cf86f84ac355e16f04696f538d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Aug 2020 17:37:53 +0200 Subject: net: phy: fix memory leak in device-create error path A recent commit introduced a late error path in phy_device_create() which fails to release the device name allocated by dev_set_name(). Fixes: 13d0ab6750b2 ("net: phy: check return code when requesting PHY driver module") Cc: Heiner Kallweit Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1b9523595839..57d44648c8dd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -615,7 +615,9 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, if (c45_ids) dev->c45_ids = *c45_ids; dev->irq = bus->irq[addr]; + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); + device_initialize(&mdiodev->dev); dev->state = PHY_DOWN; @@ -649,10 +651,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, ret = phy_request_driver_module(dev, phy_id); } - if (!ret) { - device_initialize(&mdiodev->dev); - } else { - kfree(dev); + if (ret) { + put_device(&mdiodev->dev); dev = ERR_PTR(ret); } -- cgit From 6b07edebe6d31529346e553a7b309bc5251da712 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:52:24 +0800 Subject: net: Use helper function fdput() Use helper function fdput() to fput() the file iff FDPUT_FPUT is set. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index aff52e81653c..3c3d6abe4c1e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1804,8 +1804,7 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, upeer_addrlen, flags, rlimit(RLIMIT_NOFILE)); - if (f.flags) - fput(f.file); + fdput(f); } return ret; @@ -1868,8 +1867,7 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) ret = move_addr_to_kernel(uservaddr, addrlen, &address); if (!ret) ret = __sys_connect_file(f.file, &address, addrlen, 0); - if (f.flags) - fput(f.file); + fdput(f); } return ret; -- cgit From ce787a5a074a86f76f5d3fd804fa78e01bfb9e89 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:53:16 +0800 Subject: net: Set fput_needed iff FDPUT_FPUT is set We should fput() file iff FDPUT_FPUT is set. So we should set fput_needed accordingly. Fixes: 00e188ef6a7e ("sockfd_lookup_light(): switch to fdget^W^Waway from fget_light") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 3c3d6abe4c1e..e08415b4f939 100644 --- a/net/socket.c +++ b/net/socket.c @@ -500,7 +500,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) if (f.file) { sock = sock_from_file(f.file, err); if (likely(sock)) { - *fput_needed = f.flags; + *fput_needed = f.flags & FDPUT_FPUT; return sock; } fdput(f); -- cgit From 47260ba937822cd1a57ee8b520b8789bdda5964e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:54:19 +0800 Subject: net: Remove meaningless jump label out_fs The out_fs jump label has nothing to do but goto out. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index e08415b4f939..c61f036d24f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3063,7 +3063,7 @@ static int __init sock_init(void) err = register_filesystem(&sock_fs_type); if (err) - goto out_fs; + goto out; sock_mnt = kern_mount(&sock_fs_type); if (IS_ERR(sock_mnt)) { err = PTR_ERR(sock_mnt); @@ -3086,7 +3086,6 @@ out: out_mount: unregister_filesystem(&sock_fs_type); -out_fs: goto out; } -- cgit From 11f920d2aa9d4c2c2d53ccd79f4b5512f2169623 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:57:18 +0800 Subject: net: Use helper function ip_is_fragment() Use helper function ip_is_fragment() to check ip fragment. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2828f6d5ba89..7e2e502ef519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4853,7 +4853,7 @@ static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) if (err < 0) goto out; - if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); -- cgit From 7c7ab580db49cc7befe5f4b91bb1920cd6b07575 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 8 Aug 2020 16:23:10 +0800 Subject: net: Convert to use the fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index c61f036d24f5..f4d5998bdcba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1325,7 +1325,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band) case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; - /* fall through */ + fallthrough; case SOCK_WAKE_IO: call_kill: kill_fasync(&wq->fasync_list, SIGIO, band); @@ -3158,13 +3158,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) return -ENOMEM; buf_size += rule_cnt * sizeof(u32); - /* fall through */ + fallthrough; case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_SRXCLSRLINS: convert_out = true; - /* fall through */ + fallthrough; case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; -- cgit From 17fcd83c2b621333eb4b4874c54a30891735700f Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 08:42:20 +0900 Subject: openrisc: uaccess: Fix sparse address space warnings The OpenRISC user access functions put_user(), get_user() and clear_user() were missing proper sparse annotations. This generated warnings like the below. This patch adds the annotations to fix the warnings. Example warnings: net/ipv4/ip_sockglue.c:759:29: warning: incorrect type in argument 1 (different address spaces) net/ipv4/ip_sockglue.c:759:29: expected void const volatile [noderef] __user * net/ipv4/ip_sockglue.c:759:29: got int const *__gu_addr net/ipv4/ip_sockglue.c:764:29: warning: incorrect type in initializer (different address spaces) net/ipv4/ip_sockglue.c:764:29: expected unsigned char const *__gu_addr net/ipv4/ip_sockglue.c:764:29: got unsigned char [noderef] __user * Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck --- arch/openrisc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 46e31bb4a9ad..f2fc5c4b88c3 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -100,7 +100,7 @@ extern long __put_user_bad(void); #define __put_user_check(x, ptr, size) \ ({ \ long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (access_ok(__pu_addr, size)) \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -173,7 +173,7 @@ struct __large_struct { #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT, __gu_val = 0; \ - const __typeof__(*(ptr)) * __gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (access_ok(__gu_addr, size)) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -248,10 +248,10 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long size) #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -extern unsigned long __clear_user(void *addr, unsigned long size); +extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline __must_check unsigned long -clear_user(void *addr, unsigned long size) +clear_user(void __user *addr, unsigned long size) { if (likely(access_ok(addr, size))) size = __clear_user(addr, size); -- cgit From af84b16e3423bd9c1c8d81c44bc0a217f763f6b7 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 09:35:24 +0900 Subject: openrisc: uaccess: Use static inline function in access_ok As suggested by Linus when reviewing commit 9cb2feb4d21d ("arch/openrisc: Fix issues with access_ok()") last year; making __range_ok an inline function also fixes the used twice issue that the commit was fixing. I agree it's a good cleanup. This patch addresses that as I am currently working on the access_ok macro to fixup sparse annotations in OpenRISC. Suggested-by: Linus Torvalds Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck --- arch/openrisc/include/asm/uaccess.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index f2fc5c4b88c3..4b59dc9ad300 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -48,16 +48,19 @@ /* Ensure that the range from addr to addr+size is all within the process' * address space */ -#define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs()-size)) +static inline int __range_ok(unsigned long addr, unsigned long size) +{ + const mm_segment_t fs = get_fs(); + + return size <= fs && addr <= (fs - size); +} /* Ensure that addr is below task's addr_limit */ #define __addr_ok(addr) ((unsigned long) addr < get_fs()) #define access_ok(addr, size) \ ({ \ - unsigned long __ao_addr = (unsigned long)(addr); \ - unsigned long __ao_size = (unsigned long)(size); \ - __range_ok(__ao_addr, __ao_size); \ + __range_ok((unsigned long)(addr), (size)); \ }) /* -- cgit From aac9a9b555b9d84295d2d3637a2d83971c474a1e Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 09:57:14 +0900 Subject: openrisc: uaccess: Remove unused macro __addr_ok Since commit b48b2c3e5043 ("openrisc: use generic strnlen_user() function") the macro __addr_ok is no longer used. It is safe to remove so this patch removes it. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/uaccess.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 4b59dc9ad300..85a55359b244 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -55,9 +55,6 @@ static inline int __range_ok(unsigned long addr, unsigned long size) return size <= fs && addr <= (fs - size); } -/* Ensure that addr is below task's addr_limit */ -#define __addr_ok(addr) ((unsigned long) addr < get_fs()) - #define access_ok(addr, size) \ ({ \ __range_ok((unsigned long)(addr), (size)); \ -- cgit From d99596645f7d66b94b35650e732a42f4c701e11b Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 09:33:57 +0900 Subject: openrisc: signal: Fix sparse address space warnings The __user annotations in signal.c were mostly missing. The missing annotations caused the warnings listed below. This patch fixes them up by adding the __user annotations. arch/openrisc/kernel/signal.c:71:38: warning: incorrect type in initializer (different address spaces) arch/openrisc/kernel/signal.c:71:38: expected struct rt_sigframe *frame arch/openrisc/kernel/signal.c:71:38: got struct rt_sigframe [noderef] __user * arch/openrisc/kernel/signal.c:82:14: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:82:14: expected void const volatile [noderef] __user * arch/openrisc/kernel/signal.c:82:14: got struct rt_sigframe *frame arch/openrisc/kernel/signal.c:84:37: warning: incorrect type in argument 2 (different address spaces) arch/openrisc/kernel/signal.c:84:37: expected void const [noderef] __user *from arch/openrisc/kernel/signal.c:84:37: got struct sigset_t * arch/openrisc/kernel/signal.c:89:39: warning: incorrect type in argument 2 (different address spaces) arch/openrisc/kernel/signal.c:89:39: expected struct sigcontext [noderef] __user *sc arch/openrisc/kernel/signal.c:89:39: got struct sigcontext * arch/openrisc/kernel/signal.c:92:31: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:92:31: expected struct sigaltstack const [noderef] [usertype] __user * arch/openrisc/kernel/signal.c:92:31: got struct sigaltstack * arch/openrisc/kernel/signal.c:158:15: warning: incorrect type in assignment (different address spaces) arch/openrisc/kernel/signal.c:158:15: expected struct rt_sigframe *frame arch/openrisc/kernel/signal.c:158:15: got void [noderef] __user * arch/openrisc/kernel/signal.c:160:14: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:160:14: expected void const volatile [noderef] __user * arch/openrisc/kernel/signal.c:160:14: got struct rt_sigframe *frame arch/openrisc/kernel/signal.c:165:46: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:165:46: expected struct siginfo [noderef] [usertype] __user *to arch/openrisc/kernel/signal.c:165:46: got struct siginfo * arch/openrisc/kernel/signal.c:170:33: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:170:33: expected struct sigaltstack [noderef] [usertype] __user * arch/openrisc/kernel/signal.c:170:33: got struct sigaltstack * arch/openrisc/kernel/signal.c:171:40: warning: incorrect type in argument 2 (different address spaces) arch/openrisc/kernel/signal.c:171:40: expected struct sigcontext [noderef] __user *sc arch/openrisc/kernel/signal.c:171:40: got struct sigcontext * arch/openrisc/kernel/signal.c:173:32: warning: incorrect type in argument 1 (different address spaces) arch/openrisc/kernel/signal.c:173:32: expected void [noderef] __user *to arch/openrisc/kernel/signal.c:173:32: got struct sigset_t * Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck --- arch/openrisc/kernel/signal.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 4f0754874d78..97804f21a40c 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -68,7 +68,7 @@ static int restore_sigcontext(struct pt_regs *regs, asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) { - struct rt_sigframe *frame = (struct rt_sigframe __user *)regs->sp; + struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->sp; sigset_t set; /* @@ -76,7 +76,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) * then frame should be dword aligned here. If it's * not, then the user is trying to mess with us. */ - if (((long)frame) & 3) + if (((unsigned long)frame) & 3) goto badframe; if (!access_ok(frame, sizeof(*frame))) @@ -151,7 +151,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; unsigned long return_ip; int err = 0; @@ -181,10 +181,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, l.ori r11,r0,__NR_sigreturn l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); - err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); - err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); + err |= __put_user(0xa960, (short __user *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short __user *)(frame->retcode + 2)); + err |= __put_user(0x20000001, (unsigned long __user *)(frame->retcode + 4)); + err |= __put_user(0x15000000, (unsigned long __user *)(frame->retcode + 8)); if (err) return -EFAULT; -- cgit From 55b2662ec665cc8b592809a011fe807b05370ab8 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 4 Aug 2020 10:41:54 +0900 Subject: openrisc: uaccess: Add user address space check to access_ok Now that __user annotations are fixed for openrisc uaccess api's we can add checking to the access_ok macro. This patch adds the __chk_user_ptr check, on normal builds the added check is a nop. Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck --- arch/openrisc/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 85a55359b244..7c5892f56765 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -57,6 +57,7 @@ static inline int __range_ok(unsigned long addr, unsigned long size) #define access_ok(addr, size) \ ({ \ + __chk_user_ptr(addr); \ __range_ok((unsigned long)(addr), (size)); \ }) -- cgit From 05487215e6b9732cc4ad0e83e465b33182200ad5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 13 Jul 2020 18:57:32 -0700 Subject: KVM: x86: Don't attempt to load PDPTRs when 64-bit mode is enabled Don't attempt to load PDPTRs if EFER.LME=1, i.e. if 64-bit mode is enabled. A recent change to reload the PDTPRs when CR0.CD or CR0.NW is toggled botched the EFER.LME handling and sends KVM down the PDTPR path when is_paging() is true, i.e. when the guest toggles CD/NW in 64-bit mode. Split the CR0 checks for 64-bit vs. 32-bit PAE into separate paths. The 64-bit path is specifically checking state when paging is toggled on, i.e. CR0.PG transititions from 0->1. The PDPTR path now needs to run if the new CR0 state has paging enabled, irrespective of whether paging was already enabled. Trying to shave a few cycles to make the PDPTR path an "else if" case is a mess. Fixes: d42e3fae6faed ("kvm: x86: Read PDPTEs on CR0.CD and CR0.NW changes") Cc: Jim Mattson Cc: Oliver Upton Cc: Peter Shier Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Reviewed-by: Maxim Levitsky Message-Id: <20200714015732.32426-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 12ea77f99ff3..2db369a64f29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -820,22 +820,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) return 1; - if (cr0 & X86_CR0_PG) { #ifdef CONFIG_X86_64 - if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) { - int cs_db, cs_l; + if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) && + (cr0 & X86_CR0_PG)) { + int cs_db, cs_l; - if (!is_pae(vcpu)) - return 1; - kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - if (cs_l) - return 1; - } else -#endif - if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + if (!is_pae(vcpu)) + return 1; + kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + if (cs_l) return 1; } +#endif + if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) && + is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + return 1; if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) return 1; -- cgit From 43bd9ef42b3b862c97f1f4e86bf3ace890bef924 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 9 Aug 2020 13:04:56 -0400 Subject: x86: Expose SERIALIZE for supported cpuid The SERIALIZE instruction is supported by Tntel processors, like Sapphire Rapids. SERIALIZE is a faster serializing instruction which does not modify registers, arithmetic flags or memory, will not cause VM exit. It's availability is indicated by CPUID.(EAX=7,ECX=0):ECX[bit 14]. Expose it in KVM supported CPUID. This way, KVM could pass this information to guests and they can make use of these features accordingly. Signed-off-by: Cathy Zhang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fa873e3e6e90..3fd6eec202d7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -370,7 +370,8 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_7_EDX, F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | + F(SERIALIZE) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ -- cgit From cf99c505cf7a5b6d3deee91e3571871f20320d31 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Aug 2020 20:50:52 +0800 Subject: MIPS: VZ: Only include loongson_regs.h for CPU_LOONGSON64 Only Loongson64 platform has and needs loongson_regs.h, including it unconditionally will cause build errors. Fixes: 7f2a83f1c2a941ebfee5 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <1596891052-24052-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/vz.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 3932f767e938..a474578ccf68 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -29,7 +29,9 @@ #include #include "interrupt.h" +#ifdef CONFIG_CPU_LOONGSON64 #include "loongson_regs.h" +#endif #include "trace.h" -- cgit From 14a720dc1f5332f3bdf30a23a3bc549e81be974c Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 10 Aug 2020 13:53:19 +0900 Subject: ALSA: usb-audio: fix overeager device match for MacroSilicon MS2109 Matching by device matches all interfaces, which breaks the video/HID portions of the device depending on module load order. Fixes: e337bf19f6af ("ALSA: usb-audio: add quirk for MacroSilicon MS2109") Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20200810045319.128745-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index adb3b62afed4..9c3c03dc96d3 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3662,7 +3662,13 @@ ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */ * with. */ { - USB_DEVICE(0x534d, 0x2109), + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | + USB_DEVICE_ID_MATCH_INT_CLASS | + USB_DEVICE_ID_MATCH_INT_SUBCLASS, + .idVendor = 0x534d, + .idProduct = 0x2109, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { .vendor_name = "MacroSilicon", .product_name = "MS2109", -- cgit From 386a6539992b82fe9ac4f9dc3f548956fd894d8c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 10 Aug 2020 10:16:59 +0800 Subject: ALSA: hda - fix the micmute led status for Lenovo ThinkCentre AIO After installing the Ubuntu Linux, the micmute led status is not correct. Users expect that the led is on if the capture is disabled, but with the current kernel, the led is off with the capture disabled. We tried the old linux kernel like linux-4.15, there is no this issue. It looks like we introduced this issue when switching to the led_cdev. Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200810021659.7429-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index daedcc0adc21..09d93dd88713 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4414,6 +4414,7 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, { struct alc_spec *spec = codec->spec; + spec->micmute_led_polarity = 1; alc_fixup_hp_gpio_led(codec, action, 0, 0x04); if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->init_amp = ALC_INIT_DEFAULT; -- cgit From f40a4b0512839d07be0b03dd4097de5582fa136d Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Mon, 10 Aug 2020 09:17:49 +0800 Subject: MIPS: KVM: Convert a fallthrough comment to fallthrough There is a fallthrough comment being forgotten, GCC complains about it: arch/mips/kvm/emulate.c: In function kvm_mips_emulate_load: arch/mips/kvm/emulate.c:1936:21: error: this statement may fall through 1936 | vcpu->mmio_needed = 1; /* unsigned */ | ~~~~~~~~~~~~~~~~~~^~~ arch/mips/kvm/emulate.c:1939:2: note: here 1939 | case lw_op: Just fix it. Signed-off-by: Jiaxun Yang Reviewed-by: Huacai Chen Message-Id: <20200810011749.3211128-1-jiaxun.yang@flygoat.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 703782355318..d70c4f8e14e2 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1935,7 +1935,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, case lwu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; #endif case lw_op: run->mmio.len = 4; -- cgit From b0102a890f17e965cc43283cb04218c03f85c6ff Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 7 Aug 2020 15:19:04 +0200 Subject: i2c: mediatek: Fix i2c_spec_values description The struct i2c_spec_values have it's members documented but is missing the starting '@', which leads to warings like: drivers/i2c/busses/i2c-mt65xx.c:267: warning: Function parameter or member 'min_low_ns' not described in 'i2c_spec_values' We also delete min_high_ns member as it is not used in the code. Signed-off-by: Matthias Brugger Reviewed-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index e889f74703e4..efc14041d45b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -253,14 +253,13 @@ struct mtk_i2c { /** * struct i2c_spec_values: - * min_low_ns: min LOW period of the SCL clock - * min_su_sta_ns: min set-up time for a repeated START condition - * max_hd_dat_ns: max data hold time - * min_su_dat_ns: min data set-up time + * @min_low_ns: min LOW period of the SCL clock + * @min_su_sta_ns: min set-up time for a repeated START condition + * @max_hd_dat_ns: max data hold time + * @min_su_dat_ns: min data set-up time */ struct i2c_spec_values { unsigned int min_low_ns; - unsigned int min_high_ns; unsigned int min_su_sta_ns; unsigned int max_hd_dat_ns; unsigned int min_su_dat_ns; -- cgit From 1b7ecc241a67ad6b584e071bd791a54e0cd5f097 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 10 Aug 2020 17:24:00 +0900 Subject: ALSA: usb-audio: work around streaming quirk for MacroSilicon MS2109 Further investigation of the L-R swap problem on the MS2109 reveals that the problem isn't that the channels are swapped, but rather that they are swapped and also out of phase by one sample. In other words, the issue is actually that the very first frame that comes from the hardware is a half-frame containing only the right channel, and after that everything becomes offset. So introduce a new quirk field to drop the very first 2 bytes that come in after the format is configured and a capture stream starts. This puts the channels in phase and in the correct order. Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20200810082400.225858-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/card.h | 1 + sound/usb/pcm.c | 6 ++++++ sound/usb/quirks.c | 3 +++ sound/usb/stream.c | 1 + 4 files changed, 11 insertions(+) diff --git a/sound/usb/card.h b/sound/usb/card.h index de43267b9c8a..5351d7183b1b 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -137,6 +137,7 @@ struct snd_usb_substream { unsigned int tx_length_quirk:1; /* add length specifier to transfers */ unsigned int fmt_type; /* USB audio format type (1-3) */ unsigned int pkt_offset_adj; /* Bytes to drop from beginning of packets (for non-compliant devices) */ + unsigned int stream_offset_adj; /* Bytes to drop from beginning of stream (for non-compliant devices) */ unsigned int running: 1; /* running status */ diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 415bfec49a01..5600751803cf 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1420,6 +1420,12 @@ static void retire_capture_urb(struct snd_usb_substream *subs, // continue; } bytes = urb->iso_frame_desc[i].actual_length; + if (subs->stream_offset_adj > 0) { + unsigned int adj = min(subs->stream_offset_adj, bytes); + cp += adj; + bytes -= adj; + subs->stream_offset_adj -= adj; + } frames = bytes / stride; if (!subs->txfr_quirk) bytes = frames * stride; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c551141f337e..abf99b814a0f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1495,6 +1495,9 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x2b73, 0x000a): /* Pioneer DJ DJM-900NXS2 */ pioneer_djm_set_format_quirk(subs); break; + case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ + subs->stream_offset_adj = 2; + break; } } diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 4d1e6579e54d..ca76ba5b5c0b 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -94,6 +94,7 @@ static void snd_usb_init_substream(struct snd_usb_stream *as, subs->tx_length_quirk = as->chip->tx_length_quirk; subs->speed = snd_usb_get_speed(subs->dev); subs->pkt_offset_adj = 0; + subs->stream_offset_adj = 0; snd_usb_set_pcm_ops(as->pcm, stream); -- cgit From 6e8596172ee1cd46ec0bfd5adcf4ff86371478b6 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 10 Aug 2020 17:25:02 +0900 Subject: ALSA: usb-audio: add quirk for Pioneer DDJ-RB This is just another Pioneer device with fixed endpoints. Input is dummy but used as feedback (it always returns silence). Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20200810082502.225979-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 9c3c03dc96d3..d79e3ddc5690 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3558,6 +3558,62 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, +{ + /* + * PIONEER DJ DDJ-RB + * PCM is 4 channels out, 2 dummy channels in @ 44.1 fixed + * The feedback for the output is the dummy input. + */ + USB_DEVICE_VENDOR_SPEC(0x2b73, 0x000e), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 4, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_44100, + .rate_min = 44100, + .rate_max = 44100, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 44100 } + } + }, + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 2, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC| + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_44100, + .rate_min = 44100, + .rate_max = 44100, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 44100 } + } + }, + { + .ifnum = -1 + } + } + } +}, #define ALC1220_VB_DESKTOP(vend, prod) { \ USB_DEVICE(vend, prod), \ -- cgit From 6553fb799f601497ca0703682e2aff131197dc5c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Aug 2020 15:56:23 +0530 Subject: powerpc/pkeys: Fix boot failures with Nemo board (A-EON AmigaOne X1000) On p6 and before we should avoid updating UAMOR SPRN. This resulted in boot failure on Nemo board. Fixes: 269e829f48a0 ("powerpc/book3s64/pkey: Disable pkey on POWER6 and before") Reported-by: Christian Zigotzky Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200810102623.685083-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 5 ++--- arch/powerpc/mm/book3s64/pkeys.c | 12 ++++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1478fceeb683..1da9dbba9217 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,9 +1115,8 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); -#ifdef CONFIG_PPC_MEM_KEYS - mtspr(SPRN_UAMOR, default_uamor); -#endif + if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) + mtspr(SPRN_UAMOR, default_uamor); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 69a6b87f2bb4..b1d091a97611 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -73,12 +73,6 @@ static int scan_pkey_feature(void) if (early_radix_enabled()) return 0; - /* - * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 - */ - if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) - return 0; - ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); if (ret == 0) { /* @@ -124,6 +118,12 @@ void __init pkey_early_init_devtree(void) __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); + /* + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 + */ + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return; + /* scan the device tree for pkey feature */ pkeys_total = scan_pkey_feature(); if (!pkeys_total) -- cgit From 536e785f533f5a65ce5238e202d527d70fd4ab17 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 6 Aug 2020 01:06:15 +0530 Subject: i2c: eg20t: Drop PCI wakeup calls from .suspend/.resume The driver calls pci_enable_wake(...., false) in pch_i2c_suspend() as well as pch_i2c_resume(). Either it should enable-wake the device in .suspend() or should not invoke pci_enable_wake() at all. Concluding that this driver doesn't support enable-wake and PCI core calls pci_enable_wake(pci_dev, PCI_D0, false) during resume, drop it from .suspend() and .resume(). Reported-by: Bjorn Helgaas Signed-off-by: Vaibhav Gupta Reviewed-by: Bjorn Helgaas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-eg20t.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 73f139690e4e..eb41de22d461 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -879,7 +879,6 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) return ret; } - pci_enable_wake(pdev, PCI_D3hot, 0); pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); @@ -899,8 +898,6 @@ static int pch_i2c_resume(struct pci_dev *pdev) return -EIO; } - pci_enable_wake(pdev, PCI_D3hot, 0); - for (i = 0; i < adap_info->ch_num; i++) pch_i2c_init(&adap_info->pch_data[i]); -- cgit From 82c8eb22095fa747517845e7d5748ae2e4967359 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 6 Aug 2020 01:06:16 +0530 Subject: i2c: eg20t: use generic power management Drivers using legacy power management .suspen()/.resume() callbacks have to manage PCI states and device's PM states themselves. They also need to take care of standard configuration registers. Switch to generic power management framework using a single "struct dev_pm_ops" variable to take the unnecessary load from the driver. This also avoids the need for the driver to directly call most of the PCI helper functions and device power state control functions, as through the generic framework PCI Core takes care of the necessary operations, and drivers are required to do only device-specific jobs. Signed-off-by: Vaibhav Gupta Reviewed-by: Bjorn Helgaas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-eg20t.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index eb41de22d461..843b31a0f752 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -846,11 +846,10 @@ static void pch_i2c_remove(struct pci_dev *pdev) kfree(adap_info); } -#ifdef CONFIG_PM -static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused pch_i2c_suspend(struct device *dev) { - int ret; int i; + struct pci_dev *pdev = to_pci_dev(dev); struct adapter_info *adap_info = pci_get_drvdata(pdev); void __iomem *p = adap_info->pch_data[0].pch_base_address; @@ -872,31 +871,13 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) ioread32(p + PCH_I2CSR), ioread32(p + PCH_I2CBUFSTA), ioread32(p + PCH_I2CESRSTA)); - ret = pci_save_state(pdev); - - if (ret) { - pch_pci_err(pdev, "pci_save_state\n"); - return ret; - } - - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; } -static int pch_i2c_resume(struct pci_dev *pdev) +static int __maybe_unused pch_i2c_resume(struct device *dev) { int i; - struct adapter_info *adap_info = pci_get_drvdata(pdev); - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - if (pci_enable_device(pdev) < 0) { - pch_pci_err(pdev, "pch_i2c_resume:pci_enable_device FAILED\n"); - return -EIO; - } + struct adapter_info *adap_info = dev_get_drvdata(dev); for (i = 0; i < adap_info->ch_num; i++) pch_i2c_init(&adap_info->pch_data[i]); @@ -905,18 +886,15 @@ static int pch_i2c_resume(struct pci_dev *pdev) return 0; } -#else -#define pch_i2c_suspend NULL -#define pch_i2c_resume NULL -#endif + +static SIMPLE_DEV_PM_OPS(pch_i2c_pm_ops, pch_i2c_suspend, pch_i2c_resume); static struct pci_driver pch_pcidriver = { .name = KBUILD_MODNAME, .id_table = pch_pcidev_id, .probe = pch_i2c_probe, .remove = pch_i2c_remove, - .suspend = pch_i2c_suspend, - .resume = pch_i2c_resume + .driver.pm = &pch_i2c_pm_ops, }; module_pci_driver(pch_pcidriver); -- cgit From 34dedd2a83b241ba6aeb290260313c65dc58660e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 10 Aug 2020 21:31:06 +0800 Subject: ALSA: usb-audio: Disable Lenovo P620 Rear line-in volume control The USB device (0x17aa:0x1046) that support Lenovo P620 rear panel line-in claim to support volume control, but it doens't seem to have an AMP, so when line-in volume lowers below 80, nothing gets recorded anymore. Disable the volume control to workaround the issue. Fixes: f8c11eb7da4a ("ALSA: usb-audio: Add support for Lenovo ThinkStation P620") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200810133108.31580-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index c369c81e74c4..5b43e9e40e49 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -371,6 +371,7 @@ static const struct usbmix_name_map asus_rog_map[] = { }; static const struct usbmix_name_map lenovo_p620_rear_map[] = { + { 19, NULL, 2 }, /* FU, Volume */ { 19, NULL, 12 }, /* FU, Input Gain Pad */ {} }; -- cgit From 1e6e238c3002ea3611465ce5f32777ddd6a40126 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 28 Jul 2020 16:39:26 +0800 Subject: btrfs: inode: fix NULL pointer dereference if inode doesn't need compression [BUG] There is a bug report of NULL pointer dereference caused in compress_file_extent(): Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs] NIP [c008000006dd4d34] compress_file_range.constprop.41+0x75c/0x8a0 [btrfs] LR [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] Call Trace: [c000000c69093b00] [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] (unreliable) [c000000c69093bd0] [c008000006dd4ebc] async_cow_start+0x44/0xa0 [btrfs] [c000000c69093c10] [c008000006e14824] normal_work_helper+0xdc/0x598 [btrfs] [c000000c69093c80] [c0000000001608c0] process_one_work+0x2c0/0x5b0 [c000000c69093d10] [c000000000160c38] worker_thread+0x88/0x660 [c000000c69093db0] [c00000000016b55c] kthread+0x1ac/0x1c0 [c000000c69093e20] [c00000000000b660] ret_from_kernel_thread+0x5c/0x7c ---[ end trace f16954aa20d822f6 ]--- [CAUSE] For the following execution route of compress_file_range(), it's possible to hit NULL pointer dereference: compress_file_extent() |- pages = NULL; |- start = async_chunk->start = 0; |- end = async_chunk = 4095; |- nr_pages = 1; |- inode_need_compress() == false; <<< Possible, see later explanation | Now, we have nr_pages = 1, pages = NULL |- cont: |- ret = cow_file_range_inline(); |- if (ret <= 0) { |- for (i = 0; i < nr_pages; i++) { |- WARN_ON(pages[i]->mapping); <<< Crash To enter above call execution branch, we need the following race: Thread 1 (chattr) | Thread 2 (writeback) --------------------------+------------------------------ | btrfs_run_delalloc_range | |- inode_need_compress = true | |- cow_file_range_async() btrfs_ioctl_set_flag() | |- binode_flags |= | BTRFS_INODE_NOCOMPRESS | | compress_file_range() | |- inode_need_compress = false | |- nr_page = 1 while pages = NULL | | Then hit the crash [FIX] This patch will fix it by checking @pages before doing accessing it. This patch is only designed as a hot fix and easy to backport. More elegant fix may make btrfs only check inode_need_compress() once to avoid such race, but that would be another story. Reported-by: Luciano Chavez Fixes: 4d3a800ebb12 ("btrfs: merge nr_pages input and output parameter in compress_pages") CC: stable@vger.kernel.org # 4.14.x: cecc8d9038d16: btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b3412fbfd..9988d754e465 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -653,12 +653,18 @@ cont: page_error_op | PAGE_END_WRITEBACK); - for (i = 0; i < nr_pages; i++) { - WARN_ON(pages[i]->mapping); - put_page(pages[i]); + /* + * Ensure we only free the compressed pages if we have + * them allocated, as we can still reach here with + * inode_need_compress() == false. + */ + if (pages) { + for (i = 0; i < nr_pages; i++) { + WARN_ON(pages[i]->mapping); + put_page(pages[i]); + } + kfree(pages); } - kfree(pages); - return 0; } } -- cgit From bf53d4687b8f3f6b752f091eb85f62369a515dfd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 27 Jul 2020 10:28:05 -0400 Subject: btrfs: only search for left_info if there is no right_info in try_merge_free_space In try_to_merge_free_space we attempt to find entries to the left and right of the entry we are adding to see if they can be merged. We search for an entry past our current info (saved into right_info), and then if right_info exists and it has a rb_prev() we save the rb_prev() into left_info. However there's a slight problem in the case that we have a right_info, but no entry previous to that entry. At that point we will search for an entry just before the info we're attempting to insert. This will simply find right_info again, and assign it to left_info, making them both the same pointer. Now if right_info _can_ be merged with the range we're inserting, we'll add it to the info and free right_info. However further down we'll access left_info, which was right_info, and thus get a use-after-free. Fix this by only searching for the left entry if we don't find a right entry at all. The CVE referenced had a specially crafted file system that could trigger this use-after-free. However with the tree checker improvements we no longer trigger the conditions for the UAF. But the original conditions still apply, hence this fix. Reference: CVE-2019-19448 Fixes: 963030817060 ("Btrfs: use hybrid extents+bitmap rb tree for free space") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6d961e11639e..ef0fd7afb0b1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2282,7 +2282,7 @@ out: static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { - struct btrfs_free_space *left_info; + struct btrfs_free_space *left_info = NULL; struct btrfs_free_space *right_info; bool merged = false; u64 offset = info->offset; @@ -2298,7 +2298,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); - else + else if (!right_info) left_info = tree_search_offset(ctl, offset - 1, 0, 0); /* See try_merge_free_space() comment. */ -- cgit From 27942c9971cc405c60432eca9395e514a2ae9f5e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 23 Jul 2020 19:08:55 +0200 Subject: btrfs: fix messages after changing compression level by remount Reported by Forza on IRC that remounting with compression options does not reflect the change in level, or at least it does not appear to do so according to the messages: mount -o compress=zstd:1 /dev/sda /mnt mount -o remount,compress=zstd:15 /mnt does not print the change to the level to syslog: [ 41.366060] BTRFS info (device vda): use zstd compression, level 1 [ 41.368254] BTRFS info (device vda): disk space caching is enabled [ 41.390429] BTRFS info (device vda): disk space caching is enabled What really happens is that the message is lost but the level is actualy changed. There's another weird output, if compression is reset to 'no': [ 45.413776] BTRFS info (device vda): use no compression, level 4 To fix that, save the previous compression level and print the message in that case too and use separate message for 'no' compression. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: David Sterba --- fs/btrfs/super.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5a9dc31d95c9..aa73422b0678 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -517,6 +517,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; + int saved_compress_level; bool saved_compress_force; int no_compress = 0; @@ -598,6 +599,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); + saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { @@ -642,6 +644,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; + info->compress_level = 0; + info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; @@ -662,11 +666,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(info, COMPRESS) && - (info->compress_type != saved_compress_type || - compress_force != saved_compress_force)) || - (!btrfs_test_opt(info, COMPRESS) && - no_compress == 1)) { + if (no_compress == 1) { + btrfs_info(info, "use no compression"); + } else if ((info->compress_type != saved_compress_type) || + (compress_force != saved_compress_force) || + (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? "force" : "use", compress_type, info->compress_level); -- cgit From 3ef3959b29c4a5bd65526ab310a1a18ae533172a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 22 Jul 2020 11:12:46 -0400 Subject: btrfs: don't show full path of bind mounts in subvol= Chris Murphy reported a problem where rpm ostree will bind mount a bunch of things for whatever voodoo it's doing. But when it does this /proc/mounts shows something like /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo/bar 0 0 Despite subvolid=256 being subvol=/foo. This is because we're just spitting out the dentry of the mount point, which in the case of bind mounts is the source path for the mountpoint. Instead we should spit out the path to the actual subvol. Fix this by looking up the name for the subvolid we have mounted. With this fix the same test looks like this /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index aa73422b0678..9b4e9c4c4673 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1386,6 +1386,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; + const char *subvol_name; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1472,8 +1473,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); - seq_puts(seq, ",subvol="); - seq_dentry(seq, dentry, " \t\n\\"); + subvol_name = btrfs_get_subvol_name_from_objectid(info, + BTRFS_I(d_inode(dentry))->root->root_key.objectid); + if (!IS_ERR(subvol_name)) { + seq_puts(seq, ",subvol="); + seq_escape(seq, subvol_name, " \t\n\\"); + kfree(subvol_name); + } return 0; } -- cgit From 4f26433e9b3eb7a55ed70d8f882ae9cd48ba448b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 29 Jul 2020 10:17:50 +0100 Subject: btrfs: fix memory leaks after failure to lookup checksums during inode logging While logging an inode, at copy_items(), if we fail to lookup the checksums for an extent we release the destination path, free the ins_data array and then return immediately. However a previous iteration of the for loop may have added checksums to the ordered_sums list, in which case we leak the memory used by them. So fix this by making sure we iterate the ordered_sums list and free all its checksums before returning. Fixes: 3650860b90cc2a ("Btrfs: remove almost all of the BUG()'s from tree-log.c") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ea8136dcf71f..696dd861cc3c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4036,11 +4036,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, fs_info->csum_root, ds + cs, ds + cs + cl - 1, &ordered_sums, 0); - if (ret) { - btrfs_release_path(dst_path); - kfree(ins_data); - return ret; - } + if (ret) + break; } } } @@ -4053,7 +4050,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. */ - ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, -- cgit From faa008899a4db21a2df99833cb4ff6fa67009a20 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 30 Jul 2020 11:18:09 -0400 Subject: btrfs: make sure SB_I_VERSION doesn't get unset by remount There's some inconsistency around SB_I_VERSION handling with mount and remount. Since we don't really want it to be off ever just work around this by making sure we don't get the flag cleared on remount. There's a tiny cpu cost of setting the bit, otherwise all changes to i_version also change some of the times (ctime/mtime) so the inode needs to be synced. We wouldn't save anything by disabling it. Reported-by: Eric Sandeen CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add perf impact analysis ] Signed-off-by: David Sterba --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b4e9c4c4673..e529ddb35b87 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1960,6 +1960,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: + /* + * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, + * since the absence of the flag means it can be toggled off by remount. + */ + *flags |= SB_I_VERSION; + wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); -- cgit From 30b5ae21b9a51be4366e8ba8bf5b7bf7eb298ea3 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 8 Aug 2020 16:36:37 +0800 Subject: cifs: Convert to use the fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Signed-off-by: Hongxiang Lou Signed-off-by: Miaohe Lin Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 24c2ac360591..667d70aa335f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3913,7 +3913,7 @@ smb2_readv_callback(struct mid_q_entry *mid) case MID_RESPONSE_MALFORMED: credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - /* fall through */ + fallthrough; default: rdata->result = -EIO; } @@ -4146,7 +4146,7 @@ smb2_writev_callback(struct mid_q_entry *mid) case MID_RESPONSE_MALFORMED: credits.value = le16_to_cpu(rsp->sync_hdr.CreditRequest); credits.instance = server->reconnect_instance; - /* fall through */ + fallthrough; default: wdata->result = -EIO; break; -- cgit From 99b48ecc8e800983b6e00a2350daaeceba1f7406 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 17 Jul 2020 15:52:38 +0300 Subject: x86/kvm/hyper-v: Synic default SCONTROL MSR needs to be enabled Based on an analysis of the HyperV firmwares (Gen1 and Gen2) it seems like the SCONTROL is not being set to the ENABLED state as like we have thought. Also from a test done by Vitaly Kuznetsov, running a nested HyperV it was concluded that the first access to the SCONTROL MSR with a read resulted with the value of 0x1, aka HV_SYNIC_CONTROL_ENABLE. It's important to note that this diverges from the value states in the HyperV TLFS of 0. Signed-off-by: Jon Doron Message-Id: <20200717125238.1103096-2-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index af9cdb426dd2..814d3aee5cef 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -900,6 +900,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; + synic->control = HV_SYNIC_CONTROL_ENABLE; return 0; } -- cgit From c15c2ec07a26b251040943a1a9f90d3037f041e5 Mon Sep 17 00:00:00 2001 From: Boleyn Su Date: Thu, 6 Aug 2020 15:31:44 +0900 Subject: btrfs: check correct variable after allocation in btrfs_backref_iter_alloc The `if (!ret)` check will always be false and it may result in ret->path being dereferenced while it is a NULL pointer. Fixes: a37f232b7b65 ("btrfs: backref: introduce the skeleton of btrfs_backref_iter") CC: stable@vger.kernel.org # 5.8+ Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Boleyn Su Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ea10f7bc99ab..ea1c28ccb44f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2303,7 +2303,7 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc( return NULL; ret->path = btrfs_alloc_path(); - if (!ret) { + if (!ret->path) { kfree(ret); return NULL; } -- cgit From 62ab2cc04ddc5dfab4e382ae167734fc111da5ed Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 3 Aug 2020 14:20:11 +0800 Subject: btrfs: sysfs: fix NULL pointer dereference at btrfs_sysfs_del_qgroups() [BUG] Unmounting a btrfs filesystem with quota disabled will cause the following NULL pointer dereference: BTRFS info (device dm-5): has skinny extents BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 7 PID: 637 Comm: umount Not tainted 5.8.0-rc7-next-20200731-custom #76 RIP: 0010:kobject_del+0x6/0x20 Call Trace: btrfs_sysfs_del_qgroups+0xac/0xf0 [btrfs] btrfs_free_qgroup_config+0x63/0x70 [btrfs] close_ctree+0x1f5/0x323 [btrfs] btrfs_put_super+0x15/0x17 [btrfs] generic_shutdown_super+0x72/0x110 kill_anon_super+0x18/0x30 btrfs_kill_super+0x17/0x30 [btrfs] deactivate_locked_super+0x3b/0xa0 deactivate_super+0x40/0x50 cleanup_mnt+0x135/0x190 __cleanup_mnt+0x12/0x20 task_work_run+0x64/0xb0 exit_to_user_mode_prepare+0x18a/0x190 syscall_exit_to_user_mode+0x4f/0x270 do_syscall_64+0x45/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 37b7adca5c1d5c5d ]--- [CAUSE] Commit 079ad2fb4bf9 ("kobject: Avoid premature parent object freeing in kobject_cleanup()") changed kobject_del() that it no longer accepts NULL pointer. Before that commit, kobject_del() and kobject_put() all accept NULL pointers and just ignore such NULL pointers. But that mentioned commit needs to access the parent node, killing the old NULL pointer behavior. Unfortunately btrfs is relying on that hidden feature thus we will trigger such NULL pointer dereference. [FIX] Instead of just saving several lines, do proper fs_info->qgroups_kobj check before calling kobject_del() and kobject_put(). Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 104c80caaa74..c8df2edafd85 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1565,9 +1565,11 @@ void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info) rbtree_postorder_for_each_entry_safe(qgroup, next, &fs_info->qgroup_tree, node) btrfs_sysfs_del_one_qgroup(fs_info, qgroup); - kobject_del(fs_info->qgroups_kobj); - kobject_put(fs_info->qgroups_kobj); - fs_info->qgroups_kobj = NULL; + if (fs_info->qgroups_kobj) { + kobject_del(fs_info->qgroups_kobj); + kobject_put(fs_info->qgroups_kobj); + fs_info->qgroups_kobj = NULL; + } } /* Called when qgroups get initialized, thus there is no need for locking */ -- cgit From 709ed1bcef12398ac1a35c149f3e582db04456c2 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 16 Jul 2020 14:25:11 -0400 Subject: EDAC/ie31200: Fallback if host bridge device is already initialized The Intel uncore driver may claim some of the pci ids from ie31200 which means that the ie31200 edac driver will not initialize them as part of pci_register_driver(). Let's add a fallback for this case to 'pci_get_device()' to get a reference on the device such that it can still be configured. This is similar in approach to other edac drivers. Signed-off-by: Jason Baron Cc: Borislav Petkov Cc: Mauro Carvalho Chehab Cc: linux-edac Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/1594923911-10885-1-git-send-email-jbaron@akamai.com --- drivers/edac/ie31200_edac.c | 50 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index d68346a8e141..ebe50996cc42 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -170,6 +170,8 @@ (n << (28 + (2 * skl) - PAGE_SHIFT)) static int nr_channels; +static struct pci_dev *mci_pdev; +static int ie31200_registered = 1; struct ie31200_priv { void __iomem *window; @@ -538,12 +540,16 @@ fail_free: static int ie31200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - edac_dbg(0, "MC:\n"); + int rc; + edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; + rc = ie31200_probe1(pdev, ent->driver_data); + if (rc == 0 && !mci_pdev) + mci_pdev = pci_dev_get(pdev); - return ie31200_probe1(pdev, ent->driver_data); + return rc; } static void ie31200_remove_one(struct pci_dev *pdev) @@ -552,6 +558,8 @@ static void ie31200_remove_one(struct pci_dev *pdev) struct ie31200_priv *priv; edac_dbg(0, "\n"); + pci_dev_put(mci_pdev); + mci_pdev = NULL; mci = edac_mc_del_mc(&pdev->dev); if (!mci) return; @@ -593,17 +601,53 @@ static struct pci_driver ie31200_driver = { static int __init ie31200_init(void) { + int pci_rc, i; + edac_dbg(3, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - return pci_register_driver(&ie31200_driver); + pci_rc = pci_register_driver(&ie31200_driver); + if (pci_rc < 0) + goto fail0; + + if (!mci_pdev) { + ie31200_registered = 0; + for (i = 0; ie31200_pci_tbl[i].vendor != 0; i++) { + mci_pdev = pci_get_device(ie31200_pci_tbl[i].vendor, + ie31200_pci_tbl[i].device, + NULL); + if (mci_pdev) + break; + } + if (!mci_pdev) { + edac_dbg(0, "ie31200 pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); + if (pci_rc < 0) { + edac_dbg(0, "ie31200 init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + return 0; + +fail1: + pci_unregister_driver(&ie31200_driver); +fail0: + pci_dev_put(mci_pdev); + + return pci_rc; } static void __exit ie31200_exit(void) { edac_dbg(3, "MC:\n"); pci_unregister_driver(&ie31200_driver); + if (!ie31200_registered) + ie31200_remove_one(mci_pdev); } module_init(ie31200_init); -- cgit From 519a8a6cf91dda095be2d36216fc4ebc525270a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 18:42:14 +0200 Subject: net: Revert "net: optimize the sockptr_t for unified kernel/user address spaces" This reverts commits 6d04fe15f78acdf8e32329e208552e226f7a8ae6 and a31edb2059ed4e498f9aa8230c734b59d0ad797a. It turns out the idea to share a single pointer for both kernel and user space address causes various kinds of problems. So use the slightly less optimal version that uses an extra bit, but which is guaranteed to be safe everywhere. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: Eric Dumazet Reported-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 26 ++------------------------ net/ipv4/bpfilter/sockopt.c | 14 ++++++-------- net/socket.c | 6 +----- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include #include #include -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 545b2640f019..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, - char __user *user_optval, int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) { - sockptr_t optval; - int err, len; + int len; if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval, len); - if (err) - return err; - return bpfilter_mbox_request(sk, optname, optval, len, false); + + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/socket.c b/net/socket.c index f4d5998bdcba..dbbe8ea7d395 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2095,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval; + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2103,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval, optlen); - if (err) - return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; -- cgit From 8a7f280f29a80f6e0798f5d6e07c5dd8726620fe Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Mon, 10 Aug 2020 09:55:55 -0700 Subject: vmxnet3: use correct tcp hdr length when packet is encapsulated Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, while calculating tcp hdr length, it does not take into account if the packet is encapsulated or not. This patch fixes this issue by using correct reference for inner tcp header. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ca395f9679d0..2818015324b8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -886,7 +886,8 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, switch (protocol) { case IPPROTO_TCP: - ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->l4_hdr_size = skb->encapsulation ? inner_tcp_hdrlen(skb) : + tcp_hdrlen(skb); break; case IPPROTO_UDP: ctx->l4_hdr_size = sizeof(struct udphdr); -- cgit From 56e287b3daa20a95e0756e016362f2e96158e1a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 10:32:04 -0700 Subject: nfp: update maintainer I'm not doing much work on the NFP driver any more. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d2784b502da0..83ea07711518 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11948,7 +11948,8 @@ F: include/uapi/linux/netrom.h F: net/netrom/ NETRONOME ETHERNET DRIVERS -M: Jakub Kicinski +M: Simon Horman +R: Jakub Kicinski L: oss-drivers@netronome.com S: Maintained F: drivers/net/ethernet/netronome/ -- cgit From f19008e676366c44e9241af57f331b6c6edf9552 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2020 13:38:39 -0400 Subject: tcp: correct read of TFO keys on big endian systems When TFO keys are read back on big endian systems either via the global sysctl interface or via getsockopt() using TCP_FASTOPEN_KEY, the values don't match what was written. For example, on s390x: # echo "1-2-3-4" > /proc/sys/net/ipv4/tcp_fastopen_key # cat /proc/sys/net/ipv4/tcp_fastopen_key 02000000-01000000-04000000-03000000 Instead of: # cat /proc/sys/net/ipv4/tcp_fastopen_key 00000001-00000002-00000003-00000004 Fix this by converting to the correct endianness on read. This was reported by Colin Ian King when running the 'tcp_fastopen_backup_key' net selftest on s390x, which depends on the read value matching what was written. I've confirmed that the test now passes on big and little endian systems. Signed-off-by: Jason Baron Fixes: 438ac88009bc ("net: fastopen: robustness and endianness fixes for SipHash") Cc: Ard Biesheuvel Cc: Eric Dumazet Reported-and-tested-by: Colin Ian King Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 16 ++++------------ net/ipv4/tcp.c | 16 ++++------------ net/ipv4/tcp_fastopen.c | 23 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dbf5c791a6eb..eab6c7510b5b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1672,6 +1672,8 @@ void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5653e3b011bf..54023a46db04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -301,24 +301,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2 * TCP_FASTOPEN_KEY_MAX) + (TCP_FASTOPEN_KEY_MAX * 5)) }; - struct tcp_fastopen_context *ctx; - u32 user_key[TCP_FASTOPEN_KEY_MAX * 4]; - __le32 key[TCP_FASTOPEN_KEY_MAX * 4]; + u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)]; + __le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)]; char *backup_data; - int ret, i = 0, off = 0, n_keys = 0; + int ret, i = 0, off = 0, n_keys; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) return -ENOMEM; - rcu_read_lock(); - ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); - if (ctx) { - n_keys = tcp_fastopen_context_len(ctx); - memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys); - } - rcu_read_unlock(); - + n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key); if (!n_keys) { memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH); n_keys = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c06d2bfd2ec4..31f3b858db81 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3685,22 +3685,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; - struct tcp_fastopen_context *ctx; - unsigned int key_len = 0; + u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)]; + unsigned int key_len; if (get_user(len, optlen)) return -EFAULT; - rcu_read_lock(); - ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) { - key_len = tcp_fastopen_context_len(ctx) * - TCP_FASTOPEN_KEY_LENGTH; - memcpy(&key[0], &ctx->key[0], key_len); - } - rcu_read_unlock(); - + key_len = tcp_fastopen_get_cipher(net, icsk, key) * + TCP_FASTOPEN_KEY_LENGTH; len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 19ad9586c720..1bb85821f1e6 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -108,6 +108,29 @@ out: return err; } +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key) +{ + struct tcp_fastopen_context *ctx; + int n_keys = 0, i; + + rcu_read_lock(); + if (icsk) + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + else + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctx) { + n_keys = tcp_fastopen_context_len(ctx); + for (i = 0; i < n_keys; i++) { + put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); + put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); + } + } + rcu_read_unlock(); + + return n_keys; +} + static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, const siphash_key_t *key, -- cgit From 444da3f52407d74c9aa12187ac6b01f76ee47d62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 11:21:11 -0700 Subject: bitfield.h: don't compile-time validate _val in FIELD_FIT When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the compiler to deduce a case where _val can only have the value of -1 at compile time. Specifically, /* struct bpf_insn: _s32 imm */ u64 imm = insn->imm; /* sign extend */ if (imm >> 32) { /* non-zero only if insn->imm is negative */ /* inlined from ur_load_imm_any */ u32 __imm = imm >> 32; /* therefore, always 0xffffffff */ if (__builtin_constant_p(__imm) && __imm > 255) compiletime_assert_XXX() This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that checks that a given value is representable in one byte (interpreted as unsigned). FIELD_FIT() should return true or false at runtime for whether a value can fit for not. Don't break the build over a value that's too large for the mask. We'd prefer to keep the inlining and compiler optimizations though we know this case will always return false. Cc: stable@vger.kernel.org Fixes: 1697599ee301a ("bitfield.h: add FIELD_FIT() helper") Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjXpEUL7d=V0CXiAXcNw@mail.gmail.com/ Reported-by: Masahiro Yamada Debugged-by: Sami Tolvanen Signed-off-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) -- cgit From b0294f30256bb6023b2044fd607855123863d98f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Aug 2020 20:32:48 -0700 Subject: time: Delete repeated words in comments Drop repeated words in kernel/time/. {when, one, into} Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20200807033248.8452-1-rdunlap@infradead.org --- kernel/time/alarmtimer.c | 2 +- kernel/time/sched_clock.c | 2 +- kernel/time/timekeeping.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2ffb466af77e..ca223a89530a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -192,7 +192,7 @@ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) * When a alarm timer fires, this runs through the timerqueue to * see which alarms expired, and runs those. If there are more alarm * timers queued for the future, we set the hrtimer to fire when - * when the next future alarm timer expires. + * the next future alarm timer expires. */ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) { diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 0deaf4b79fb4..1c03eec6ca9b 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -229,7 +229,7 @@ void __init generic_sched_clock_init(void) { /* * If no sched_clock() function has been provided at that point, - * make it the final one one. + * make it the final one. */ if (cd.actual_read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4c7212f3c603..35cd10f1143b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2001,7 +2001,7 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) * logarithmic_accumulation - shifted accumulation of cycles * * This functions accumulates a shifted interval of cycles into - * into a shifted interval nanoseconds. Allows for O(log) accumulation + * a shifted interval nanoseconds. Allows for O(log) accumulation * loop. * * Returns the unconsumed cycles. -- cgit From 0ba9c9edcd152158a0e321a4c13ac1dfc571ff3d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Aug 2020 19:41:50 -0600 Subject: io_uring: use TWA_SIGNAL for task_work uncondtionally An earlier commit: b7db41c9e03b ("io_uring: fix regression with always ignoring signals in io_cqring_wait()") ensured that we didn't get stuck waiting for eventfd reads when it's registered with the io_uring ring for event notification, but we still have cases where the task can be waiting on other events in the kernel and need a bigger nudge to make forward progress. Or the task could be in the kernel and running, but on its way to blocking. This means that TWA_RESUME cannot reliably be used to ensure we make progress. Use TWA_SIGNAL unconditionally. Cc: stable@vger.kernel.org # v5.7+ Reported-by: Josef Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e9b27cdaa735..af6811ddcfbd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1710,22 +1710,22 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb) { struct task_struct *tsk = req->task; struct io_ring_ctx *ctx = req->ctx; - int ret, notify = TWA_RESUME; + int ret, notify; /* - * SQPOLL kernel thread doesn't need notification, just a wakeup. - * If we're not using an eventfd, then TWA_RESUME is always fine, - * as we won't have dependencies between request completions for - * other kernel wait conditions. + * SQPOLL kernel thread doesn't need notification, just a wakeup. For + * all other cases, use TWA_SIGNAL unconditionally to ensure we're + * processing task_work. There's no reliable way to tell if TWA_RESUME + * will do the job. */ - if (ctx->flags & IORING_SETUP_SQPOLL) - notify = 0; - else if (ctx->cq_ev_fd) + notify = 0; + if (!(ctx->flags & IORING_SETUP_SQPOLL)) notify = TWA_SIGNAL; ret = task_work_add(tsk, cb, notify); if (!ret) wake_up_process(tsk); + return ret; } -- cgit From 7271ef3a93a832180068c7aade3f130b7f39b17e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 09:55:22 -0600 Subject: io_uring: fix recursive completion locking on oveflow flush syszbot reports a scenario where we recurse on the completion lock when flushing an overflow: 1 lock held by syz-executor287/6816: #0: ffff888093cdb4d8 (&ctx->completion_lock){....}-{2:2}, at: io_cqring_overflow_flush+0xc6/0xab0 fs/io_uring.c:1333 stack backtrace: CPU: 1 PID: 6816 Comm: syz-executor287 Not tainted 5.8.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1f0/0x31e lib/dump_stack.c:118 print_deadlock_bug kernel/locking/lockdep.c:2391 [inline] check_deadlock kernel/locking/lockdep.c:2432 [inline] validate_chain+0x69a4/0x88a0 kernel/locking/lockdep.c:3202 __lock_acquire+0x1161/0x2ab0 kernel/locking/lockdep.c:4426 lock_acquire+0x160/0x730 kernel/locking/lockdep.c:5005 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline] _raw_spin_lock_irq+0x67/0x80 kernel/locking/spinlock.c:167 spin_lock_irq include/linux/spinlock.h:379 [inline] io_queue_linked_timeout fs/io_uring.c:5928 [inline] __io_queue_async_work fs/io_uring.c:1192 [inline] __io_queue_deferred+0x36a/0x790 fs/io_uring.c:1237 io_cqring_overflow_flush+0x774/0xab0 fs/io_uring.c:1359 io_ring_ctx_wait_and_kill+0x2a1/0x570 fs/io_uring.c:7808 io_uring_release+0x59/0x70 fs/io_uring.c:7829 __fput+0x34f/0x7b0 fs/file_table.c:281 task_work_run+0x137/0x1c0 kernel/task_work.c:135 exit_task_work include/linux/task_work.h:25 [inline] do_exit+0x5f3/0x1f20 kernel/exit.c:806 do_group_exit+0x161/0x2d0 kernel/exit.c:903 __do_sys_exit_group+0x13/0x20 kernel/exit.c:914 __se_sys_exit_group+0x10/0x10 kernel/exit.c:912 __x64_sys_exit_group+0x37/0x40 kernel/exit.c:912 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by passing back the link from __io_queue_async_work(), and then let the caller handle the queueing of the link. Take care to also punt the submission reference put to the caller, as we're holding the completion lock for the __io_queue_defer() case. Hence we need to mark the io_kiocb appropriately for that case. Reported-by: syzbot+996f91b6ec3812c48042@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index af6811ddcfbd..360649041bfa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -898,6 +898,7 @@ static void io_put_req(struct io_kiocb *req); static void io_double_put_req(struct io_kiocb *req); static void __io_double_put_req(struct io_kiocb *req); static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req); +static void __io_queue_linked_timeout(struct io_kiocb *req); static void io_queue_linked_timeout(struct io_kiocb *req); static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, @@ -1179,7 +1180,7 @@ static void io_prep_async_link(struct io_kiocb *req) io_prep_async_work(cur); } -static void __io_queue_async_work(struct io_kiocb *req) +static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *link = io_prep_linked_timeout(req); @@ -1187,16 +1188,19 @@ static void __io_queue_async_work(struct io_kiocb *req) trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); io_wq_enqueue(ctx->io_wq, &req->work); - - if (link) - io_queue_linked_timeout(link); + return link; } static void io_queue_async_work(struct io_kiocb *req) { + struct io_kiocb *link; + /* init ->work of the whole link before punting */ io_prep_async_link(req); - __io_queue_async_work(req); + link = __io_queue_async_work(req); + + if (link) + io_queue_linked_timeout(link); } static void io_kill_timeout(struct io_kiocb *req) @@ -1229,12 +1233,19 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) do { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); + struct io_kiocb *link; if (req_need_defer(de->req, de->seq)) break; list_del_init(&de->list); /* punt-init is done before queueing for defer */ - __io_queue_async_work(de->req); + link = __io_queue_async_work(de->req); + if (link) { + __io_queue_linked_timeout(link); + /* drop submission reference */ + link->flags |= REQ_F_COMP_LOCKED; + io_put_req(link); + } kfree(de); } while (!list_empty(&ctx->defer_list)); } @@ -5939,15 +5950,12 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void io_queue_linked_timeout(struct io_kiocb *req) +static void __io_queue_linked_timeout(struct io_kiocb *req) { - struct io_ring_ctx *ctx = req->ctx; - /* * If the list is now empty, then our linked request finished before * we got a chance to setup the timer */ - spin_lock_irq(&ctx->completion_lock); if (!list_empty(&req->link_list)) { struct io_timeout_data *data = &req->io->timeout; @@ -5955,6 +5963,14 @@ static void io_queue_linked_timeout(struct io_kiocb *req) hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); } +} + +static void io_queue_linked_timeout(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->completion_lock); + __io_queue_linked_timeout(req); spin_unlock_irq(&ctx->completion_lock); /* drop submission reference */ -- cgit From 9b7adba9eaec28e0e4343c96d0dbeb9578802f5f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 10:54:02 -0600 Subject: io_uring: add missing REQ_F_COMP_LOCKED for nested requests When we traverse into failing links or timeouts, we need to ensure we propagate the REQ_F_COMP_LOCKED flag to ensure that we correctly signal to the completion side that we already hold the completion lock. Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 360649041bfa..56115cb4b9fa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1609,6 +1609,7 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) return false; list_del_init(&link->link_list); + link->flags |= REQ_F_COMP_LOCKED; wake_ev = io_link_cancel_timeout(link); req->flags &= ~REQ_F_LINK_TIMEOUT; return wake_ev; @@ -1667,6 +1668,7 @@ static void __io_fail_links(struct io_kiocb *req) trace_io_uring_fail_link(req, link); io_cqring_fill_event(link, -ECANCELED); + link->flags |= REQ_F_COMP_LOCKED; __io_double_put_req(link); req->flags &= ~REQ_F_LINK_TIMEOUT; } @@ -5071,6 +5073,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) return -EALREADY; req_set_fail_links(req); + req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, -ECANCELED); io_put_req(req); return 0; -- cgit From 51a4cc112c7a42b62a91bcccdfac42e7c4561729 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 10:55:56 -0600 Subject: io_uring: defer file table grabbing request cleanup for locked requests If we're in the error path failing links and we have a link that has grabbed a reference to the fs_struct, then we cannot safely drop our reference to the table if we already hold the completion lock. This adds a hardirq dependency to the fs_struct->lock, which it currently doesn't have. Defer the final cleanup and free of such requests to avoid adding this dependency. Reported-by: syzbot+ef4b654b49ed7ff049bf@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 56115cb4b9fa..5488698189da 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1108,10 +1108,16 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) } } -static void io_req_clean_work(struct io_kiocb *req) +/* + * Returns true if we need to defer file table putting. This can only happen + * from the error path with REQ_F_COMP_LOCKED set. + */ +static bool io_req_clean_work(struct io_kiocb *req) { if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return; + return false; + + req->flags &= ~REQ_F_WORK_INITIALIZED; if (req->work.mm) { mmdrop(req->work.mm); @@ -1124,6 +1130,9 @@ static void io_req_clean_work(struct io_kiocb *req) if (req->work.fs) { struct fs_struct *fs = req->work.fs; + if (req->flags & REQ_F_COMP_LOCKED) + return true; + spin_lock(&req->work.fs->lock); if (--fs->users) fs = NULL; @@ -1132,7 +1141,8 @@ static void io_req_clean_work(struct io_kiocb *req) free_fs_struct(fs); req->work.fs = NULL; } - req->flags &= ~REQ_F_WORK_INITIALIZED; + + return false; } static void io_prep_async_work(struct io_kiocb *req) @@ -1544,7 +1554,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, fput(file); } -static void io_dismantle_req(struct io_kiocb *req) +static bool io_dismantle_req(struct io_kiocb *req) { io_clean_op(req); @@ -1552,7 +1562,6 @@ static void io_dismantle_req(struct io_kiocb *req) kfree(req->io); if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - io_req_clean_work(req); if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; @@ -1564,15 +1573,15 @@ static void io_dismantle_req(struct io_kiocb *req) wake_up(&ctx->inflight_wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); } + + return io_req_clean_work(req); } -static void __io_free_req(struct io_kiocb *req) +static void __io_free_req_finish(struct io_kiocb *req) { - struct io_ring_ctx *ctx; + struct io_ring_ctx *ctx = req->ctx; - io_dismantle_req(req); __io_put_req_task(req); - ctx = req->ctx; if (likely(!io_is_fallback_req(req))) kmem_cache_free(req_cachep, req); else @@ -1580,6 +1589,39 @@ static void __io_free_req(struct io_kiocb *req) percpu_ref_put(&ctx->refs); } +static void io_req_task_file_table_put(struct callback_head *cb) +{ + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct fs_struct *fs = req->work.fs; + + spin_lock(&req->work.fs->lock); + if (--fs->users) + fs = NULL; + spin_unlock(&req->work.fs->lock); + if (fs) + free_fs_struct(fs); + req->work.fs = NULL; + __io_free_req_finish(req); +} + +static void __io_free_req(struct io_kiocb *req) +{ + if (!io_dismantle_req(req)) { + __io_free_req_finish(req); + } else { + int ret; + + init_task_work(&req->task_work, io_req_task_file_table_put); + ret = task_work_add(req->task, &req->task_work, TWA_RESUME); + if (unlikely(ret)) { + struct task_struct *tsk; + + tsk = io_wq_get_task(req->ctx->io_wq); + task_work_add(tsk, &req->task_work, 0); + } + } +} + static bool io_link_cancel_timeout(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -1868,7 +1910,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) req->flags &= ~REQ_F_TASK_PINNED; } - io_dismantle_req(req); + WARN_ON_ONCE(io_dismantle_req(req)); rb->reqs[rb->to_free++] = req; if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) __io_req_free_batch_flush(req->ctx, rb); -- cgit From 4f88b4ccb7036ce523b3c031d1226d6eda36417b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:51:44 -0500 Subject: drm/vmwgfx: Use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 126f93c0b0b8..3914bfee0533 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1969,7 +1969,7 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) num_mip = 1; num_subres = num_layers * num_mip; - dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]); + dirty_size = struct_size(dirty, boxes, num_subres); acc_size = ttm_round_pot(dirty_size); ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv), acc_size, &ctx); -- cgit From 1d2c0c565bc0da25f5e899a862fb58e612b222df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jun 2020 13:34:37 +0300 Subject: drm/vmwgfx: Use correct vmw_legacy_display_unit pointer The "entry" pointer is an offset from the list head and it doesn't point to a valid vmw_legacy_display_unit struct. Presumably the intent was to point to the last entry. Also the "i++" wasn't used so I have removed that as well. Fixes: d7e1958dbe4a ("drm/vmwgfx: Support older hardware.") Signed-off-by: Dan Carpenter Reviewed-by: Roland Scheidegger Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 16dafff5cab1..009f1742bed5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -81,7 +81,7 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) struct vmw_legacy_display_unit *entry; struct drm_framebuffer *fb = NULL; struct drm_crtc *crtc = NULL; - int i = 0; + int i; /* If there is no display topology the host just assumes * that the guest will set the same layout as the host. @@ -92,12 +92,11 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) crtc = &entry->base.crtc; w = max(w, crtc->x + crtc->mode.hdisplay); h = max(h, crtc->y + crtc->mode.vdisplay); - i++; } if (crtc == NULL) return 0; - fb = entry->base.crtc.primary->state->fb; + fb = crtc->primary->state->fb; return vmw_kms_write_svga(dev_priv, w, h, fb->pitches[0], fb->format->cpp[0] * 8, -- cgit From 4437c1152ce0e57ab8f401aa696ea6291cc07ab1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jun 2020 13:39:59 +0300 Subject: drm/vmwgfx: Fix two list_for_each loop exit tests These if statements are supposed to be true if we ended the list_for_each_entry() loops without hitting a break statement but they don't work. In the first loop, we increment "i" after the "if (i == unit)" condition so we don't necessarily know that "i" is not equal to unit at the end of the loop. In the second loop we exit when mode is not pointing to a valid drm_display_mode struct so it doesn't make sense to check "mode->type". Fixes: a278724aa23c ("drm/vmwgfx: Implement fbdev on kms v2") Signed-off-by: Dan Carpenter Reviewed-by: Roland Scheidegger Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index bbce45d142aa..6e9a6135533a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2575,7 +2575,7 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, ++i; } - if (i != unit) { + if (&con->head == &dev_priv->dev->mode_config.connector_list) { DRM_ERROR("Could not find initial display unit.\n"); ret = -EINVAL; goto out_unlock; @@ -2599,13 +2599,13 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, break; } - if (mode->type & DRM_MODE_TYPE_PREFERRED) - *p_mode = mode; - else { + if (&mode->head == &con->modes) { WARN_ONCE(true, "Could not find initial preferred mode.\n"); *p_mode = list_first_entry(&con->modes, struct drm_display_mode, head); + } else { + *p_mode = mode; } out_unlock: -- cgit From 68745d1edf1700a668c15ecbed466d18f14c7e9b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 6 Aug 2020 17:42:27 +0200 Subject: drm/vmwgfx/stdu: Use drm_mode_config_reset When converting to atomic the state reset was done by directly calling the functions, and before the modeset object was fully initialized. This means the various ->dev pointers weren't set up. After commit 51f644b40b4b794b28b982fdd5d0dd8ee63f9272 Author: Daniel Vetter Date: Fri Jun 12 18:00:49 2020 +0200 drm/atomic-helper: reset vblank on crtc reset this started to oops because now we're trying to derefence drm_crtc->dev. Fix this up by entirely switching over to drm_mode_config_reset, called once everything is set up. Fixes: 51f644b40b4b ("drm/atomic-helper: reset vblank on crtc reset") Reported-by: Tetsuo Handa Cc: Tetsuo Handa Signed-off-by: Daniel Vetter Tested-by: Roland Scheidegger Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 16b385629688..cf3aafd00837 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1738,8 +1738,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) stdu->base.is_implicit = false; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, primary, 0, &vmw_stdu_plane_funcs, vmw_primary_plane_formats, @@ -1754,8 +1752,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_enable_fb_damage_clips(primary); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, cursor, 0, &vmw_stdu_cursor_funcs, vmw_cursor_plane_formats, @@ -1769,8 +1765,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_stdu_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); - ret = drm_connector_init(dev, connector, &vmw_stdu_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -1798,7 +1792,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &stdu->base.primary, &stdu->base.cursor, &vmw_stdu_crtc_funcs, NULL); @@ -1894,6 +1887,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv) } } + drm_mode_config_reset(dev); + DRM_INFO("Screen Target Display device initialized\n"); return 0; -- cgit From 1338441cf166e2ef789af5915b961d4e13a4ec31 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 7 Aug 2020 22:03:42 +0200 Subject: drm/vmwgfx/sou: Use drm_mode_config_reset Same problem as in stdu, same fix. Fixes: 51f644b40b4b ("drm/atomic-helper: reset vblank on crtc reset") Acked-by: Charmaine Lee Reviewed-by: Zack Rusin Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 32a22e4eddb1..4bf0f5ec4fc2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -859,8 +859,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) sou->base.is_implicit = false; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, &sou->base.primary, 0, &vmw_sou_plane_funcs, vmw_primary_plane_formats, @@ -875,8 +873,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_enable_fb_damage_clips(primary); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, &sou->base.cursor, 0, &vmw_sou_cursor_funcs, vmw_cursor_plane_formats, @@ -890,7 +886,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_sou_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); ret = drm_connector_init(dev, connector, &vmw_sou_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -918,8 +913,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &sou->base.primary, &sou->base.cursor, &vmw_screen_object_crtc_funcs, NULL); @@ -973,6 +966,8 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) dev_priv->active_display_unit = vmw_du_screen_object; + drm_mode_config_reset(dev); + DRM_INFO("Screen Objects Display Unit initialized\n"); return 0; -- cgit From 981243371a5d832af5bc572071172e955d02fe88 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 7 Aug 2020 22:04:33 +0200 Subject: drm/vmwgfx/ldu: Use drm_mode_config_reset Same problem as in stdu, same fix. Fixes: 51f644b40b4b ("drm/atomic-helper: reset vblank on crtc reset") Acked-by: Charmaine Lee Reviewed-by: Zack Rusin Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 009f1742bed5..c4017c7a24db 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -387,8 +387,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) ldu->base.is_implicit = true; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, &ldu->base.primary, 0, &vmw_ldu_plane_funcs, vmw_primary_plane_formats, @@ -402,8 +400,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(primary, &vmw_ldu_primary_plane_helper_funcs); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, &ldu->base.cursor, 0, &vmw_ldu_cursor_funcs, vmw_cursor_plane_formats, @@ -417,7 +413,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_ldu_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); ret = drm_connector_init(dev, connector, &vmw_legacy_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -445,7 +440,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &ldu->base.primary, &ldu->base.cursor, &vmw_legacy_crtc_funcs, NULL); @@ -520,6 +514,8 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv) dev_priv->active_display_unit = vmw_du_legacy; + drm_mode_config_reset(dev); + DRM_INFO("Legacy Display Unit initialized\n"); return 0; -- cgit From 1ae96fce3e4dfe769515350b7f142b3bca1e0bb6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 5 Aug 2020 12:31:55 +0100 Subject: drm/vmwgfx: fix spelling mistake "Cound" -> "Could" There is a spelling mistake in a DRM_ERROR message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 4284c4bd444d..e67e2e8f6e6f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3037,7 +3037,7 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, res = vmw_dx_streamoutput_lookup(vmw_context_res_man(ctx_node->ctx), cmd->body.soid); if (IS_ERR(res)) { - DRM_ERROR("Cound not find streamoutput to bind.\n"); + DRM_ERROR("Could not find streamoutput to bind.\n"); return PTR_ERR(res); } -- cgit From e97644ebcdc83854e6e29e96285b25042445c28c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Aug 2020 11:04:01 +0100 Subject: drm/vmwgfx: fix spelling mistake "Cant" -> "Can't" There is a spelling mistake in a DRM_ERROR message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 6e9a6135533a..312ed0881a99 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -186,7 +186,7 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, /* TODO handle none page aligned offsets */ /* TODO handle more dst & src != 0 */ /* TODO handle more then one copy */ - DRM_ERROR("Cant snoop dma request for cursor!\n"); + DRM_ERROR("Can't snoop dma request for cursor!\n"); DRM_ERROR("(%u, %u, %u) (%u, %u, %u) (%ux%ux%u) %u %u\n", box->srcx, box->srcy, box->srcz, box->x, box->y, box->z, -- cgit From a13f2ef168cb2a033a284eb841bcc481ffbc90cf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Jun 2020 10:35:39 +0200 Subject: x86/xen: remove 32-bit Xen PV guest support Xen is requiring 64-bit machines today and since Xen 4.14 it can be built without 32-bit PV guest support. There is no need to carry the burden of 32-bit PV guest support in the kernel any longer, as new guests can be either HVM or PVH, or they can use a 64 bit kernel. Remove the 32-bit Xen PV support from the kernel. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/entry/entry_32.S | 109 +----------- arch/x86/entry/vdso/vdso32/note.S | 30 ---- arch/x86/include/asm/proto.h | 2 +- arch/x86/include/asm/segment.h | 2 +- arch/x86/kernel/head_32.S | 31 ---- arch/x86/xen/Kconfig | 3 +- arch/x86/xen/apic.c | 17 -- arch/x86/xen/enlighten_pv.c | 64 +------ arch/x86/xen/mmu_pv.c | 356 +++++--------------------------------- arch/x86/xen/p2m.c | 6 +- arch/x86/xen/setup.c | 36 +--- arch/x86/xen/smp_pv.c | 18 -- arch/x86/xen/vdso.h | 6 - arch/x86/xen/xen-asm.S | 14 -- arch/x86/xen/xen-asm_32.S | 185 -------------------- arch/x86/xen/xen-head.S | 6 - arch/x86/xen/xen-ops.h | 1 - drivers/xen/Kconfig | 4 +- 18 files changed, 50 insertions(+), 840 deletions(-) delete mode 100644 arch/x86/xen/vdso.h delete mode 100644 arch/x86/xen/xen-asm_32.S diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2d0bd5d5f032..494d3224fa8a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -449,8 +449,6 @@ .macro SWITCH_TO_KERNEL_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - BUG_IF_WRONG_CR3 SWITCH_TO_KERNEL_CR3 scratch_reg=%eax @@ -599,8 +597,6 @@ */ .macro SWITCH_TO_ENTRY_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - /* Bytes to copy */ movl $PTREGS_SIZE, %ecx @@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN_PV -/* - * Xen doesn't set %esp to be precisely what the normal SYSENTER - * entry point expects, so fix it up before using the normal path. - */ -SYM_CODE_START(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp .Lsysenter_past_esp -SYM_CODE_END(xen_sysenter_target) -#endif - /* * 32-bit SYSENTER entry. * @@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32) movl %esp, %eax call do_SYSENTER_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + testl %eax, %eax + jz .Lsyscall_32_done STACKLEAK_ERASE @@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -#ifdef CONFIG_PARAVIRT -SYM_CODE_START(native_iret) - iret - _ASM_EXTABLE(native_iret, asm_iret_error) -SYM_CODE_END(native_iret) -#endif - -#ifdef CONFIG_XEN_PV -/* - * See comment in entry_64.S for further explanation - * - * Note: This is not an actual IDT entry point. It's a XEN specific entry - * point and therefore named to match the 64-bit trampoline counterpart. - */ -SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback) - /* - * Check to see if we got the event in the critical - * region in xen_iret_direct, after we've reenabled - * events and checked for pending events. This simulates - * iret instruction's behaviour where it delivers a - * pending interrupt when enabling interrupts: - */ - cmpl $xen_iret_start_crit, (%esp) - jb 1f - cmpl $xen_iret_end_crit, (%esp) - jae 1f - call xen_iret_crit_fixup -1: - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - - mov %esp, %eax - call xen_pv_evtchn_do_upcall - jmp handle_exception_return -SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback) - -/* - * Hypervisor uses this for application faults while it executes. - * We get here for two reasons: - * 1. Fault while reloading DS, ES, FS or GS - * 2. Fault while executing IRET - * Category 1 we fix up by reattempting the load, and zeroing the segment - * register if the load fails. - * Category 2 we fix up by jumping to do_iret_error. We cannot use the - * normal Linux return path in this case because if we use the IRET hypercall - * to pop the stack frame we end up in an infinite loop of failsafe callbacks. - * We distinguish between categories by maintaining a status value in EAX. - */ -SYM_FUNC_START(xen_failsafe_callback) - pushl %eax - movl $1, %eax -1: mov 4(%esp), %ds -2: mov 8(%esp), %es -3: mov 12(%esp), %fs -4: mov 16(%esp), %gs - /* EAX == 0 => Category 1 (Bad segment) - EAX != 0 => Category 2 (Bad IRET) */ - testl %eax, %eax - popl %eax - lea 16(%esp), %esp - jz 5f - jmp asm_iret_error -5: pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - jmp handle_exception_return - -.section .fixup, "ax" -6: xorl %eax, %eax - movl %eax, 4(%esp) - jmp 1b -7: xorl %eax, %eax - movl %eax, 8(%esp) - jmp 2b -8: xorl %eax, %eax - movl %eax, 12(%esp) - jmp 3b -9: xorl %eax, %eax - movl %eax, 16(%esp) - jmp 4b -.previous - _ASM_EXTABLE(1b, 6b) - _ASM_EXTABLE(2b, 7b) - _ASM_EXTABLE(3b, 8b) - _ASM_EXTABLE(4b, 9b) -SYM_FUNC_END(xen_failsafe_callback) -#endif /* CONFIG_XEN_PV */ - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index e78047d119f6..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a") ELFNOTE_END BUILD_SALT - -#ifdef CONFIG_XEN -/* - * Add a special note telling glibc's dynamic linker a fake hardware - * flavor that it will use to choose the search path for libraries in the - * same way it uses real hardware capabilities like "mmx". - * We supply "nosegneg" as the fake capability, to indicate that we - * do not like negative offsets in instructions using segment overrides, - * since we implement those inefficiently. This makes it possible to - * install libraries optimized to avoid those access patterns in someplace - * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file - * corresponding to the bits here is needed to make ldconfig work right. - * It should contain: - * hwcap 1 nosegneg - * to match the mapping of bit to name that we give here. - * - * At runtime, the fake hardware feature will be considered to be present - * if its bit is set in the mask word. So, we start with the mask 0, and - * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. - */ - -#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ - -ELFNOTE_START(GNU, 2, "a") - .long 1 /* ncaps */ -VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */ - .long 0 /* mask */ - .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ -ELFNOTE_END -#endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c1..28996fe19301 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); void entry_INT80_compat(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); #endif #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 6669164abadc..9646c300f128 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f66a6b90f954..7ed84c282233 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -134,38 +134,7 @@ SYM_CODE_START(startup_32) movl %eax,pa(initial_page_table+0xffc) #endif -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb .Ldefault_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae .Lbad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -.Lbad_subarch: -SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long .Ldefault_entry /* normal x86/PC */ - .long xen_entry /* Xen hypervisor */ - .long .Ldefault_entry /* Moorestown MID */ -num_subarch_entries = (. - subarch_entries) / 4 -.previous -#else jmp .Ldefault_entry -#endif /* CONFIG_PARAVIRT */ SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1aded63a95cb..218acbd5c7a0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -19,6 +19,7 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN + depends on X86_64 select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU @@ -50,7 +51,7 @@ config XEN_PVHVM_SMP config XEN_512GB bool "Limit Xen pv-domain memory to 512GB" - depends on XEN_PV && X86_64 + depends on XEN_PV default y help Limit paravirtualized user domains to 512GB of RAM. diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 5e53bfbe5823..ea6e9c54da9d 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -58,10 +58,6 @@ static u32 xen_apic_read(u32 reg) if (reg == APIC_LVR) return 0x14; -#ifdef CONFIG_X86_32 - if (reg == APIC_LDR) - return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); -#endif if (reg != APIC_ID) return 0; @@ -127,14 +123,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -#ifdef CONFIG_X86_32 -static int xen_x86_32_early_logical_apicid(int cpu) -{ - /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ - return 1 << cpu; -} -#endif - static void xen_noop(void) { } @@ -197,11 +185,6 @@ static struct apic xen_pv_apic = { .icr_write = xen_apic_icr_write, .wait_icr_idle = xen_noop, .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 - /* generic_processor_info and setup_local_APIC. */ - .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, -#endif }; static void __init xen_apic_check(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c46b9f2e732f..2831f009ef78 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -119,14 +119,6 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); - -#ifdef CONFIG_X86_32 - pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" - "Support for running as 32-bit PV-guest under Xen will soon be removed\n" - "from the Linux kernel!\n" - "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" - "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"); -#endif } static void __init xen_pv_init_platform(void) @@ -538,30 +530,12 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone - * and lazy gs handling is enabled, it means we're in a - * context switch, and %gs has just been saved. This means we - * can zero it out to prevent faults on exit from the - * hypervisor if the next process has no %gs. Either way, it - * has been saved, and the new value will get loaded properly. - * This will go away as soon as Xen has been modified to not - * save/restore %gs for normal hypercalls. - * - * On x86_64, this hack is not used for %gs, because gs points - * to KERNEL_GS_BASE (and uses it for PDA references), so we - * must not zero %gs on x86_64 - * - * For x86_64, we need to zero %fs, otherwise we may get an + * In lazy mode we need to zero %fs, otherwise we may get an * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { -#ifdef CONFIG_X86_32 - lazy_load_gs(0); -#else + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(fs, 0); -#endif - } xen_mc_batch(); @@ -572,13 +546,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) xen_mc_issue(PARAVIRT_LAZY_CPU); } -#ifdef CONFIG_X86_64 static void xen_load_gs_index(unsigned int idx) { if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) BUG(); } -#endif static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) @@ -597,7 +569,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } -#ifdef CONFIG_X86_64 void noist_exc_debug(struct pt_regs *regs); DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) @@ -697,7 +668,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) return true; } -#endif static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) @@ -710,10 +680,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->vector = vector; addr = gate_offset(val); -#ifdef CONFIG_X86_64 if (!get_trap_addr((void **)&addr, val->bits.ist)) return 0; -#endif /* CONFIG_X86_64 */ info->address = addr; info->cs = gate_segment(val); @@ -958,15 +926,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; -#ifdef CONFIG_X86_64 unsigned int which; u64 base; -#endif ret = 0; switch (msr) { -#ifdef CONFIG_X86_64 case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; @@ -976,7 +941,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) if (HYPERVISOR_set_segment_base(which, base) != 0) ret = -EIO; break; -#endif case MSR_STAR: case MSR_CSTAR: @@ -1058,9 +1022,7 @@ void __init xen_setup_vcpu_info_placement(void) static const struct pv_info xen_info __initconst = { .shared_kernel_pmd = 0, -#ifdef CONFIG_X86_64 .extra_user_64bit_cs = FLAT_USER_CS64, -#endif .name = "Xen", }; @@ -1086,18 +1048,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, .iret = xen_iret, -#ifdef CONFIG_X86_64 .usergs_sysret64 = xen_sysret64, -#endif .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, .load_gdt = xen_load_gdt, .load_idt = xen_load_idt, .load_tls = xen_load_tls, -#ifdef CONFIG_X86_64 .load_gs_index = xen_load_gs_index, -#endif .alloc_ldt = xen_alloc_ldt, .free_ldt = xen_free_ldt, @@ -1364,15 +1322,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ -#ifdef CONFIG_X86_32 - pv_info.kernel_rpl = 1; - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - pv_info.kernel_rpl = 0; -#else pv_info.kernel_rpl = 0; -#endif - /* set the limit of our address space */ - xen_reserve_top(); /* * We used to do this in xen_arch_setup, but that is too late @@ -1384,12 +1334,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (rc != 0) xen_raw_printk("physdev_op failed %d\n", rc); -#ifdef CONFIG_X86_32 - /* set up basic CPUID stuff */ - cpu_detect(&new_cpu_data); - set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); - new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); -#endif if (xen_start_info->mod_start) { if (xen_start_info->flags & SIF_MOD_START_PFN) @@ -1458,12 +1402,8 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_efi_init(&boot_params); /* Start the world */ -#ifdef CONFIG_X86_32 - i386_start_kernel(); -#else cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ x86_64_start_reservations((char *)__pa_symbol(&boot_params)); -#endif } static int xen_cpu_up_prepare_pv(unsigned int cpu) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index a58d9c69807a..ff99e20a84e3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,19 +86,8 @@ #include "mmu.h" #include "debugfs.h" -#ifdef CONFIG_X86_32 -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) -static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); -#endif -#ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ /* * Protects atomic reservation decrease/increase against concurrent increases. @@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) if (!xen_batched_set_pte(ptep, pteval)) { /* * Could call native_set_pte() here and trap and - * emulate the PTE write but with 32-bit guests this - * needs two traps (one for each of the two 32-bit - * words in the PTE) so do one hypercall directly - * instead. + * emulate the PTE write, but a hypercall is much cheaper. */ struct mmu_update u; @@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val) xen_set_pud_hyper(ptr, val); } -#ifdef CONFIG_X86_PAE -static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - trace_xen_mmu_set_pte_atomic(ptep, pte); - __xen_set_pte(ptep, pte); -} - -static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - trace_xen_mmu_pte_clear(mm, addr, ptep); - __xen_set_pte(ptep, native_make_pte(0)); -} - -static void xen_pmd_clear(pmd_t *pmdp) -{ - trace_xen_mmu_pmd_clear(pmdp); - set_pmd(pmdp, __pmd(0)); -} -#endif /* CONFIG_X86_PAE */ - __visible pmd_t xen_make_pmd(pmdval_t pmd) { pmd = pte_pfn_to_mfn(pmd); @@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#ifdef CONFIG_X86_64 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -571,7 +536,6 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ -#endif /* CONFIG_X86_64 */ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, int (*func)(struct mm_struct *mm, struct page *, enum pt_level), @@ -636,11 +600,8 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, * will be STACK_TOP_MAX, but at boot we need to pin up to * FIXADDR_TOP. * - * For 32-bit the important bit is that we don't pin beyond there, - * because then we start getting into Xen's ptes. - * - * For 64-bit, we must skip the Xen hole in the middle of the address - * space, just after the big x86-64 virtual hole. + * We must skip the Xen hole in the middle of the address space, just after + * the big x86-64 virtual hole. */ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, int (*func)(struct mm_struct *mm, struct page *, @@ -654,14 +615,12 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, limit--; BUG_ON(limit >= FIXADDR_TOP); -#ifdef CONFIG_X86_64 /* * 64-bit has a great big hole in the middle of the address * space, which contains the Xen mappings. */ hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); hole_high = pgd_index(GUARD_HOLE_END_ADDR); -#endif nr = pgd_index(limit) + 1; for (i = 0; i < nr; i++) { @@ -787,6 +746,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. */ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_pin(mm, pgd); xen_mc_batch(); @@ -800,26 +761,14 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) xen_mc_batch(); } -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - - if (user_pgd) { - xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); - xen_do_pin(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa(user_pgd))); - } + if (user_pgd) { + xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); + xen_do_pin(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa(user_pgd))); } -#else /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); -#endif /* CONFIG_X86_64 */ + xen_mc_issue(0); } @@ -870,9 +819,7 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, static void __init xen_after_bootmem(void) { static_branch_enable(&xen_struct_pages_ready); -#ifdef CONFIG_X86_64 SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -919,29 +866,19 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, /* Release a pagetables pages back as normal RW */ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_unpin(mm, pgd); xen_mc_batch(); xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) { - xen_do_pin(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(user_pgd))); - xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); - } + if (user_pgd) { + xen_do_pin(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(user_pgd))); + xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); } -#endif - -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); @@ -1089,7 +1026,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) BUG(); } -#ifdef CONFIG_X86_64 static void __init xen_cleanhighmap(unsigned long vaddr, unsigned long vaddr_end) { @@ -1273,17 +1209,15 @@ static void __init xen_pagetable_cleanhighmap(void) xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); } -#endif static void __init xen_pagetable_p2m_setup(void) { xen_vmalloc_p2m_tree(); -#ifdef CONFIG_X86_64 xen_pagetable_p2m_free(); xen_pagetable_cleanhighmap(); -#endif + /* And revector! Bye bye old array */ xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; } @@ -1420,6 +1354,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) } static void xen_write_cr3(unsigned long cr3) { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + BUG_ON(preemptible()); xen_mc_batch(); /* disables interrupts */ @@ -1430,20 +1366,14 @@ static void xen_write_cr3(unsigned long cr3) __xen_write_cr3(true, cr3); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#ifdef CONFIG_X86_64 /* * At the start of the day - when Xen launches a guest, it has already * built pagetables for the guest. We diligently look over them @@ -1478,49 +1408,39 @@ static void __init xen_write_cr3_init(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#endif static int xen_pgd_alloc(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; - int ret = 0; + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + int ret = -ENOMEM; BUG_ON(PagePinned(virt_to_page(pgd))); + BUG_ON(page->private != 0); -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; - if (user_pgd != NULL) { + if (user_pgd != NULL) { #ifdef CONFIG_X86_VSYSCALL_EMULATION - user_pgd[pgd_index(VSYSCALL_ADDR)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + user_pgd[pgd_index(VSYSCALL_ADDR)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); #endif - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + ret = 0; } -#endif + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + return ret; } static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { -#ifdef CONFIG_X86_64 pgd_t *user_pgd = xen_get_user_pgd(pgd); if (user_pgd) free_page((unsigned long)user_pgd); -#endif } /* @@ -1539,7 +1459,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) */ __visible pte_t xen_make_pte_init(pteval_t pte) { -#ifdef CONFIG_X86_64 unsigned long pfn; /* @@ -1553,7 +1472,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte) pfn >= xen_start_info->first_p2m_pfn && pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) pte &= ~_PAGE_RW; -#endif + pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } @@ -1561,13 +1480,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { -#ifdef CONFIG_X86_32 - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_mfn(pte) != INVALID_P2M_ENTRY - && pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); -#endif __xen_set_pte(ptep, pte); } @@ -1702,7 +1614,6 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#ifdef CONFIG_X86_64 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -1712,20 +1623,6 @@ static void xen_release_pud(unsigned long pfn) { xen_release_ptpage(pfn, PT_PUD); } -#endif - -void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} /* * Like __va(), but returns address in the kernel mapping (which is @@ -1733,11 +1630,7 @@ void __init xen_reserve_top(void) */ static void * __init __ka(phys_addr_t paddr) { -#ifdef CONFIG_X86_64 return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif } /* Convert a machine address to physical address */ @@ -1771,56 +1664,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot) { return set_page_prot_flags(addr, prot, UVMF_NONE); } -#ifdef CONFIG_X86_32 -static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, - PAGE_SIZE); - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == LEVEL1_IDENT_ENTRIES) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} -#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1831,13 +1675,8 @@ void __init xen_setup_machphys_mapping(void) } else { machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } -#ifdef CONFIG_X86_32 - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) - < machine_to_phys_mapping); -#endif } -#ifdef CONFIG_X86_64 static void __init convert_pfn_mfn(void *v) { pte_t *pte = v; @@ -2168,105 +2007,6 @@ void __init xen_relocate_p2m(void) xen_start_info->nr_p2m_frames = n_frames; } -#else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); -RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE); -RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE); - -static void __init xen_write_cr3_init(unsigned long cr3) -{ - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); - - BUG_ON(read_cr3_pa() != __pa(initial_page_table)); - BUG_ON(cr3 != __pa(swapper_pg_dir)); - - /* - * We are switching to swapper_pg_dir for the first time (from - * initial_page_table) and therefore need to mark that page - * read-only and then pin it. - * - * Xen disallows sharing of kernel PMDs for PAE - * guests. Therefore we must copy the kernel PMD from - * initial_page_table into a new kernel PMD to be used in - * swapper_pg_dir. - */ - swapper_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - copy_page(swapper_kernel_pmd, initial_kernel_pmd); - swapper_pg_dir[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); - - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - xen_write_cr3(cr3); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(initial_page_table))); - set_page_prot(initial_page_table, PAGE_KERNEL); - set_page_prot(initial_kernel_pmd, PAGE_KERNEL); - - pv_ops.mmu.write_cr3 = &xen_write_cr3; -} - -/* - * For 32 bit domains xen_start_info->pt_base is the pgd address which might be - * not the first page table in the page table pool. - * Iterate through the initial page tables to find the real page table base. - */ -static phys_addr_t __init xen_find_pt_base(pmd_t *pmd) -{ - phys_addr_t pt_base, paddr; - unsigned pmdidx; - - pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd)); - - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) - if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) { - paddr = m2p(pmd[pmdidx].pmd); - pt_base = min(pt_base, paddr); - } - - return pt_base; -} - -void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - - xen_pt_base = xen_find_pt_base(kernel_pmd); - xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE; - - initial_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - - max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024); - - copy_page(initial_kernel_pmd, kernel_pmd); - - xen_map_identity_early(initial_kernel_pmd, max_pfn); - - copy_page(initial_page_table, pgd); - initial_page_table[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - - set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); - set_page_prot(initial_page_table, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, - PFN_DOWN(__pa(initial_page_table))); - xen_write_cr3(__pa(initial_page_table)); - - memblock_reserve(xen_pt_base, xen_pt_size); -} -#endif /* CONFIG_X86_64 */ - void __init xen_reserve_special_pages(void) { phys_addr_t paddr; @@ -2300,12 +2040,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#elif defined(CONFIG_X86_VSYSCALL_EMULATION) +#ifdef CONFIG_X86_VSYSCALL_EMULATION case VSYSCALL_PAGE: #endif /* All local page mappings */ @@ -2357,9 +2092,7 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.set_pte = xen_set_pte; pv_ops.mmu.set_pmd = xen_set_pmd; pv_ops.mmu.set_pud = xen_set_pud; -#ifdef CONFIG_X86_64 pv_ops.mmu.set_p4d = xen_set_p4d; -#endif /* This will work as long as patching hasn't happened yet (which it hasn't) */ @@ -2367,15 +2100,11 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.alloc_pmd = xen_alloc_pmd; pv_ops.mmu.release_pte = xen_release_pte; pv_ops.mmu.release_pmd = xen_release_pmd; -#ifdef CONFIG_X86_64 pv_ops.mmu.alloc_pud = xen_alloc_pud; pv_ops.mmu.release_pud = xen_release_pud; -#endif pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); -#ifdef CONFIG_X86_64 pv_ops.mmu.write_cr3 = &xen_write_cr3; -#endif } static void xen_leave_lazy_mmu(void) @@ -2420,17 +2149,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#ifdef CONFIG_X86_64 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_p4d = xen_set_p4d_hyper, @@ -2442,7 +2165,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), #endif -#endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 0acba2c712ab..be4151f42611 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) if (type == P2M_TYPE_PFN || i < chunk) { /* Use initial p2m page contents. */ -#ifdef CONFIG_X86_64 mfns = alloc_p2m_page(); copy_page(mfns, xen_p2m_addr + pfn); -#else - mfns = xen_p2m_addr + pfn; -#endif ptep = populate_extra_pte((unsigned long)(p2m + pfn)); set_pte(ptep, pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); @@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * Allocate new pmd(s). It is checked whether the old pmd is still in place. * If not, nothing is changed. This is okay as the only reason for allocating * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual - * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! + * pmd. */ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3566e37241d7..7eab14d56369 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -32,7 +32,6 @@ #include #include #include "xen-ops.h" -#include "vdso.h" #include "mmu.h" #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) @@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void) { unsigned long limit; -#ifdef CONFIG_X86_32 - limit = GB(64) / PAGE_SIZE; -#else limit = MAXMEM / PAGE_SIZE; if (!xen_initial_domain() && xen_512gb_limit) limit = GB(512) / PAGE_SIZE; -#endif + return limit; } @@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void) if (!xen_is_e820_reserved(start, size)) return; -#ifdef CONFIG_X86_32 - /* - * Relocating the p2m on 32 bit system to an arbitrary virtual address - * is not supported, so just give up. - */ - xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n"); - BUG(); -#else xen_relocate_p2m(); memblock_free(start, size); -#endif } /** @@ -921,20 +908,6 @@ char * __init xen_memory_setup(void) return "Xen"; } -/* - * Set the bit indicating "nosegneg" library variants should be used. - * We only need to bother in pure 32-bit mode; compat 32-bit processes - * can have un-truncated segments, so wrapping around is allowed. - */ -static void __init fiddle_vdso(void) -{ -#ifdef CONFIG_X86_32 - u32 *mask = vdso_image_32.data + - vdso_image_32.sym_VDSO32_NOTE_MASK; - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; -#endif -} - static int register_callback(unsigned type, const void *func) { struct callback_register callback = { @@ -951,11 +924,7 @@ void xen_enable_sysenter(void) int ret; unsigned sysenter_feature; -#ifdef CONFIG_X86_32 - sysenter_feature = X86_FEATURE_SEP; -#else sysenter_feature = X86_FEATURE_SYSENTER32; -#endif if (!boot_cpu_has(sysenter_feature)) return; @@ -967,7 +936,6 @@ void xen_enable_sysenter(void) void xen_enable_syscall(void) { -#ifdef CONFIG_X86_64 int ret; ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); @@ -983,7 +951,6 @@ void xen_enable_syscall(void) if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } -#endif /* CONFIG_X86_64 */ } static void __init xen_pvmmu_arch_setup(void) @@ -1024,7 +991,6 @@ void __init xen_arch_setup(void) disable_cpuidle(); disable_cpufreq(); WARN_ON(xen_set_default_idle()); - fiddle_vdso(); #ifdef CONFIG_NUMA numa_off = 1; #endif diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 171aff1b11f2..9218aa6ab28e 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -212,15 +212,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. */ make_lowmem_page_readwrite(xen_initial_gdt); -#ifdef CONFIG_X86_32 - /* - * Xen starts us with XEN_FLAT_RING1_DS, but linux code - * expects __USER_DS - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); @@ -301,10 +292,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_rw(cpu); -#ifdef CONFIG_X86_32 - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = __KERNEL_STACK_CANARY; -#endif memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); /* @@ -342,12 +329,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = task_top_of_stack(idle); -#ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; -#else ctxt->gs_base_kernel = per_cpu_offset(cpu); -#endif ctxt->event_callback_eip = (unsigned long)xen_asm_exc_xen_hypervisor_callback; ctxt->failsafe_callback_eip = diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h deleted file mode 100644 index 873c54c488fe..000000000000 --- a/arch/x86/xen/vdso.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Bit used for the pseudo-hwcap for non-negative segments. We use - bit 1 to avoid bugs in some versions of glibc when bit 0 is - used; the choice is otherwise arbitrary. */ -#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 508fe204520b..c59d077510bf 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -76,11 +76,7 @@ SYM_FUNC_END(xen_save_fl_direct) */ SYM_FUNC_START(xen_restore_fl_direct) FRAME_BEGIN -#ifdef CONFIG_X86_64 testw $X86_EFLAGS_IF, %di -#else - testb $X86_EFLAGS_IF>>8, %ah -#endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* * Preempt here doesn't matter because that will deal with any @@ -104,15 +100,6 @@ SYM_FUNC_END(xen_restore_fl_direct) */ SYM_FUNC_START(check_events) FRAME_BEGIN -#ifdef CONFIG_X86_32 - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax -#else push %rax push %rcx push %rdx @@ -132,7 +119,6 @@ SYM_FUNC_START(check_events) pop %rdx pop %rcx pop %rax -#endif FRAME_END ret SYM_FUNC_END(check_events) diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S deleted file mode 100644 index 4757cec33abe..000000000000 --- a/arch/x86/xen/xen-asm_32.S +++ /dev/null @@ -1,185 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. - */ - -#include -#include -#include -#include - -#include - -#include - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - * This is run where a normal iret would be run, with the same stack setup: - * 8: eflags - * 4: cs - * esp-> 0: eip - * - * This attempts to make sure that any pending events are dealt with - * on return to usermode, but there is a small window in which an - * event can happen just before entering usermode. If the nested - * interrupt ends up setting one of the TIF_WORK_MASK pending work - * flags, they will not be tested again before returning to - * usermode. This means that a process can end up with pending work, - * which will be unprocessed until the process enters and leaves the - * kernel again, which could be an unbounded amount of time. This - * means that a pending signal or reschedule event could be - * indefinitely delayed. - * - * The fix is to notice a nested interrupt in the critical window, and - * if one occurs, then fold the nested interrupt into the current - * interrupt stack frame, and re-process it iteratively rather than - * recursively. This means that it will exit via the normal path, and - * all pending work will be dealt with appropriately. - * - * Because the nested interrupt handler needs to deal with the current - * stack state in whatever form its in, we keep things simple by only - * using a single register which is pushed/popped on the stack. - */ - -.macro POP_FS -1: - popw %fs -.pushsection .fixup, "ax" -2: movw $0, (%esp) - jmp 1b -.popsection - _ASM_EXTABLE(1b,2b) -.endm - -SYM_CODE_START(xen_iret) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* Store vcpu_info pointer for easy access */ -#ifdef CONFIG_SMP - pushw %fs - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - movl %fs:xen_vcpu, %eax - POP_FS -#else - movl %ss:xen_vcpu, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* - * Maybe enable events. Once this happens we could get a - * recursive event, so the critical region starts immediately - * afterwards. However, if that happens we don't end up - * resuming the code, so we don't have to be worried about - * being preempted to another CPU. - */ - setz %ss:XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) - - /* - * If there's something pending, mask events again so we can - * jump back into exc_xen_hypervisor_callback. Otherwise do not - * touch XEN_vcpu_info_mask. - */ - jne 1f - movb $1, %ss:XEN_vcpu_info_mask(%eax) - -1: popl %eax - - /* - * From this point on the registers are restored and the stack - * updated, so we don't need to worry about it if we're - * preempted - */ -iret_restore_end: - - /* - * Jump to hypervisor_callback after fixing up the stack. - * Events are masked, so jumping out of the critical region is - * OK. - */ - je xen_asm_exc_xen_hypervisor_callback - -1: iret -xen_iret_end_crit: - _ASM_EXTABLE(1b, asm_iret_error) - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 -SYM_CODE_END(xen_iret) - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees - * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. - * - * The stack format at this point is: - * ---------------- - * ss : (ss/esp may be present if we came from usermode) - * esp : - * eflags } outer exception info - * cs } - * eip } - * ---------------- - * eax : outer eax if it hasn't been restored - * ---------------- - * eflags } - * cs } nested exception info - * eip } - * return address : (into xen_asm_exc_xen_hypervisor_callback) - * - * In order to deliver the nested exception properly, we need to discard the - * nested exception frame such that when we handle the exception, we do it - * in the context of the outer exception rather than starting a new one. - * - * The only caveat is that if the outer eax hasn't been restored yet (i.e. - * it's still on stack), we need to restore its value here. -*/ -.pushsection .noinstr.text, "ax" -SYM_CODE_START(xen_iret_crit_fixup) - /* - * Paranoia: Make sure we're really coming from kernel space. - * One could imagine a case where userspace jumps into the - * critical range address, but just before the CPU delivers a - * PF, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. - */ - testb $2, 2*4(%esp) /* nested CS */ - jnz 2f - - /* - * If eip is before iret_restore_end then stack - * hasn't been restored yet. - */ - cmpl $iret_restore_end, 1*4(%esp) - jae 1f - - movl 4*4(%esp), %eax /* load outer EAX */ - ret $4*4 /* discard nested EIP, CS, and EFLAGS as - * well as the just restored EAX */ - -1: - ret $3*4 /* discard nested EIP, CS, and EFLAGS */ - -2: - ret -SYM_CODE_END(xen_iret_crit_fixup) -.popsection diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1ba601df3a37..2d7c8f34f56c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info -#ifdef CONFIG_X86_64 mov initial_stack(%rip), %rsp -#else - mov initial_stack, %esp -#endif -#ifdef CONFIG_X86_64 /* Set up %gs. * * The base of %gs always points to fixed_percpu_data. If the @@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen) movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax cdq wrmsr -#endif call xen_start_kernel SYM_CODE_END(startup_xen) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 53b224fd6177..45d556f71858 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); -void xen_reserve_top(void); void __init xen_reserve_special_pages(void); void __init xen_pt_check_e820(void); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 727f11eb46b2..46e7fd099904 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -52,9 +52,7 @@ config XEN_BALLOON_MEMORY_HOTPLUG config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT int "Hotplugged memory limit (in GiB) for a PV guest" - default 512 if X86_64 - default 4 if X86_32 - range 0 64 if X86_32 + default 512 depends on XEN_HAVE_PVMMU depends on XEN_BALLOON_MEMORY_HOTPLUG help -- cgit From 56415c4c3da971abd37d0a08c5a570a510b75a7a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Jul 2020 09:16:19 +0200 Subject: x86/xen: eliminate xen-asm_64.S With 32-bit pv-guest support removed xen-asm_64.S can be merged with xen-asm.S Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/Makefile | 3 +- arch/x86/xen/xen-asm.S | 180 ++++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/xen-asm_64.S | 192 ---------------------------------------------- 3 files changed, 180 insertions(+), 195 deletions(-) delete mode 100644 arch/x86/xen/xen-asm_64.S diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 084de77a109e..5de137d536cc 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_xen-asm.o := y ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -34,7 +34,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o obj-$(CONFIG_XEN_PV) += irq.o obj-$(CONFIG_XEN_PV) += multicalls.o obj-$(CONFIG_XEN_PV) += xen-asm.o -obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index c59d077510bf..1cb0e84b9161 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -6,12 +6,18 @@ * operations here; the indirect forms are better handled in C. */ +#include #include #include #include -#include +#include +#include #include +#include + +#include +#include #include /* @@ -137,3 +143,175 @@ SYM_FUNC_START(xen_read_cr2_direct) FRAME_END ret SYM_FUNC_END(xen_read_cr2_direct); + +.macro xen_pv_trap name +SYM_CODE_START(xen_\name) + pop %rcx + pop %r11 + jmp \name +SYM_CODE_END(xen_\name) +_ASM_NOKPROBE(xen_\name) +.endm + +xen_pv_trap asm_exc_divide_error +xen_pv_trap asm_xenpv_exc_debug +xen_pv_trap asm_exc_int3 +xen_pv_trap asm_xenpv_exc_nmi +xen_pv_trap asm_exc_overflow +xen_pv_trap asm_exc_bounds +xen_pv_trap asm_exc_invalid_op +xen_pv_trap asm_exc_device_not_available +xen_pv_trap asm_exc_double_fault +xen_pv_trap asm_exc_coproc_segment_overrun +xen_pv_trap asm_exc_invalid_tss +xen_pv_trap asm_exc_segment_not_present +xen_pv_trap asm_exc_stack_segment +xen_pv_trap asm_exc_general_protection +xen_pv_trap asm_exc_page_fault +xen_pv_trap asm_exc_spurious_interrupt_bug +xen_pv_trap asm_exc_coprocessor_error +xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_MCE +xen_pv_trap asm_exc_machine_check +#endif /* CONFIG_X86_MCE */ +xen_pv_trap asm_exc_simd_coprocessor_error +#ifdef CONFIG_IA32_EMULATION +xen_pv_trap entry_INT80_compat +#endif +xen_pv_trap asm_exc_xen_hypervisor_callback + + __INIT +SYM_CODE_START(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +SYM_CODE_END(xen_early_idt_handler_array) + __FINIT + +hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 +/* + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } + */ +SYM_CODE_START(xen_iret) + pushq $0 + jmp hypercall_iret +SYM_CODE_END(xen_iret) + +SYM_CODE_START(xen_sysret64) + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back. + * + * tss.sp2 is scratch space. + */ + movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + pushq $__USER_DS + pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + pushq %r11 + pushq $__USER_CS + pushq %rcx + + pushq $VGCF_in_syscall + jmp hypercall_iret +SYM_CODE_END(xen_sysret64) + +/* + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + */ + +/* Normal 64-bit system call target */ +SYM_FUNC_START(xen_syscall_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER_DS and __USER_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER_DS, 4*8(%rsp) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +SYM_FUNC_END(xen_syscall_target) + +#ifdef CONFIG_IA32_EMULATION + +/* 32-bit compat syscall target */ +SYM_FUNC_START(xen_syscall32_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +SYM_FUNC_END(xen_syscall32_target) + +/* 32-bit compat sysenter target */ +SYM_FUNC_START(xen_sysenter_target) + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. + */ + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +SYM_FUNC_END(xen_sysenter_target) + +#else /* !CONFIG_IA32_EMULATION */ + +SYM_FUNC_START_ALIAS(xen_syscall32_target) +SYM_FUNC_START(xen_sysenter_target) + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +SYM_FUNC_END(xen_sysenter_target) +SYM_FUNC_END_ALIAS(xen_syscall32_target) + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S deleted file mode 100644 index aab1d99b2b48..000000000000 --- a/arch/x86/xen/xen-asm_64.S +++ /dev/null @@ -1,192 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -.macro xen_pv_trap name -SYM_CODE_START(xen_\name) - pop %rcx - pop %r11 - jmp \name -SYM_CODE_END(xen_\name) -_ASM_NOKPROBE(xen_\name) -.endm - -xen_pv_trap asm_exc_divide_error -xen_pv_trap asm_xenpv_exc_debug -xen_pv_trap asm_exc_int3 -xen_pv_trap asm_xenpv_exc_nmi -xen_pv_trap asm_exc_overflow -xen_pv_trap asm_exc_bounds -xen_pv_trap asm_exc_invalid_op -xen_pv_trap asm_exc_device_not_available -xen_pv_trap asm_exc_double_fault -xen_pv_trap asm_exc_coproc_segment_overrun -xen_pv_trap asm_exc_invalid_tss -xen_pv_trap asm_exc_segment_not_present -xen_pv_trap asm_exc_stack_segment -xen_pv_trap asm_exc_general_protection -xen_pv_trap asm_exc_page_fault -xen_pv_trap asm_exc_spurious_interrupt_bug -xen_pv_trap asm_exc_coprocessor_error -xen_pv_trap asm_exc_alignment_check -#ifdef CONFIG_X86_MCE -xen_pv_trap asm_exc_machine_check -#endif /* CONFIG_X86_MCE */ -xen_pv_trap asm_exc_simd_coprocessor_error -#ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat -#endif -xen_pv_trap asm_exc_xen_hypervisor_callback - - __INIT -SYM_CODE_START(xen_early_idt_handler_array) - i = 0 - .rept NUM_EXCEPTION_VECTORS - pop %rcx - pop %r11 - jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - i = i + 1 - .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc - .endr -SYM_CODE_END(xen_early_idt_handler_array) - __FINIT - -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 -/* - * Xen64 iret frame: - * - * ss - * rsp - * rflags - * cs - * rip <-- standard iret frame - * - * flags - * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } - */ -SYM_CODE_START(xen_iret) - pushq $0 - jmp hypercall_iret -SYM_CODE_END(xen_iret) - -SYM_CODE_START(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back. - * - * tss.sp2 is scratch space. - */ - movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall - jmp hypercall_iret -SYM_CODE_END(xen_sysret64) - -/* - * Xen handles syscall callbacks much like ordinary exceptions, which - * means we have: - * - kernel gs - * - kernel rsp - * - an iret-like stack frame on the stack (including rcx and r11): - * ss - * rsp - * rflags - * cs - * rip - * r11 - * rsp->rcx - */ - -/* Normal 64-bit system call target */ -SYM_FUNC_START(xen_syscall_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER_DS and __USER_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER_DS, 4*8(%rsp) - movq $__USER_CS, 1*8(%rsp) - - jmp entry_SYSCALL_64_after_hwframe -SYM_FUNC_END(xen_syscall_target) - -#ifdef CONFIG_IA32_EMULATION - -/* 32-bit compat syscall target */ -SYM_FUNC_START(xen_syscall32_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSCALL_compat_after_hwframe -SYM_FUNC_END(xen_syscall32_target) - -/* 32-bit compat sysenter target */ -SYM_FUNC_START(xen_sysenter_target) - /* - * NB: Xen is polite and clears TF from EFLAGS for us. This means - * that we don't need to guard against single step exceptions here. - */ - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSENTER_compat_after_hwframe -SYM_FUNC_END(xen_sysenter_target) - -#else /* !CONFIG_IA32_EMULATION */ - -SYM_FUNC_START_ALIAS(xen_syscall32_target) -SYM_FUNC_START(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx, %r11 */ - mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret -SYM_FUNC_END(xen_sysenter_target) -SYM_FUNC_END_ALIAS(xen_syscall32_target) - -#endif /* CONFIG_IA32_EMULATION */ -- cgit From f2e39e8c4f0c13bc4b49f942be8fd5b4b0470412 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Jul 2020 09:28:15 +0200 Subject: x86/xen: drop tests for highmem in pv code With support for 32-bit pv guests gone pure pv-code no longer needs to test for highmem. Dropping those tests removes the need for flushing in some places. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 14 ++--- arch/x86/xen/mmu_pv.c | 138 +++++++++++++++++--------------------------- 2 files changed, 57 insertions(+), 95 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 2831f009ef78..37d6511daf54 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -345,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t *ptep; pte_t pte; unsigned long pfn; - struct page *page; unsigned char dummy; + void *va; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); pfn = pte_pfn(*ptep); - page = pfn_to_page(pfn); - pte = pfn_pte(pfn, prot); /* @@ -383,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); - if (!PageHighMem(page)) { - void *av = __va(PFN_PHYS(pfn)); + va = __va(PFN_PHYS(pfn)); - if (av != v) - if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) - BUG(); - } else - kmap_flush_unused(); + if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); preempt_enable(); } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ff99e20a84e3..3273c985d3dd 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -537,25 +537,26 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d) PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ -static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD; for (i = 0; i < nr; i++) { if (!pmd_none(pmd[i])) - flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE); + (*func)(mm, pmd_page(pmd[i]), PT_PTE); } - return flush; } -static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pud_walk(struct mm_struct *mm, pud_t *pud, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD; for (i = 0; i < nr; i++) { @@ -566,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, pmd = pmd_offset(&pud[i], 0); if (PTRS_PER_PMD > 1) - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); - flush |= xen_pmd_walk(mm, pmd, func, - last && i == nr - 1, limit); + (*func)(mm, virt_to_page(pmd), PT_PMD); + xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit); } - return flush; } -static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int flush = 0; pud_t *pud; if (p4d_none(*p4d)) - return flush; + return; pud = pud_offset(p4d, 0); if (PTRS_PER_PUD > 1) - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - flush |= xen_pud_walk(mm, pud, func, last, limit); - return flush; + (*func)(mm, virt_to_page(pud), PT_PUD); + xen_pud_walk(mm, pud, func, last, limit); } /* @@ -603,12 +601,12 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, * We must skip the Xen hole in the middle of the address space, just after * the big x86-64 virtual hole. */ -static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - int i, nr, flush = 0; + int i, nr; unsigned hole_low = 0, hole_high = 0; /* The limit is the last byte to be touched */ @@ -633,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, continue; p4d = p4d_offset(&pgd[i], 0); - flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); + xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } /* Do the top level last, so that the callbacks can use it as a cue to do final things like tlb flushes. */ - flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); - - return flush; + (*func)(mm, virt_to_page(pgd), PT_PGD); } -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void xen_pgd_walk(struct mm_struct *mm, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - return __xen_pgd_walk(mm, mm->pgd, func, limit); + __xen_pgd_walk(mm, mm->pgd, func, limit); } /* If we're using split pte locks, then take the page's lock and @@ -681,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn) xen_extend_mmuext_op(&op); } -static int xen_pin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void xen_pin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestSetPagePinned(page); - int flush; - - if (pgfl) - flush = 0; /* already pinned */ - else if (PageHighMem(page)) - /* kmaps need flushing if we found an unpinned - highpage */ - flush = 1; - else { + + if (!pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); struct multicall_space mcs = __xen_mc_entry(0); spinlock_t *ptl; - flush = 0; - /* * We need to hold the pagetable lock between the time * we make the pagetable RO and when we actually pin @@ -737,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return flush; } /* This is called just after a mm has been created, but it has not @@ -752,14 +737,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) xen_mc_batch(); - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for flushing */ - xen_mc_issue(0); - - kmap_flush_unused(); - - xen_mc_batch(); - } + __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT); xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); @@ -803,11 +781,10 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, + enum pt_level level) { SetPagePinned(page); - return 0; } /* @@ -823,12 +800,12 @@ static void __init xen_after_bootmem(void) xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } -static int xen_unpin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void xen_unpin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestClearPagePinned(page); - if (pgfl && !PageHighMem(page)) { + if (pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); spinlock_t *ptl = NULL; @@ -859,8 +836,6 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return 0; /* never need to flush on unpin */ } /* Release a pagetables pages back as normal RW */ @@ -1554,20 +1529,14 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (static_branch_likely(&xen_struct_pages_ready)) SetPagePinned(page); - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - __set_pfn_prot(pfn, PAGE_KERNEL_RO); + __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } + xen_mc_issue(PARAVIRT_LAZY_MMU); } } @@ -1590,16 +1559,15 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) trace_xen_mmu_release_ptpage(pfn, level, pinned); if (pinned) { - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - __set_pfn_prot(pfn, PAGE_KERNEL); + __set_pfn_prot(pfn, PAGE_KERNEL); + + xen_mc_issue(PARAVIRT_LAZY_MMU); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } ClearPagePinned(page); } } -- cgit From 60e5da629a7c29e0987f6f02ec20b14c4ee0645e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 9 Aug 2020 07:30:06 +0200 Subject: sections.h: dereference_function_descriptor() returns void pointer The function dereference_function_descriptor() takes on hppa64, ppc64 and ia64 a pointer to a function descriptor and returns a (void) pointer to the dereferenced function. To make cross-arch coding easier, on all other architectures the dereference_function_descriptor() macro should return a void pointer too. Signed-off-by: Helge Deller --- include/asm-generic/sections.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 66397ed10acb..d16302d3eb59 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -60,8 +60,8 @@ extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifndef dereference_function_descriptor -#define dereference_function_descriptor(p) (p) -#define dereference_kernel_function_descriptor(p) (p) +#define dereference_function_descriptor(p) ((void *)(p)) +#define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif /* random extra sections (if any). Override -- cgit From 875102ea4b7700330a33c0db71555d91dafa9c82 Mon Sep 17 00:00:00 2001 From: Oscar Carter Date: Sat, 4 Jul 2020 17:50:57 +0200 Subject: parisc/kernel/ftrace: Remove function callback casts In an effort to enable -Wcast-function-type in the top-level Makefile to support Control Flow Integrity builds, remove all the function callback casts. Co-developed-by: Helge Deller Signed-off-by: Oscar Carter Signed-off-by: Helge Deller --- arch/parisc/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 1df0f67ed667..4bab21c71055 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -64,7 +64,8 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, function_trace_op, regs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || + if (dereference_function_descriptor(ftrace_graph_return) != + dereference_function_descriptor(ftrace_stub) || ftrace_graph_entry != ftrace_graph_entry_stub) { unsigned long *parent_rp; -- cgit From 881a3a11c2b858fe9b69ef79ac5ee9978a266dc9 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 3 Aug 2020 11:35:06 +0200 Subject: btrfs: fix return value mixup in btrfs_get_extent btrfs_get_extent() sets variable ret, but out: error path expect error to be in variable err so the error code is lost. Fixes: 6bf9e4bd6a27 ("btrfs: inode: Verify inode mode to avoid NULL pointer dereference") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Pavel Machek (CIP) Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9988d754e465..16ce82039dcf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6627,7 +6627,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_type == BTRFS_FILE_EXTENT_PREALLOC) { /* Only regular file could have regular/prealloc extent */ if (!S_ISREG(inode->vfs_inode.i_mode)) { - ret = -EUCLEAN; + err = -EUCLEAN; btrfs_crit(fs_info, "regular/prealloc extent found for non-regular inode %llu", btrfs_ino(inode)); -- cgit From b9d8cf2eb3ceecdee3434b87763492aee9e28845 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Aug 2020 18:29:51 -0700 Subject: x86/hyperv: Make hv_setup_sched_clock inline Make hv_setup_sched_clock inline so the reference to pv_ops works correctly with objtool updates to detect noinstr violations. See https://lore.kernel.org/patchwork/patch/1283635/ Signed-off-by: Michael Kelley Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/1597022991-24088-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/include/asm/mshyperv.h | 12 ++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 7 ------- include/asm-generic/mshyperv.h | 1 - 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 60b944dd2df1..4f77b8f22e54 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -8,6 +8,7 @@ #include #include #include +#include typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -54,6 +55,17 @@ typedef int (*hyperv_fill_flush_list_func)( vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() +/* + * Reference to pv_ops must be inline so objtool + * detection of noinstr violations can work correctly. + */ +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ +#ifdef CONFIG_PARAVIRT + pv_ops.time.sched_clock = sched_clock; +#endif +} + void hyperv_vector_handler(struct pt_regs *regs); static inline void hv_enable_stimer0_percpu_irq(int irq) {} diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index af94f05a5c66..31125448b174 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -361,13 +361,6 @@ static void __init ms_hyperv_init_platform(void) #endif } -void hv_setup_sched_clock(void *sched_clock) -{ -#ifdef CONFIG_PARAVIRT - pv_ops.time.sched_clock = sched_clock; -#endif -} - const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 1c4fd950f091..c5edc5e08b94 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -168,7 +168,6 @@ void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); void hyperv_cleanup(void); -void hv_setup_sched_clock(void *sched_clock); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } -- cgit From e792415c5d3e0eb52527cce228a72e4392f8cae2 Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Tue, 11 Aug 2020 07:18:37 -0400 Subject: KVM: MIPS/VZ: Fix build error caused by 'kvm_run' cleanup Commit c34b26b98caca48ec9ee9 ("KVM: MIPS: clean up redundant 'kvm_run' parameters") remove the 'kvm_run' parameter in kvm_vz_gpsi_lwc2. The following build error: arch/mips/kvm/vz.c: In function 'kvm_trap_vz_handle_gpsi': arch/mips/kvm/vz.c:1243:43: error: 'run' undeclared (first use in this function) er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); ^~~ arch/mips/kvm/vz.c:1243:43: note: each undeclared identifier is reported only once for each function it appears in scripts/Makefile.build:283: recipe for target 'arch/mips/kvm/vz.o' failed make[2]: *** [arch/mips/kvm/vz.o] Error 1 scripts/Makefile.build:500: recipe for target 'arch/mips/kvm' failed make[1]: *** [arch/mips/kvm] Error 2 Makefile:1785: recipe for target 'arch/mips' failed make: *** [arch/mips] Error 2 Signed-off-by: Xingxing Su Signed-off-by: Paolo Bonzini --- arch/mips/kvm/vz.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index a474578ccf68..c299e5d6d69c 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -1144,7 +1144,6 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, #ifdef CONFIG_CPU_LOONGSON64 static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, u32 *opc, u32 cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) { unsigned int rs, rd; @@ -1242,7 +1241,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, #endif #ifdef CONFIG_CPU_LOONGSON64 case lwc2_op: - er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); + er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu); break; #endif case spec3_op: -- cgit From 85cb905d3c5ff6107f4948314731c1b55b689cfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:39:58 +0200 Subject: ALSA: echoaduio: Drop superfluous volatile modifier The dsp_registers field of struct echoaduio has the volatile modifier, but it's basically superfluous; the field is accessed only for the base pointer of readl() and writel(), hence marking with __iomem alone should suffice. OTOH, having the volatile prefix causes a compile warning like: sound/pci/echoaudio/echoaudio.c:1878:14: warning: passing argument 1 of 'iounmap' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers] So it's better to drop this superfluous modifier. Link: https://lore.kernel.org/r/20200803143958.24324-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 3 +-- sound/pci/echoaudio/echoaudio.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 6aeb99aa2414..1b7a67ccabce 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1950,8 +1950,7 @@ static int snd_echo_create(struct snd_card *card, snd_echo_free(chip); return -EBUSY; } - chip->dsp_registers = (volatile u32 __iomem *) - ioremap(chip->dsp_registers_phys, sz); + chip->dsp_registers = ioremap(chip->dsp_registers_phys, sz); if (!chip->dsp_registers) { dev_err(chip->card->dev, "ioremap failed\n"); snd_echo_free(chip); diff --git a/sound/pci/echoaudio/echoaudio.h b/sound/pci/echoaudio/echoaudio.h index 30c640931f1e..0afe13f7b6e5 100644 --- a/sound/pci/echoaudio/echoaudio.h +++ b/sound/pci/echoaudio/echoaudio.h @@ -419,7 +419,7 @@ struct echoaudio { short asic_code; /* Current ASIC code */ u32 comm_page_phys; /* Physical address of the * memory seen by DSP */ - volatile u32 __iomem *dsp_registers; /* DSP's register base */ + u32 __iomem *dsp_registers; /* DSP's register base */ u32 active_mask; /* Chs. active mask or * punks out */ #ifdef CONFIG_PM_SLEEP -- cgit From 404690649e6a52ee39817168f2d984726412e091 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 11 Aug 2020 20:24:30 +0800 Subject: ALSA: hda - reverse the setting value in the micmute_led_set Before the micmute_led_set() is introduced, the function of alc_gpio_micmute_update() will set the gpio value with the !micmute_led.led_value, and the machines have the correct micmute led status. After the micmute_led_set() is introduced, it sets the gpio value with !!micmute_led.led_value, so the led status is not correct anymore, we need to set micmute_led_polarity = 1 to workaround it. Now we fix the micmute_led_set() and remove micmute_led_polarity = 1. Fixes: 87dc36482cab ("ALSA: hda/realtek - Add LED class support for micmute LED") Reported-and-suggested-by: Kai-Heng Feng Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200811122430.6546-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 09d93dd88713..073029aeaf3c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4125,7 +4125,7 @@ static int micmute_led_set(struct led_classdev *led_cdev, struct alc_spec *spec = codec->spec; alc_update_gpio_led(codec, spec->gpio_mic_led_mask, - spec->micmute_led_polarity, !!brightness); + spec->micmute_led_polarity, !brightness); return 0; } @@ -4162,8 +4162,6 @@ static void alc285_fixup_hp_gpio_led(struct hda_codec *codec, { struct alc_spec *spec = codec->spec; - spec->micmute_led_polarity = 1; - alc_fixup_hp_gpio_led(codec, action, 0x04, 0x01); } @@ -4414,7 +4412,6 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, { struct alc_spec *spec = codec->spec; - spec->micmute_led_polarity = 1; alc_fixup_hp_gpio_led(codec, action, 0, 0x04); if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->init_amp = ALC_INIT_DEFAULT; -- cgit From fe6a8fc5ed2f0081f17375ae2005718522c392c6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 10 Aug 2020 19:16:32 +0200 Subject: loop: unset GENHD_FL_NO_PART_SCAN on LOOP_CONFIGURE When LOOP_CONFIGURE is used with LO_FLAGS_PARTSCAN we need to propagate this into the GENHD_FL_NO_PART_SCAN. LOOP_SETSTATUS does this, LOOP_CONFIGURE doesn't so far. Effect is that setting up a loopback device with partition scanning doesn't actually work when LOOP_CONFIGURE is issued, though it works fine with LOOP_SETSTATUS. Let's correct that and propagate the flag in LOOP_CONFIGURE too. Fixes: 3448914e8cc5("loop: Add LOOP_CONFIGURE ioctl") Signed-off-by: Lennart Poettering Acked-by: Martijn Coenen Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d18160146226..2f137d6ce169 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1171,6 +1171,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; + if (partscan) + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; /* Grab the block_device to prevent its destruction after we * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). -- cgit From efa8480a831673bb52400df9dbe5da0aacda97bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 18:44:24 -0600 Subject: fs: RWF_NOWAIT should imply IOCB_NOIO With the change allowing read-ahead for IOCB_NOWAIT, we changed the RWF_NOWAIT semantics of only doing cached reads. Since we know have IOCB_NOIO to manage that specific side of it, just make RWF_NOWAIT imply IOCB_NOIO as well to restore the previous behavior. Fixes: 2e85abf053b9 ("mm: allow read-ahead with IOCB_NOWAIT set") Reported-by: Dave Chinner Reviewed-by: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index bd7ec3eaeed0..f1cca4bfdd7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3293,7 +3293,7 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; } if (flags & RWF_HIPRI) kiocb_flags |= IOCB_HIPRI; -- cgit From 6d816e088c359866f9867057e04f244c608c42fe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Aug 2020 08:04:14 -0600 Subject: io_uring: hold 'ctx' reference around task_work queue + execute We're holding the request reference, but we need to go one higher to ensure that the ctx remains valid after the request has finished. If the ring is closed with pending task_work inflight, and the given io_kiocb finishes sync during issue, then we need a reference to the ring itself around the task_work execution cycle. Cc: stable@vger.kernel.org # v5.7+ Reported-by: syzbot+9b260fc33297966f5a8e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5488698189da..99582cf5106b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1821,8 +1821,10 @@ static void __io_req_task_submit(struct io_kiocb *req) static void io_req_task_submit(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; __io_req_task_submit(req); + percpu_ref_put(&ctx->refs); } static void io_req_task_queue(struct io_kiocb *req) @@ -1830,6 +1832,7 @@ static void io_req_task_queue(struct io_kiocb *req) int ret; init_task_work(&req->task_work, io_req_task_submit); + percpu_ref_get(&req->ctx->refs); ret = io_req_task_work_add(req, &req->task_work); if (unlikely(ret)) { @@ -2318,6 +2321,8 @@ static void io_rw_resubmit(struct callback_head *cb) refcount_inc(&req->refs); io_queue_async_work(req); } + + percpu_ref_put(&ctx->refs); } #endif @@ -2330,6 +2335,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) return false; init_task_work(&req->task_work, io_rw_resubmit); + percpu_ref_get(&req->ctx->refs); + ret = io_req_task_work_add(req, &req->task_work); if (!ret) return true; @@ -3033,6 +3040,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, list_del_init(&wait->entry); init_task_work(&req->task_work, io_req_task_submit); + percpu_ref_get(&req->ctx->refs); + /* submit ref gets dropped, acquire a new one */ refcount_inc(&req->refs); ret = io_req_task_work_add(req, &req->task_work); @@ -4565,6 +4574,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, req->result = mask; init_task_work(&req->task_work, func); + percpu_ref_get(&req->ctx->refs); + /* * If this fails, then the task is exiting. When a task exits, the * work gets canceled, so just cancel this request as well instead @@ -4652,11 +4663,13 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) static void io_poll_task_func(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *nxt = NULL; io_poll_task_handler(req, &nxt); if (nxt) __io_req_task_submit(nxt); + percpu_ref_put(&ctx->refs); } static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, @@ -4752,6 +4765,7 @@ static void io_async_task_func(struct callback_head *cb) if (io_poll_rewait(req, &apoll->poll)) { spin_unlock_irq(&ctx->completion_lock); + percpu_ref_put(&ctx->refs); return; } @@ -4767,6 +4781,7 @@ static void io_async_task_func(struct callback_head *cb) else __io_req_task_cancel(req, -ECANCELED); + percpu_ref_put(&ctx->refs); kfree(apoll->double_poll); kfree(apoll); } -- cgit From f6ebbcf08f37b01827c51309a188e85165e498e7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Aug 2020 14:03:55 +0200 Subject: cpufreq: intel_pstate: Implement passive mode with HWP enabled Allow intel_pstate to work in the passive mode with HWP enabled and make it set the HWP minimum performance limit (HWP floor) to the P-state value given by the target frequency supplied by the cpufreq governor, so as to prevent the HWP algorithm and the CPU scheduler from working against each other, at least when the schedutil governor is in use, and update the intel_pstate documentation accordingly. Among other things, this allows utilization clamps to be taken into account, at least to a certain extent, when intel_pstate is in use and makes it more likely that sufficient capacity for deadline tasks will be provided. After this change, the resulting behavior of an HWP system with intel_pstate in the passive mode should be close to the behavior of the analogous non-HWP system with intel_pstate in the passive mode, except that the HWP algorithm is generally allowed to make the CPU run at a frequency above the floor P-state set by intel_pstate in the entire available range of P-states, while without HWP a CPU can run in a P-state above the requested one if the latter falls into the range of turbo P-states (referred to as the turbo range) or if the P-states of all CPUs in one package are coordinated with each other at the hardware level. [Note that in principle the HWP floor may not be taken into account by the processor if it falls into the turbo range, in which case the processor has a license to choose any P-state, either below or above the HWP floor, just like a non-HWP processor in the case when the target P-state falls into the turbo range.] With this change applied, intel_pstate in the passive mode assumes complete control over the HWP request MSR and concurrent changes of that MSR (eg. via the direct MSR access interface) are overridden by it. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada Reviewed-by: Francisco Jerez --- Documentation/admin-guide/pm/intel_pstate.rst | 89 +++++----- drivers/cpufreq/cpufreq.c | 6 +- drivers/cpufreq/intel_pstate.c | 245 +++++++++++++++++++------- include/linux/cpufreq.h | 2 + 4 files changed, 229 insertions(+), 113 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 40d481cca368..f85767e09911 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -54,10 +54,13 @@ registered (see `below `_). Operation Modes =============== -``intel_pstate`` can operate in three different modes: in the active mode with -or without hardware-managed P-states support and in the passive mode. Which of -them will be in effect depends on what kernel command line options are used and -on the capabilities of the processor. +``intel_pstate`` can operate in two different modes, active or passive. In the +active mode, it uses its own internal performance scaling governor algorithm or +allows the hardware to do preformance scaling by itself, while in the passive +mode it responds to requests made by a generic ``CPUFreq`` governor implementing +a certain performance scaling algorithm. Which of them will be in effect +depends on what kernel command line options are used and on the capabilities of +the processor. Active Mode ----------- @@ -194,10 +197,11 @@ This is the default operation mode of ``intel_pstate`` for processors without hardware-managed P-states (HWP) support. It is always used if the ``intel_pstate=passive`` argument is passed to the kernel in the command line regardless of whether or not the given processor supports HWP. [Note that the -``intel_pstate=no_hwp`` setting implies ``intel_pstate=passive`` if it is used -without ``intel_pstate=active``.] Like in the active mode without HWP support, -in this mode ``intel_pstate`` may refuse to work with processors that are not -recognized by it. +``intel_pstate=no_hwp`` setting causes the driver to start in the passive mode +if it is not combined with ``intel_pstate=active``.] Like in the active mode +without HWP support, in this mode ``intel_pstate`` may refuse to work with +processors that are not recognized by it if HWP is prevented from being enabled +through the kernel command line. If the driver works in this mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq". @@ -318,10 +322,9 @@ manuals need to be consulted to get to it too. For this reason, there is a list of supported processors in ``intel_pstate`` and the driver initialization will fail if the detected processor is not in that -list, unless it supports the `HWP feature `_. [The interface to -obtain all of the information listed above is the same for all of the processors -supporting the HWP feature, which is why they all are supported by -``intel_pstate``.] +list, unless it supports the HWP feature. [The interface to obtain all of the +information listed above is the same for all of the processors supporting the +HWP feature, which is why ``intel_pstate`` works with all of them.] User Space Interface in ``sysfs`` @@ -425,22 +428,16 @@ argument is passed to the kernel in the command line. as well as the per-policy ones) are then reset to their default values, possibly depending on the target operation mode.] - That only is supported in some configurations, though (for example, if - the `HWP feature is enabled in the processor `_, - the operation mode of the driver cannot be changed), and if it is not - supported in the current configuration, writes to this attribute will - fail with an appropriate error. - ``energy_efficiency`` - This attribute is only present on platforms, which have CPUs matching - Kaby Lake or Coffee Lake desktop CPU model. By default - energy efficiency optimizations are disabled on these CPU models in HWP - mode by this driver. Enabling energy efficiency may limit maximum - operating frequency in both HWP and non HWP mode. In non HWP mode, - optimizations are done only in the turbo frequency range. In HWP mode, - optimizations are done in the entire frequency range. Setting this - attribute to "1" enables energy efficiency optimizations and setting - to "0" disables energy efficiency optimizations. + This attribute is only present on platforms with CPUs matching the Kaby + Lake or Coffee Lake desktop CPU model. By default, energy-efficiency + optimizations are disabled on these CPU models if HWP is enabled. + Enabling energy-efficiency optimizations may limit maximum operating + frequency with or without the HWP feature. With HWP enabled, the + optimizations are done only in the turbo frequency range. Without it, + they are done in the entire available frequency range. Setting this + attribute to "1" enables the energy-efficiency optimizations and setting + to "0" disables them. Interpretation of Policy Attributes ----------------------------------- @@ -484,8 +481,8 @@ Next, the following policy attributes have special meaning if policy for the time interval between the last two invocations of the driver's utilization update callback by the CPU scheduler for that CPU. -One more policy attribute is present if the `HWP feature is enabled in the -processor `_: +One more policy attribute is present if the HWP feature is enabled in the +processor: ``base_frequency`` Shows the base frequency of the CPU. Any frequency above this will be @@ -526,11 +523,11 @@ on the following rules, regardless of the current operation mode of the driver: 3. The global and per-policy limits can be set independently. -If the `HWP feature is enabled in the processor `_, the -resulting effective values are written into its registers whenever the limits -change in order to request its internal P-state selection logic to always set -P-states within these limits. Otherwise, the limits are taken into account by -scaling governors (in the `passive mode `_) and by the driver +In the `active mode with the HWP feature enabled `_, the +resulting effective values are written into hardware registers whenever the +limits change in order to request its internal P-state selection logic to always +set P-states within these limits. Otherwise, the limits are taken into account +by scaling governors (in the `passive mode `_) and by the driver every time before setting a new P-state for a CPU. Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument @@ -541,12 +538,11 @@ at all and the only way to set the limits is by using the policy attributes. Energy vs Performance Hints --------------------------- -If ``intel_pstate`` works in the `active mode with the HWP feature enabled -`_ in the processor, additional attributes are present -in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow -user space to help ``intel_pstate`` to adjust the processor's internal P-state -selection logic by focusing it on performance or on energy-efficiency, or -somewhere between the two extremes: +If the hardware-managed P-states (HWP) is enabled in the processor, additional +attributes, intended to allow user space to help ``intel_pstate`` to adjust the +processor's internal P-state selection logic by focusing it on performance or on +energy-efficiency, or somewhere between the two extremes, are present in every +``CPUFreq`` policy directory in ``sysfs``. They are : ``energy_performance_preference`` Current value of the energy vs performance hint for the given policy @@ -650,12 +646,14 @@ of them have to be prepended with the ``intel_pstate=`` prefix. Do not register ``intel_pstate`` as the scaling driver even if the processor is supported by it. +``active`` + Register ``intel_pstate`` in the `active mode `_ to start + with. + ``passive`` Register ``intel_pstate`` in the `passive mode `_ to start with. - This option implies the ``no_hwp`` one described below. - ``force`` Register ``intel_pstate`` as the scaling driver instead of ``acpi-cpufreq`` even if the latter is preferred on the given system. @@ -670,13 +668,12 @@ of them have to be prepended with the ``intel_pstate=`` prefix. driver is used instead of ``acpi-cpufreq``. ``no_hwp`` - Do not enable the `hardware-managed P-states (HWP) feature - `_ even if it is supported by the processor. + Do not enable the hardware-managed P-states (HWP) feature even if it is + supported by the processor. ``hwp_only`` Register ``intel_pstate`` as the scaling driver only if the - `hardware-managed P-states (HWP) feature `_ is - supported by the processor. + hardware-managed P-states (HWP) feature is supported by the processor. ``support_acpi_ppc`` Take ACPI ``_PPC`` performance limits into account. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index afad06b91c77..02ab56b2a0d8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -73,8 +73,6 @@ static inline bool has_target(void) static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_init_governor(struct cpufreq_policy *policy); static void cpufreq_exit_governor(struct cpufreq_policy *policy); -static int cpufreq_start_governor(struct cpufreq_policy *policy); -static void cpufreq_stop_governor(struct cpufreq_policy *policy); static void cpufreq_governor_limits(struct cpufreq_policy *policy); static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, @@ -2266,7 +2264,7 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy) module_put(policy->governor->owner); } -static int cpufreq_start_governor(struct cpufreq_policy *policy) +int cpufreq_start_governor(struct cpufreq_policy *policy) { int ret; @@ -2293,7 +2291,7 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) return 0; } -static void cpufreq_stop_governor(struct cpufreq_policy *policy) +void cpufreq_stop_governor(struct cpufreq_policy *policy) { if (cpufreq_suspended || !policy->governor) return; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fc459c9c00ff..e0220a6fbc69 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -36,6 +36,7 @@ #define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 +#define INTEL_CPUFREQ_TRANSITION_DELAY_HWP 5000 #define INTEL_CPUFREQ_TRANSITION_DELAY 500 #ifdef CONFIG_ACPI @@ -220,6 +221,7 @@ struct global_params { * preference/bias * @epp_saved: Saved EPP/EPB during system suspend or CPU offline * operation + * @epp_cached Cached HWP energy-performance preference value * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set @@ -257,6 +259,7 @@ struct cpudata { s16 epp_policy; s16 epp_default; s16 epp_saved; + s16 epp_cached; u64 hwp_req_cached; u64 hwp_cap_cached; u64 last_io_update; @@ -639,6 +642,26 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw return index; } +static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp) +{ + /* + * Use the cached HWP Request MSR value, because in the active mode the + * register itself may be updated by intel_pstate_hwp_boost_up() or + * intel_pstate_hwp_boost_down() at any time. + */ + u64 value = READ_ONCE(cpu->hwp_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + /* + * The only other updater of hwp_req_cached in the active mode, + * intel_pstate_hwp_set(), is called under the same lock as this + * function, so it cannot run in parallel with the update below. + */ + WRITE_ONCE(cpu->hwp_req_cached, value); + return wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); +} + static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, int pref_index, bool use_raw, u32 raw_epp) @@ -650,28 +673,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, epp = cpu_data->epp_default; if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { - /* - * Use the cached HWP Request MSR value, because the register - * itself may be updated by intel_pstate_hwp_boost_up() or - * intel_pstate_hwp_boost_down() at any time. - */ - u64 value = READ_ONCE(cpu_data->hwp_req_cached); - - value &= ~GENMASK_ULL(31, 24); - if (use_raw) epp = raw_epp; else if (epp == -EINVAL) epp = epp_values[pref_index - 1]; - value |= (u64)epp << 24; - /* - * The only other updater of hwp_req_cached in the active mode, - * intel_pstate_hwp_set(), is called under the same lock as this - * function, so it cannot run in parallel with the update below. - */ - WRITE_ONCE(cpu_data->hwp_req_cached, value); - ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); + ret = intel_pstate_set_epp(cpu_data, epp); } else { if (epp == -EINVAL) epp = (pref_index - 1) << 2; @@ -697,10 +704,12 @@ static ssize_t show_energy_performance_available_preferences( cpufreq_freq_attr_ro(energy_performance_available_preferences); +static struct cpufreq_driver intel_pstate; + static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { - struct cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cpudata *cpu = all_cpu_data[policy->cpu]; char str_preference[21]; bool raw = false; ssize_t ret; @@ -725,15 +734,44 @@ static ssize_t store_energy_performance_preference( raw = true; } + /* + * This function runs with the policy R/W semaphore held, which + * guarantees that the driver pointer will not change while it is + * running. + */ + if (!intel_pstate_driver) + return -EAGAIN; + mutex_lock(&intel_pstate_limits_lock); - ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp); - if (!ret) - ret = count; + if (intel_pstate_driver == &intel_pstate) { + ret = intel_pstate_set_energy_pref_index(cpu, ret, raw, epp); + } else { + /* + * In the passive mode the governor needs to be stopped on the + * target CPU before the EPP update and restarted after it, + * which is super-heavy-weight, so make sure it is worth doing + * upfront. + */ + if (!raw) + epp = ret ? epp_values[ret - 1] : cpu->epp_default; + + if (cpu->epp_cached != epp) { + int err; + + cpufreq_stop_governor(policy); + ret = intel_pstate_set_epp(cpu, epp); + err = cpufreq_start_governor(policy); + if (!ret) { + cpu->epp_cached = epp; + ret = err; + } + } + } mutex_unlock(&intel_pstate_limits_lock); - return ret; + return ret ?: count; } static ssize_t show_energy_performance_preference( @@ -1145,8 +1183,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, return count; } -static struct cpufreq_driver intel_pstate; - static void update_qos_request(enum freq_qos_req_type type) { int max_state, turbo_max, freq, i, perf_pct; @@ -1330,9 +1366,10 @@ static const struct attribute_group intel_pstate_attr_group = { static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[]; +static struct kobject *intel_pstate_kobject; + static void __init intel_pstate_sysfs_expose_params(void) { - struct kobject *intel_pstate_kobject; int rc; intel_pstate_kobject = kobject_create_and_add("intel_pstate", @@ -1357,17 +1394,31 @@ static void __init intel_pstate_sysfs_expose_params(void) rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr); WARN_ON(rc); - if (hwp_active) { - rc = sysfs_create_file(intel_pstate_kobject, - &hwp_dynamic_boost.attr); - WARN_ON(rc); - } - if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) { rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr); WARN_ON(rc); } } + +static void intel_pstate_sysfs_expose_hwp_dynamic_boost(void) +{ + int rc; + + if (!hwp_active) + return; + + rc = sysfs_create_file(intel_pstate_kobject, &hwp_dynamic_boost.attr); + WARN_ON_ONCE(rc); +} + +static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void) +{ + if (!hwp_active) + return; + + sysfs_remove_file(intel_pstate_kobject, &hwp_dynamic_boost.attr); +} + /************************** sysfs end ************************/ static void intel_pstate_hwp_enable(struct cpudata *cpudata) @@ -2247,7 +2298,10 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy) static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy) { - intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]); + if (hwp_active) + intel_pstate_hwp_force_min_perf(policy->cpu); + else + intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]); } static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) @@ -2255,12 +2309,10 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("CPU %d exiting\n", policy->cpu); intel_pstate_clear_update_util_hook(policy->cpu); - if (hwp_active) { + if (hwp_active) intel_pstate_hwp_save_state(policy); - intel_pstate_hwp_force_min_perf(policy->cpu); - } else { - intel_cpufreq_stop_cpu(policy); - } + + intel_cpufreq_stop_cpu(policy); } static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) @@ -2390,13 +2442,71 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in fp_toint(cpu->iowait_boost * 100)); } +static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate, + bool fast_switch) +{ + u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev; + + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(target_pstate); + + /* + * The entire MSR needs to be updated in order to update the HWP min + * field in it, so opportunistically update the max too if needed. + */ + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(cpu->max_perf_ratio); + + if (value == prev) + return; + + WRITE_ONCE(cpu->hwp_req_cached, value); + if (fast_switch) + wrmsrl(MSR_HWP_REQUEST, value); + else + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); +} + +static void intel_cpufreq_adjust_perf_ctl(struct cpudata *cpu, + u32 target_pstate, bool fast_switch) +{ + if (fast_switch) + wrmsrl(MSR_IA32_PERF_CTL, + pstate_funcs.get_val(cpu, target_pstate)); + else + wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, + pstate_funcs.get_val(cpu, target_pstate)); +} + +static int intel_cpufreq_update_pstate(struct cpudata *cpu, int target_pstate, + bool fast_switch) +{ + int old_pstate = cpu->pstate.current_pstate; + + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + if (target_pstate != old_pstate) { + cpu->pstate.current_pstate = target_pstate; + if (hwp_active) + intel_cpufreq_adjust_hwp(cpu, target_pstate, + fast_switch); + else + intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, + fast_switch); + } + + intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH : + INTEL_PSTATE_TRACE_TARGET, old_pstate); + + return target_pstate; +} + static int intel_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpudata *cpu = all_cpu_data[policy->cpu]; struct cpufreq_freqs freqs; - int target_pstate, old_pstate; + int target_pstate; update_turbo_state(); @@ -2404,6 +2514,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); + switch (relation) { case CPUFREQ_RELATION_L: target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); @@ -2415,15 +2526,11 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); break; } - target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - old_pstate = cpu->pstate.current_pstate; - if (target_pstate != cpu->pstate.current_pstate) { - cpu->pstate.current_pstate = target_pstate; - wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, - pstate_funcs.get_val(cpu, target_pstate)); - } + + target_pstate = intel_cpufreq_update_pstate(cpu, target_pstate, false); + freqs.new = target_pstate * cpu->pstate.scaling; - intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate); + cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -2433,15 +2540,14 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - int target_pstate, old_pstate; + int target_pstate; update_turbo_state(); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); - target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - old_pstate = cpu->pstate.current_pstate; - intel_pstate_update_pstate(cpu, target_pstate); - intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); + + target_pstate = intel_cpufreq_update_pstate(cpu, target_pstate, true); + return target_pstate * cpu->pstate.scaling; } @@ -2461,7 +2567,6 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY; - policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; /* This reflects the intel_pstate_get_cpu_pstates() setting. */ policy->cur = policy->cpuinfo.min_freq; @@ -2473,10 +2578,18 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - if (hwp_active) + if (hwp_active) { + u64 value; + intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state); - else + policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; + rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); + WRITE_ONCE(cpu->hwp_req_cached, value); + cpu->epp_cached = (value & GENMASK_ULL(31, 24)) >> 24; + } else { turbo_max = cpu->pstate.turbo_pstate; + policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; + } min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); min_freq *= cpu->pstate.scaling; @@ -2553,6 +2666,10 @@ static void intel_pstate_driver_cleanup(void) } } put_online_cpus(); + + if (intel_pstate_driver == &intel_pstate) + intel_pstate_sysfs_hide_hwp_dynamic_boost(); + intel_pstate_driver = NULL; } @@ -2560,6 +2677,9 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) { int ret; + if (driver == &intel_pstate) + intel_pstate_sysfs_expose_hwp_dynamic_boost(); + memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; @@ -2577,9 +2697,6 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) static int intel_pstate_unregister_driver(void) { - if (hwp_active) - return -EBUSY; - cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); @@ -2835,7 +2952,10 @@ static int __init intel_pstate_init(void) hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; - default_driver = &intel_pstate; + intel_cpufreq.attr = hwp_cpufreq_attrs; + if (!default_driver) + default_driver = &intel_pstate; + goto hwp_cpu_matched; } } else { @@ -2906,14 +3026,13 @@ static int __init intel_pstate_setup(char *str) if (!str) return -EINVAL; - if (!strcmp(str, "disable")) { + if (!strcmp(str, "disable")) no_load = 1; - } else if (!strcmp(str, "active")) { + else if (!strcmp(str, "active")) default_driver = &intel_pstate; - } else if (!strcmp(str, "passive")) { + else if (!strcmp(str, "passive")) default_driver = &intel_cpufreq; - no_hwp = 1; - } + if (!strcmp(str, "no_hwp")) { pr_info("HWP disabled\n"); no_hwp = 1; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 58687a5bf9c8..8f141d4c859c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -576,6 +576,8 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +int cpufreq_start_governor(struct cpufreq_policy *policy); +void cpufreq_stop_governor(struct cpufreq_policy *policy); #define cpufreq_governor_init(__governor) \ static int __init __governor##_init(void) \ -- cgit From 3bc6e3dc5a54d5842938c6f1ed78dd1add379af7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 14 Jun 2020 10:50:42 +0200 Subject: parisc: Whitespace cleanups in atomic.h Fix whitespace indenting and drop trailing backslashes. Cc: # 4.19+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/atomic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 6dd4171c9530..90e8267fc509 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -34,13 +34,13 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; /* Can't use raw_spin_lock_irq because of #include problems, so * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ arch_spin_unlock(s); \ local_irq_restore(f); \ } while(0) @@ -85,7 +85,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC_OP_RETURN(op, c_op) \ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ @@ -150,7 +150,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC64_OP_RETURN(op, c_op) \ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ -- cgit From f0cbd3b83ed47803df941865f720934c69abb803 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 30 Jul 2020 16:02:28 +0200 Subject: s390/atomic: circumvent gcc 10 build regression Circumvent the following gcc 10 allyesconfig build regression: CC drivers/leds/trigger/ledtrig-cpu.o In file included from ./arch/s390/include/asm/bitops.h:39, from ./include/linux/bitops.h:29, from ./include/linux/kernel.h:12, from drivers/leds/trigger/ledtrig-cpu.c:18: ./arch/s390/include/asm/atomic_ops.h: In function 'ledtrig_cpu': ./arch/s390/include/asm/atomic_ops.h:46:2: warning: 'asm' operand 1 probably does not match constraints 46 | asm volatile( \ | ^~~ ./arch/s390/include/asm/atomic_ops.h:53:2: note: in expansion of macro '__ATOMIC_CONST_OP' 53 | __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ | ^~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:56:1: note: in expansion of macro '__ATOMIC_CONST_OPS' 56 | __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") | ^~~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:46:2: error: impossible constraint in 'asm' 46 | asm volatile( \ | ^~~ ./arch/s390/include/asm/atomic_ops.h:53:2: note: in expansion of macro '__ATOMIC_CONST_OP' 53 | __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ | ^~~~~~~~~~~~~~~~~ ./arch/s390/include/asm/atomic_ops.h:56:1: note: in expansion of macro '__ATOMIC_CONST_OPS' 56 | __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") | ^~~~~~~~~~~~~~~~~~ scripts/Makefile.build:280: recipe for target 'drivers/leds/trigger/ledtrig-cpu.o' failed By swapping conditions as proposed here: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html Suggested-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/atomic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index cae473a7b6f7..11c5952e1afa 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -45,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic_add_const(i, &v->counter); return; } @@ -112,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic64_add_const(i, (long *)&v->counter); return; } -- cgit From ba925fa35057a062ac98c3e8138b013ce4ce351c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 29 Jul 2020 22:22:34 +0200 Subject: s390/gmap: improve THP splitting During s390_enable_sie(), we need to take care of splitting all qemu user process THP mappings. This is currently done with follow_page(FOLL_SPLIT), by simply iterating over all vma ranges, with PAGE_SIZE increment. This logic is sub-optimal and can result in a lot of unnecessary overhead, especially when using qemu and ASAN with large shadow map. Ilya reported significant system slow-down with one CPU busy for a long time and overall unresponsiveness. Fix this by using walk_page_vma() and directly calling split_huge_pmd() only for present pmds, which greatly reduces overhead. Cc: # v5.4+ Reported-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Acked-by: Christian Borntraeger Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..46c1bf2a3b4b 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting -- cgit From 75d3e7f4769d276a056efa1cc7f08de571fc9b4b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 30 Jul 2020 14:36:02 +0800 Subject: s390/test_unwind: fix possible memleak in test_unwind() test_unwind() misses to call kfree(bt) in an error path. Add the missed function call to fix it. Fixes: 0610154650f1 ("s390/test_unwind: print verbose unwinding results") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/lib/test_unwind.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index eb382ceaa116..7c988994931f 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -64,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, break; if (state.reliable && !addr) { pr_err("unwind state reliable but addr is 0\n"); + kfree(bt); return -EINVAL; } sprint_symbol(sym, addr); -- cgit From 75be6b98eda46d27b4a218fdbfb034a5b0fb5b12 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 2 Aug 2020 19:15:26 +0800 Subject: s390/pkey: remove redundant variable initialization In the first place, the initialization value of `rc` is wrong. It is unnecessary to initialize `rc` variables, so remove their initialization operation. Fixes: f2bbc96e7cfad ("s390/pkey: add CCA AES cipher key support") Signed-off-by: Tianjia Zhang Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index d5880f52dc2b..5896e5282a4e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -818,7 +818,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; struct keytoken_header *hdr = (struct keytoken_header *)key; @@ -886,7 +886,7 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = -EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; if (ktype == PKEY_TYPE_CCA_DATA || ktype == PKEY_TYPE_CCA_CIPHER) { -- cgit From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 4 Aug 2020 20:35:49 +0200 Subject: s390/numa: set node distance to LOCAL_DISTANCE The node distance is hardcoded to 0, which causes a trouble for some user-level applications. In particular, "libnuma" expects the distance of a node to itself as LOCAL_DISTANCE. This update removes the offending node distance override. Cc: # 4.4 Fixes: 3a368f742da1 ("s390/numa: add core infrastructure") Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/topology.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fbb507504a3b..3a0ac0c7a9a3 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) -static inline int __node_distance(int a, int b) -{ - return 0; -} - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id -- cgit From 929a343b858612100cb09443a8aaa20d4a4706d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 5 Aug 2020 17:50:53 +0200 Subject: s390/Kconfig: add missing ZCRYPT dependency to VFIO_AP The VFIO_AP uses ap_driver_register() (and deregister) functions implemented in ap_bus.c (compiled into ap.o). However the ap.o will be built only if CONFIG_ZCRYPT is selected. This was not visible before commit e93a1695d7fb ("iommu: Enable compile testing for some of drivers") because the CONFIG_VFIO_AP depends on CONFIG_S390_AP_IOMMU which depends on the missing CONFIG_ZCRYPT. After adding COMPILE_TEST, it is possible to select a configuration with VFIO_AP and S390_AP_IOMMU but without the ZCRYPT. Add proper dependency to the VFIO_AP to fix build errors: ERROR: modpost: "ap_driver_register" [drivers/s390/crypto/vfio_ap.ko] undefined! ERROR: modpost: "ap_driver_unregister" [drivers/s390/crypto/vfio_ap.ko] undefined! Reported-by: kernel test robot Fixes: e93a1695d7fb ("iommu: Enable compile testing for some of drivers") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8c0b52940165..2a31a5e74e42 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -766,6 +766,7 @@ config VFIO_AP def_tristate n prompt "VFIO support for AP devices" depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices via the VFIO mediated device interface. -- cgit From 0990d836cecb207071492f5679d5b07b26574205 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Tue, 5 May 2020 10:34:52 +0200 Subject: s390/debug: debug feature version 3 Change __debug_entry structure in the following way: - remove redundant union - Field containing cpuid is expanded to 16 bits. 8-bit width was not enough since we already support up to 512 cpus. - Field containing the timestamp is expanded to 60 bits. The timestamp itself is now stored in the absolute Unix time format in microseconds taking the Epoch Index into acount. Adjust default header for debug entries by setting minimum width for cpuid to 4 digits. Reviewed-by: Heiko Carstens Signed-off-by: Mikhail Zaslonko Signed-off-by: Heiko Carstens --- arch/s390/include/asm/debug.h | 17 ++++++----------- arch/s390/kernel/debug.c | 32 ++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 17a26261f288..c1b82bcc017c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2000 + * Copyright IBM Corp. 1999, 2020 */ #ifndef DEBUG_H #define DEBUG_H @@ -26,19 +26,14 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ +#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */ struct __debug_entry { - union { - struct { - unsigned long clock : 52; - unsigned long exception : 1; - unsigned long level : 3; - unsigned long cpuid : 8; - } fields; - unsigned long stck; - } id; + unsigned long clock : 60; + unsigned long exception : 1; + unsigned long level : 3; void *caller; + unsigned short cpu; } __packed; typedef struct __debug_entry debug_entry_t; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index beb4b44a11d1..b6619ae9a3e0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 1999, 2020 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) @@ -433,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info) act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] [p_info->act_page] + p_info->act_entry); - if (act_entry->id.stck == 0LL) + if (act_entry->clock == 0LL) goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, @@ -829,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - - *(unsigned long long *) &tod_clock_base[1]; - active->id.fields.cpuid = smp_processor_id(); + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + unsigned long timestamp; + + get_tod_clock_ext(clk); + timestamp = *(unsigned long *) &clk[0] >> 4; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); if (exception) proceed_active_area(id); @@ -1398,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf) { - unsigned long base, sec, usec; + unsigned long sec, usec; unsigned long caller; unsigned int level; char *except_str; int rc = 0; - level = entry->id.fields.level; - base = (*(unsigned long *) &tod_clock_base[0]) >> 4; - sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); + level = entry->level; + sec = entry->clock; usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", area, sec, usec, level, except_str, - entry->id.fields.cpuid, (void *)caller); + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); -- cgit From 12bbf0962afa903321f28c454d7b667199ae55da Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Aug 2020 14:50:41 +0200 Subject: s390/time: remove select CLOCKSOURCE_VALIDATE_LAST_CYCLE again Sven Schnelle reported that setting CLOCKSOURCE_VALIDATE_LAST_CYCLE doesn't make sense: even if our tod clock overflows delta calculation (now - last) with unsigned 64 bit values will still be correct. Therefore revert commit 555701a714f7 ("s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE"). Fixes: 555701a714f7 ("s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE") Reported-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2a31a5e74e42..3d86e12e8e3c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,6 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY -- cgit From b450eeb0c973ed4125ea91e35613f029337fd28b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 4 Aug 2020 20:35:50 +0200 Subject: s390/numa: move code to arch/s390/kernel Move all code from arch/s390/numa/ to arch/s390/kernel/ since numa.c is the only source file and all others were deleted with the fake NUMA support removal. Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/Kbuild | 1 - arch/s390/kernel/Makefile | 1 + arch/s390/kernel/numa.c | 42 ++++++++++++++++++++++++++++++++++++++++++ arch/s390/numa/Makefile | 2 -- arch/s390/numa/numa.c | 42 ------------------------------------------ 5 files changed, 43 insertions(+), 45 deletions(-) create mode 100644 arch/s390/kernel/numa.c delete mode 100644 arch/s390/numa/Makefile delete mode 100644 arch/s390/numa/numa.c diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e63940bb57cd..8b98c501142d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_NUMA) += numa/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a8f136943deb..efca70970761 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -49,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c new file mode 100644 index 000000000000..51c5a9f6e525 --- /dev/null +++ b/arch/s390/kernel/numa.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NUMA support for s390 + * + * Implement NUMA core code. + * + * Copyright IBM Corp. 2015 + */ + +#include +#include +#include +#include +#include +#include + +struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); + +void __init numa_setup(void) +{ + int nid; + + nodes_clear(node_possible_map); + node_set(0, node_possible_map); + node_set_online(0); + for (nid = 0; nid < MAX_NUMNODES; nid++) { + NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); + if (!NODE_DATA(nid)) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(pg_data_t), 8); + } + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; + NODE_DATA(0)->node_id = 0; +} + +static int __init numa_init_late(void) +{ + register_one_node(0); + return 0; +} +arch_initcall(numa_init_late); diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile deleted file mode 100644 index c89d26f4f77d..000000000000 --- a/arch/s390/numa/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += numa.o diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c deleted file mode 100644 index 51c5a9f6e525..000000000000 --- a/arch/s390/numa/numa.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NUMA support for s390 - * - * Implement NUMA core code. - * - * Copyright IBM Corp. 2015 - */ - -#include -#include -#include -#include -#include -#include - -struct pglist_data *node_data[MAX_NUMNODES]; -EXPORT_SYMBOL(node_data); - -void __init numa_setup(void) -{ - int nid; - - nodes_clear(node_possible_map); - node_set(0, node_possible_map); - node_set_online(0); - for (nid = 0; nid < MAX_NUMNODES; nid++) { - NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); - if (!NODE_DATA(nid)) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(pg_data_t), 8); - } - NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; - NODE_DATA(0)->node_id = 0; -} - -static int __init numa_init_late(void) -{ - register_one_node(0); - return 0; -} -arch_initcall(numa_init_late); -- cgit From 1fc0fd6739bcaf282f5ffb26ca082aea5a45a7b3 Mon Sep 17 00:00:00 2001 From: Bryan Brattlof Date: Tue, 11 Aug 2020 16:17:12 +0000 Subject: docs: trace: fix a typo emumerated -> enumerated Signed-off-by: Bryan Brattlof Link: https://lore.kernel.org/r/87lfili2d8.fsf@bryanbrattlof.com Signed-off-by: Jonathan Corbet --- Documentation/trace/intel_th.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/intel_th.rst b/Documentation/trace/intel_th.rst index 70b7126eaeeb..b31818d5f6c5 100644 --- a/Documentation/trace/intel_th.rst +++ b/Documentation/trace/intel_th.rst @@ -58,7 +58,7 @@ Bus and Subdevices For each Intel TH device in the system a bus of its own is created and assigned an id number that reflects the order in which TH -devices were emumerated. All TH subdevices (devices on intel_th bus) +devices were enumerated. All TH subdevices (devices on intel_th bus) begin with this id: 0-gth, 0-msc0, 0-msc1, 0-pti, 0-sth, which is followed by device's name and an optional index. -- cgit From 7caf3e3f1703650ea7d7221aba3ae4b3a798b89a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:18:28 +0530 Subject: Filesystems: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20200810184828.29297-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/journalling.rst | 66 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/Documentation/filesystems/journalling.rst b/Documentation/filesystems/journalling.rst index 58ce6b395206..7e2be2faf653 100644 --- a/Documentation/filesystems/journalling.rst +++ b/Documentation/filesystems/journalling.rst @@ -10,27 +10,27 @@ Details The journalling layer is easy to use. You need to first of all create a journal_t data structure. There are two calls to do this dependent on how you decide to allocate the physical media on which the journal -resides. The :c:func:`jbd2_journal_init_inode` call is for journals stored in -filesystem inodes, or the :c:func:`jbd2_journal_init_dev` call can be used +resides. The jbd2_journal_init_inode() call is for journals stored in +filesystem inodes, or the jbd2_journal_init_dev() call can be used for journal stored on a raw device (in a continuous range of blocks). A journal_t is a typedef for a struct pointer, so when you are finally -finished make sure you call :c:func:`jbd2_journal_destroy` on it to free up +finished make sure you call jbd2_journal_destroy() on it to free up any used kernel memory. Once you have got your journal_t object you need to 'mount' or load the journal file. The journalling layer expects the space for the journal was already allocated and initialized properly by the userspace tools. -When loading the journal you must call :c:func:`jbd2_journal_load` to process +When loading the journal you must call jbd2_journal_load() to process journal contents. If the client file system detects the journal contents does not need to be processed (or even need not have valid contents), it -may call :c:func:`jbd2_journal_wipe` to clear the journal contents before -calling :c:func:`jbd2_journal_load`. +may call jbd2_journal_wipe() to clear the journal contents before +calling jbd2_journal_load(). Note that jbd2_journal_wipe(..,0) calls -:c:func:`jbd2_journal_skip_recovery` for you if it detects any outstanding -transactions in the journal and similarly :c:func:`jbd2_journal_load` will -call :c:func:`jbd2_journal_recover` if necessary. I would advise reading -:c:func:`ext4_load_journal` in fs/ext4/super.c for examples on this stage. +jbd2_journal_skip_recovery() for you if it detects any outstanding +transactions in the journal and similarly jbd2_journal_load() will +call jbd2_journal_recover() if necessary. I would advise reading +ext4_load_journal() in fs/ext4/super.c for examples on this stage. Now you can go ahead and start modifying the underlying filesystem. Almost. @@ -39,57 +39,57 @@ You still need to actually journal your filesystem changes, this is done by wrapping them into transactions. Additionally you also need to wrap the modification of each of the buffers with calls to the journal layer, so it knows what the modifications you are actually making are. To do -this use :c:func:`jbd2_journal_start` which returns a transaction handle. +this use jbd2_journal_start() which returns a transaction handle. -:c:func:`jbd2_journal_start` and its counterpart :c:func:`jbd2_journal_stop`, +jbd2_journal_start() and its counterpart jbd2_journal_stop(), which indicates the end of a transaction are nestable calls, so you can reenter a transaction if necessary, but remember you must call -:c:func:`jbd2_journal_stop` the same number of times as -:c:func:`jbd2_journal_start` before the transaction is completed (or more +jbd2_journal_stop() the same number of times as +jbd2_journal_start() before the transaction is completed (or more accurately leaves the update phase). Ext4/VFS makes use of this feature to simplify handling of inode dirtying, quota support, etc. Inside each transaction you need to wrap the modifications to the individual buffers (blocks). Before you start to modify a buffer you -need to call :c:func:`jbd2_journal_get_create_access()` / -:c:func:`jbd2_journal_get_write_access()` / -:c:func:`jbd2_journal_get_undo_access()` as appropriate, this allows the +need to call jbd2_journal_get_create_access() / +jbd2_journal_get_write_access() / +jbd2_journal_get_undo_access() as appropriate, this allows the journalling layer to copy the unmodified data if it needs to. After all the buffer may be part of a previously uncommitted transaction. At this point you are at last ready to modify a buffer, and once you are have done so you need to call -:c:func:`jbd2_journal_dirty_metadata`. Or if you've asked for access to a +jbd2_journal_dirty_metadata(). Or if you've asked for access to a buffer you now know is now longer required to be pushed back on the -device you can call :c:func:`jbd2_journal_forget` in much the same way as you -might have used :c:func:`bforget` in the past. +device you can call jbd2_journal_forget() in much the same way as you +might have used bforget() in the past. -A :c:func:`jbd2_journal_flush` may be called at any time to commit and +A jbd2_journal_flush() may be called at any time to commit and checkpoint all your transactions. -Then at umount time , in your :c:func:`put_super` you can then call -:c:func:`jbd2_journal_destroy` to clean up your in-core journal object. +Then at umount time , in your put_super() you can then call +jbd2_journal_destroy() to clean up your in-core journal object. Unfortunately there a couple of ways the journal layer can cause a deadlock. The first thing to note is that each task can only have a single outstanding transaction at any one time, remember nothing commits -until the outermost :c:func:`jbd2_journal_stop`. This means you must complete +until the outermost jbd2_journal_stop(). This means you must complete the transaction at the end of each file/inode/address etc. operation you perform, so that the journalling system isn't re-entered on another journal. Since transactions can't be nested/batched across differing journals, and another filesystem other than yours (say ext4) may be modified in a later syscall. -The second case to bear in mind is that :c:func:`jbd2_journal_start` can block +The second case to bear in mind is that jbd2_journal_start() can block if there isn't enough space in the journal for your transaction (based on the passed nblocks param) - when it blocks it merely(!) needs to wait for transactions to complete and be committed from other tasks, so -essentially we are waiting for :c:func:`jbd2_journal_stop`. So to avoid -deadlocks you must treat :c:func:`jbd2_journal_start` / -:c:func:`jbd2_journal_stop` as if they were semaphores and include them in +essentially we are waiting for jbd2_journal_stop(). So to avoid +deadlocks you must treat jbd2_journal_start() / +jbd2_journal_stop() as if they were semaphores and include them in your semaphore ordering rules to prevent -deadlocks. Note that :c:func:`jbd2_journal_extend` has similar blocking -behaviour to :c:func:`jbd2_journal_start` so you can deadlock here just as -easily as on :c:func:`jbd2_journal_start`. +deadlocks. Note that jbd2_journal_extend() has similar blocking +behaviour to jbd2_journal_start() so you can deadlock here just as +easily as on jbd2_journal_start(). Try to reserve the right number of blocks the first time. ;-). This will be the maximum number of blocks you are going to touch in this @@ -116,8 +116,8 @@ called after each transaction commit. You can also use that need processing when the transaction commits. JBD2 also provides a way to block all transaction updates via -:c:func:`jbd2_journal_lock_updates()` / -:c:func:`jbd2_journal_unlock_updates()`. Ext4 uses this when it wants a +jbd2_journal_lock_updates() / +jbd2_journal_unlock_updates(). Ext4 uses this when it wants a window with a clean and stable fs for a moment. E.g. :: -- cgit From 2d88fc62d4b67cd7b3cf7ed89e4997122548ecdb Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:06:13 +0530 Subject: Dev-tools: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20200810183613.25643-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/kgdb.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst index 0e52e966a153..c908ef4d3f04 100644 --- a/Documentation/dev-tools/kgdb.rst +++ b/Documentation/dev-tools/kgdb.rst @@ -316,7 +316,7 @@ driver as a loadable kernel module kgdbwait will not do anything. Kernel parameter: ``kgdbcon`` ----------------------------- -The ``kgdbcon`` feature allows you to see :c:func:`printk` messages inside gdb +The ``kgdbcon`` feature allows you to see printk() messages inside gdb while gdb is connected to the kernel. Kdb does not make use of the kgdbcon feature. @@ -432,7 +432,7 @@ This is a quick example of how to use kdb. ``ps`` Displays only the active processes ``ps A`` Shows all the processes ``summary`` Shows kernel version info and memory usage - ``bt`` Get a backtrace of the current process using :c:func:`dump_stack` + ``bt`` Get a backtrace of the current process using dump_stack() ``dmesg`` View the kernel syslog buffer ``go`` Continue the system =========== ================================================================= @@ -724,7 +724,7 @@ The kernel debugger is organized into a number of components: The arch-specific portion implements: - contains an arch-specific trap catcher which invokes - :c:func:`kgdb_handle_exception` to start kgdb about doing its work + kgdb_handle_exception() to start kgdb about doing its work - translation to and from gdb specific packet format to :c:type:`pt_regs` @@ -769,7 +769,7 @@ The kernel debugger is organized into a number of components: config. Later run ``modprobe kdb_hello`` and the next time you enter the kdb shell, you can run the ``hello`` command. - - The implementation for :c:func:`kdb_printf` which emits messages directly + - The implementation for kdb_printf() which emits messages directly to I/O drivers, bypassing the kernel log. - SW / HW breakpoint management for the kdb shell @@ -875,7 +875,7 @@ kernel when ``CONFIG_KDB_KEYBOARD=y`` is set in the kernel configuration. The core polled keyboard driver for PS/2 type keyboards is in ``drivers/char/kdb_keyboard.c``. This driver is hooked into the debug core when kgdboc populates the callback in the array called -:c:type:`kdb_poll_funcs[]`. The :c:func:`kdb_get_kbd_char` is the top-level +:c:type:`kdb_poll_funcs[]`. The kdb_get_kbd_char() is the top-level function which polls hardware for single character input. kgdboc and kms @@ -887,10 +887,10 @@ that you have a video driver which has a frame buffer console and atomic kernel mode setting support. Every time the kernel debugger is entered it calls -:c:func:`kgdboc_pre_exp_handler` which in turn calls :c:func:`con_debug_enter` +kgdboc_pre_exp_handler() which in turn calls con_debug_enter() in the virtual console layer. On resuming kernel execution, the kernel -debugger calls :c:func:`kgdboc_post_exp_handler` which in turn calls -:c:func:`con_debug_leave`. +debugger calls kgdboc_post_exp_handler() which in turn calls +con_debug_leave(). Any video driver that wants to be compatible with the kernel debugger and the atomic kms callbacks must implement the ``mode_set_base_atomic``, -- cgit From ec8213f8900573cbbf250b240f9aebd78366b251 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 11 Aug 2020 00:00:19 +0530 Subject: Core-api: Documentation: Replace deprecated :c:func: Usage Replace :c:func: with func() as the previous usage is deprecated. Signed-off-by: Puranjay Mohan Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20200810183019.22170-1-puranjay12@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/idr.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index a2738050c4f0..2eb5afdb9931 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -20,48 +20,48 @@ only ID allocation, and as a result is much more memory-efficient. IDR usage ========= -Start by initialising an IDR, either with :c:func:`DEFINE_IDR` -for statically allocated IDRs or :c:func:`idr_init` for dynamically +Start by initialising an IDR, either with DEFINE_IDR() +for statically allocated IDRs or idr_init() for dynamically allocated IDRs. -You can call :c:func:`idr_alloc` to allocate an unused ID. Look up -the pointer you associated with the ID by calling :c:func:`idr_find` -and free the ID by calling :c:func:`idr_remove`. +You can call idr_alloc() to allocate an unused ID. Look up +the pointer you associated with the ID by calling idr_find() +and free the ID by calling idr_remove(). If you need to change the pointer associated with an ID, you can call -:c:func:`idr_replace`. One common reason to do this is to reserve an +idr_replace(). One common reason to do this is to reserve an ID by passing a ``NULL`` pointer to the allocation function; initialise the object with the reserved ID and finally insert the initialised object into the IDR. Some users need to allocate IDs larger than ``INT_MAX``. So far all of these users have been content with a ``UINT_MAX`` limit, and they use -:c:func:`idr_alloc_u32`. If you need IDs that will not fit in a u32, +idr_alloc_u32(). If you need IDs that will not fit in a u32, we will work with you to address your needs. If you need to allocate IDs sequentially, you can use -:c:func:`idr_alloc_cyclic`. The IDR becomes less efficient when dealing +idr_alloc_cyclic(). The IDR becomes less efficient when dealing with larger IDs, so using this function comes at a slight cost. To perform an action on all pointers used by the IDR, you can -either use the callback-based :c:func:`idr_for_each` or the -iterator-style :c:func:`idr_for_each_entry`. You may need to use -:c:func:`idr_for_each_entry_continue` to continue an iteration. You can -also use :c:func:`idr_get_next` if the iterator doesn't fit your needs. +either use the callback-based idr_for_each() or the +iterator-style idr_for_each_entry(). You may need to use +idr_for_each_entry_continue() to continue an iteration. You can +also use idr_get_next() if the iterator doesn't fit your needs. -When you have finished using an IDR, you can call :c:func:`idr_destroy` +When you have finished using an IDR, you can call idr_destroy() to release the memory used by the IDR. This will not free the objects pointed to from the IDR; if you want to do that, use one of the iterators to do it. -You can use :c:func:`idr_is_empty` to find out whether there are any +You can use idr_is_empty() to find out whether there are any IDs currently allocated. If you need to take a lock while allocating a new ID from the IDR, you may need to pass a restrictive set of GFP flags, which can lead to the IDR being unable to allocate memory. To work around this, -you can call :c:func:`idr_preload` before taking the lock, and then -:c:func:`idr_preload_end` after the allocation. +you can call idr_preload() before taking the lock, and then +idr_preload_end() after the allocation. .. kernel-doc:: include/linux/idr.h :doc: idr sync -- cgit From a5019c7f563154bec1cc4026e0883f86126fb2f4 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Tue, 11 Aug 2020 05:53:50 +0530 Subject: documentation: coccinelle: Improve command example for make C={1,2} Modify coccinelle documentation to further clarify the usage of the makefile C variable by coccicheck. Signed-off-by: Sumera Priyadarsini Acked-by: Julia Lawall Link: https://lore.kernel.org/r/20200811002350.5553-1-sylphrenadin@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/coccinelle.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/dev-tools/coccinelle.rst b/Documentation/dev-tools/coccinelle.rst index 6c791af1c859..74c5e6aeeff5 100644 --- a/Documentation/dev-tools/coccinelle.rst +++ b/Documentation/dev-tools/coccinelle.rst @@ -175,13 +175,20 @@ For example, to check drivers/net/wireless/ one may write:: make coccicheck M=drivers/net/wireless/ To apply Coccinelle on a file basis, instead of a directory basis, the -following command may be used:: +C variable is used by the makefile to select which files to work with. +This variable can be used to run scripts for the entire kernel, a +specific directory, or for a single file. - make C=1 CHECK="scripts/coccicheck" +For example, to check drivers/bluetooth/bfusb.c, the value 1 is +passed to the C variable to check files that make considers +need to be compiled.:: -To check only newly edited code, use the value 2 for the C flag, i.e.:: + make C=1 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o - make C=2 CHECK="scripts/coccicheck" +The value 2 is passed to the C variable to check files regardless of +whether they need to be compiled or not.:: + + make C=2 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o In these modes, which work on a file basis, there is no information about semantic patches displayed, and no commit message proposed. -- cgit From f30c3ff3f0e8305d6c1a210df6d588a13333b8f7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 10 Aug 2020 11:50:00 +0200 Subject: Documentation/features: refresh RISC-V arch support files Support for these was added by the following commits: f2c9699f6555 ("riscv: Add STACKPROTECTOR supported") 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT"). ed48b297fe21 ("riscv: Enable context tracking") cbb3d91d3bcf ("riscv: Add kmemleak support") Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20200810095000.32092-1-tklauser@distanz.ch Signed-off-by: Jonathan Corbet --- Documentation/features/debug/kmemleak/arch-support.txt | 2 +- Documentation/features/debug/stackprotector/arch-support.txt | 2 +- Documentation/features/locking/lockdep/arch-support.txt | 2 +- Documentation/features/time/context-tracking/arch-support.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index b7e4f3608838..2db76807ec6f 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index 12410f606edc..79233416c05f 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | ok | | sparc: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index 98cb9d85c55d..56c7cf1404da 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | ok | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index 048bfb6d3872..0d211d737c9c 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | ok | -- cgit From be3a5b0e8b8762ef2007d1c6780f273cbd7252d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Aug 2020 19:49:41 -0700 Subject: Doc: admin-guide: use correct legends in kernel-parameters.txt Documentation/admin-guide/kernel-parameters.rst includes a legend telling us what configurations or hardware platforms are relevant for certain boot options. For X86, it is spelled "X86" and for x86_64, it is spelled "X86-64", so make corrections for those. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Link: https://lore.kernel.org/r/20200810024941.30231-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d325726ff425..8ccf1985a2a3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -719,7 +719,7 @@ memory region [offset, offset + size] for that kernel image. If '@offset' is omitted, then a suitable offset is selected automatically. - [KNL, x86_64] select a region under 4G first, and + [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. @@ -732,14 +732,14 @@ Documentation/admin-guide/kdump/kdump.rst for an example. crashkernel=size[KMG],high - [KNL, x86_64] range could be above 4G. Allow kernel + [KNL, X86-64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. crashkernel=size[KMG],low - [KNL, x86_64] range under 4G. When crashkernel=X,high + [KNL, X86-64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb @@ -1403,7 +1403,7 @@ gamma= [HW,DRM] - gart_fix_e820= [X86_64] disable the fix e820 for K8 GART + gart_fix_e820= [X86-64] disable the fix e820 for K8 GART Format: off | on default: on @@ -1790,7 +1790,7 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_pkru= [x86] Specify the default memory protection keys rights + init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can override in debugfs after boot. @@ -1798,7 +1798,7 @@ inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver Format: - int_pln_enable [x86] Enable power limit notification interrupt + int_pln_enable [X86] Enable power limit notification interrupt integrity_audit=[IMA] Format: { "0" | "1" } @@ -1816,7 +1816,7 @@ bypassed by not enabling DMAR with this option. In this case, gfx device will use physical address for DMA. - forcedac [x86_64] + forcedac [X86-64] With this option iommu will not optimize to look for io virtual address below 32-bit forcing dual address cycle on pci bus for cards supporting greater @@ -1901,7 +1901,7 @@ strict regions from userspace. relaxed - iommu= [x86] + iommu= [X86] off force noforce @@ -1911,8 +1911,8 @@ merge nomerge soft - pt [x86] - nopt [x86] + pt [X86] + nopt [X86] nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. @@ -2055,21 +2055,21 @@ iucv= [HW,NET] - ivrs_ioapic [HW,X86_64] + ivrs_ioapic [HW,X86-64] Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 - ivrs_hpet [HW,X86_64] + ivrs_hpet [HW,X86-64] Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 - ivrs_acpihid [HW,X86_64] + ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map UART-HID:UID AMD0020:0 to @@ -2346,7 +2346,7 @@ lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. - lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline + lapic= [X86,APIC] "notscdeadline" Do not use TSC deadline value for LAPIC timer one-shot implementation. Default back to the programmable timer unit in the LAPIC. @@ -3162,12 +3162,12 @@ register save and restore. The kernel will only save legacy floating-point registers on task switch. - nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings. + nohugeiomap [KNL,X86,PPC] Disable kernel huge I/O mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,x86] Disable symmetric multithreading (SMT). + [KNL,X86] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. @@ -3929,7 +3929,7 @@ pt. [PARIDE] See Documentation/admin-guide/blockdev/paride.rst. - pti= [X86_64] Control Page Table Isolation of user and + pti= [X86-64] Control Page Table Isolation of user and kernel address spaces. Disabling this feature removes hardening, but improves performance of system calls and interrupts. @@ -3941,7 +3941,7 @@ Not specifying this option is equivalent to pti=auto. - nopti [X86_64] + nopti [X86-64] Equivalent to pti=off pty.legacy_count= -- cgit From 502b675050a506fb966c6f273d2ce42e5d8ddb07 Mon Sep 17 00:00:00 2001 From: Remi Andruccioli Date: Sat, 8 Aug 2020 18:31:23 +0200 Subject: docs: cdrom: Fix a typo and rst markup "The capability fags" should be "The capability flags". In rst markup, a incorrect markup expression is causing bad rendering in Sphinx output. Replace the erroneous single quote by a backquote. Signed-off-by: Remi Andruccioli Link: https://lore.kernel.org/r/20200808163123.17643-1-remi.andruccioli@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/cdrom/cdrom-standard.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index dde4f7f7fdbf..5a1305638be7 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -571,7 +571,7 @@ phase. Currently, the capabilities are any of:: CDC_DRIVE_STATUS /* driver implements drive status */ The capability flag is declared *const*, to prevent drivers from -accidentally tampering with the contents. The capability fags actually +accidentally tampering with the contents. The capability flags actually inform `cdrom.c` of what the driver can do. If the drive found by the driver does not have the capability, is can be masked out by the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM @@ -750,7 +750,7 @@ Description of routines in `cdrom.c` Only a few routines in `cdrom.c` are exported to the drivers. In this new section we will discuss these, as well as the functions that `take -over' the CD-ROM interface to the kernel. The header file belonging +over` the CD-ROM interface to the kernel. The header file belonging to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this file were placed in the file `ucdrom.h`, but this file has now been merged back into `cdrom.h`. -- cgit From 2824a3669fb28324a76bba1d2203f2f11b126124 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 8 Aug 2020 00:14:36 -0700 Subject: mailmap: Update comments for with format and more detalis Without having first read the git-shortlog man-page, the format of .mailmap may not be immediately obvious. Add comments with pointers to the man-page, along with other details. Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/202008080013.58EBD83@keescook Signed-off-by: Jonathan Corbet --- .mailmap | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 9b0b9ec3e53e..787df6ec8908 100644 --- a/.mailmap +++ b/.mailmap @@ -2,11 +2,16 @@ # This list is used by git-shortlog to fix a few botched name translations # in the git archive, either because the author's full name was messed up # and/or not always written the same way, making contributions from the -# same person appearing not to be so or badly displayed. +# same person appearing not to be so or badly displayed. Also allows for +# old email addresses to map to new email addresses. # +# For format details, see "MAPPING AUTHORS" in "man git-shortlog". +# +# Please keep this list dictionary sorted. +# +# This comment is parsed by git-shortlog: # repo-abbrev: /pub/scm/linux/kernel/git/ # - Aaron Durbin Adam Oldham Adam Radford -- cgit From cca73e4946c4a8d2ce3395207d8a0b5c565b530b Mon Sep 17 00:00:00 2001 From: Billy Wilson Date: Thu, 6 Aug 2020 17:17:54 -0600 Subject: docs: Correct the release date of 5.2 stable A table lists the 5.2 stable release date as September 15, but it was released on July 7. This may confuse a reader who is trying to understand the stable update release cycle. Signed-off-by: Billy Wilson Link: https://lore.kernel.org/r/20200806231754.7735-1-billy_wilson@byu.edu Signed-off-by: Jonathan Corbet --- Documentation/process/2.Process.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst index 3588f48841eb..4ae1e0f600c1 100644 --- a/Documentation/process/2.Process.rst +++ b/Documentation/process/2.Process.rst @@ -113,7 +113,7 @@ than one development cycle past their initial release. So, for example, the 5.2 kernel's history looked like this (all dates in 2019): ============== =============================== - September 15 5.2 stable release + July 7 5.2 stable release July 14 5.2.1 July 21 5.2.2 July 26 5.2.3 -- cgit From 7033a95ab411db94f41b7178e52ae0e434356cb8 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 6 Aug 2020 18:14:56 +0200 Subject: docs: remove the 2.6 "Upgrading I2C Drivers" guide All the drivers have long since been upgraded, and all the important information here is also included in the "Implementing I2C device drivers" guide. Signed-off-by: Stephen Kitt Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20200806161456.8680-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/i2c/index.rst | 1 - Documentation/i2c/upgrading-clients.rst | 285 -------------------------------- 2 files changed, 286 deletions(-) delete mode 100644 Documentation/i2c/upgrading-clients.rst diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index fee4744475df..8a2ad3845191 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -62,7 +62,6 @@ Legacy documentation .. toctree:: :maxdepth: 1 - upgrading-clients old-module-parameters .. only:: subproject and html diff --git a/Documentation/i2c/upgrading-clients.rst b/Documentation/i2c/upgrading-clients.rst deleted file mode 100644 index 1708090d7b8f..000000000000 --- a/Documentation/i2c/upgrading-clients.rst +++ /dev/null @@ -1,285 +0,0 @@ -================================================= -Upgrading I2C Drivers to the new 2.6 Driver Model -================================================= - -Ben Dooks - -Introduction ------------- - -This guide outlines how to alter existing Linux 2.6 client drivers from -the old to the new binding methods. - - -Example old-style driver ------------------------- - -:: - - struct example_state { - struct i2c_client client; - .... - }; - - static struct i2c_driver example_driver; - - static unsigned short ignore[] = { I2C_CLIENT_END }; - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - I2C_CLIENT_INSMOD; - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - { - struct example_state *state; - struct device *dev = &adap->dev; /* to use for dev_ reports */ - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - example->client.addr = addr; - example->client.flags = 0; - example->client.adapter = adap; - - i2c_set_clientdata(&state->i2c_client, state); - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - - ret = i2c_attach_client(&state->i2c_client); - if (ret < 0) { - dev_err(dev, "failed to attach client\n"); - kfree(state); - return ret; - } - - dev = &state->i2c_client.dev; - - /* rest of the initialisation goes here. */ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_detach(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - i2c_detach_client(client); - kfree(state); - return 0; - } - - static int example_attach_adapter(struct i2c_adapter *adap) - { - return i2c_probe(adap, &addr_data, example_attach); - } - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .attach_adapter = example_attach_adapter, - .detach_client = example_detach, - }; - - -Updating the client -------------------- - -The new style binding model will check against a list of supported -devices and their associated address supplied by the code registering -the busses. This means that the driver .attach_adapter and -.detach_client methods can be removed, along with the addr_data, -as follows:: - - - static struct i2c_driver example_driver; - - - static unsigned short ignore[] = { I2C_CLIENT_END }; - - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - - I2C_CLIENT_INSMOD; - - - static int example_attach_adapter(struct i2c_adapter *adap) - - { - - return i2c_probe(adap, &addr_data, example_attach); - - } - - static struct i2c_driver example_driver = { - - .attach_adapter = example_attach_adapter, - - .detach_client = example_detach, - } - -Add the probe and remove methods to the i2c_driver, as so:: - - static struct i2c_driver example_driver = { - + .probe = example_probe, - + .remove = example_remove, - } - -Change the example_attach method to accept the new parameters -which include the i2c_client that it will be working with:: - - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - + static int example_probe(struct i2c_client *client, - + const struct i2c_device_id *id) - -Change the name of example_attach to example_probe to align it with the -i2c_driver entry names. The rest of the probe routine will now need to be -changed as the i2c_client has already been setup for use. - -The necessary client fields have already been setup before -the probe function is called, so the following client setup -can be removed:: - - - example->client.addr = addr; - - example->client.flags = 0; - - example->client.adapter = adap; - - - - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - -The i2c_set_clientdata is now:: - - - i2c_set_clientdata(&state->client, state); - + i2c_set_clientdata(client, state); - -The call to i2c_attach_client is no longer needed, if the probe -routine exits successfully, then the driver will be automatically -attached by the core. Change the probe routine as so:: - - - ret = i2c_attach_client(&state->i2c_client); - - if (ret < 0) { - - dev_err(dev, "failed to attach client\n"); - - kfree(state); - - return ret; - - } - - -Remove the storage of 'struct i2c_client' from the 'struct example_state' -as we are provided with the i2c_client in our example_probe. Instead we -store a pointer to it for when it is needed. - -:: - - struct example_state { - - struct i2c_client client; - + struct i2c_client *client; - -the new i2c client as so:: - - - struct device *dev = &adap->dev; /* to use for dev_ reports */ - + struct device *dev = &i2c_client->dev; /* to use for dev_ reports */ - -And remove the change after our client is attached, as the driver no -longer needs to register a new client structure with the core:: - - - dev = &state->i2c_client.dev; - -In the probe routine, ensure that the new state has the client stored -in it:: - - static int example_probe(struct i2c_client *i2c_client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &i2c_client->dev; - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - + state->client = i2c_client; - -Update the detach method, by changing the name to _remove and -to delete the i2c_detach_client call. It is possible that you -can also remove the ret variable as it is not needed for any -of the core functions. - -:: - - - static int example_detach(struct i2c_client *client) - + static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - - i2c_detach_client(client); - -And finally ensure that we have the correct ID table for the i2c-core -and other utilities:: - - + struct i2c_device_id example_idtable[] = { - + { "example", 0 }, - + { } - +}; - + - +MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - }, - + .id_table = example_ids, - - -Our driver should now look like this:: - - struct example_state { - struct i2c_client *client; - .... - }; - - static int example_probe(struct i2c_client *client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &client->dev; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - state->client = client; - i2c_set_clientdata(client, state); - - /* rest of the initialisation goes here. */ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - kfree(state); - return 0; - } - - static struct i2c_device_id example_idtable[] = { - { "example", 0 }, - { } - }; - - MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .id_table = example_idtable, - .probe = example_probe, - .remove = example_remove, - }; -- cgit From ccfaed7bd443340d1cac7b6af7bb588766799964 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 2 Aug 2020 18:21:01 +0200 Subject: doc/zh_CN: fix title heading markup in admin-guide cpu-load Documentation generation warns: Documentation/translations/zh_CN/admin-guide/cpu-load.rst:1: WARNING: Title overline too short. Extend title heading markup by one. It was just off by one. Fixes: e210c66d567c ("doc/zh_CN: add cpu-load Chinese version") Signed-off-by: Lukas Bulwahn Acked-by: Tao Zhou Link: https://lore.kernel.org/r/20200802162101.18875-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/admin-guide/cpu-load.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst index 0116d0477799..c972731c0e57 100644 --- a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst @@ -1,6 +1,6 @@ -======= +======== CPU 负载 -======= +======== Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工具 如top(1)使用这些信息计算系统花费在某个特定状态的平均时间。 -- cgit From e176b7a3054eef44a22f6ca3d14168dcf9bad21e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 2 Aug 2020 18:19:56 +0200 Subject: doc/zh_CN: resolve undefined label warning in admin-guide index Documentation generation warns: Documentation/translations/zh_CN/admin-guide/index.rst:3: WARNING: undefined label: documentation/admin-guide/index.rst Use doc reference for .rst files to resolve the warning. Fixes: 37a607cf2318 ("doc/zh_CN: add admin-guide index") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20200802161956.18268-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/admin-guide/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index 7d502fa5da64..ed5ab7e37f38 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/admin-guide/index.rst` +:Original: :doc:`../../../admin-guide/index` :Translator: Alex Shi -- cgit From b06c19d9f827f6743122795570bfc0c72db482b0 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 10 Aug 2020 17:02:58 -0700 Subject: net/tls: Fix kmap usage When MSG_OOB is specified to tls_device_sendpage() the mapped page is never unmapped. Hold off mapping the page until after the flags are checked and the page is actually needed. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Ira Weiny Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 18fa6067bb7f..b74e2741f74f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -561,7 +561,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; - char *kaddr = kmap(page); + char *kaddr; struct kvec iov; int rc; @@ -576,6 +576,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); -- cgit From c79f428d6f14e754503fa7a3145d62039ccd05c9 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 8 Aug 2020 19:35:48 -0700 Subject: drivers/net/wan/x25_asy: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Added needed_headroom When this driver transmits data, first this driver will remove a pseudo header of 1 byte, then the lapb module will prepend the LAPB header of 2 or 3 bytes. So the value of needed_headroom in this driver should be 3 - 1. Cc: Willem de Bruijn Cc: Martin Schiller Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 84640a0c13f3..de7984463595 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -307,6 +307,14 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -752,6 +760,12 @@ static void x25_asy_setup(struct net_device *dev) dev->type = ARPHRD_X25; dev->tx_queue_len = 10; + /* When transmitting data: + * first this driver removes a pseudo header of 1 byte, + * then the lapb module prepends an LAPB header of at most 3 bytes. + */ + dev->needed_headroom = 3 - 1; + /* New-style flags. */ dev->flags = IFF_NOARP; } -- cgit From 1dab5877e8ebb7a00fbf0e7d797c869aa37830b9 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Sun, 9 Aug 2020 11:53:49 +0800 Subject: hinic: fix strncpy output truncated compile warnings fix the compile warnings of 'strncpy' output truncated before terminating nul copying N bytes from a string of the same length Signed-off-by: Luo bin Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 32 +++++++++-------------- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 -- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index c6adc776f3c8..16bda7381ba0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -334,19 +334,14 @@ void hinic_devlink_unregister(struct hinic_devlink_priv *priv) static int chip_fault_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_level[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { - "fatal", "reset", "flr", "general", "suggestion"}; - char level_str[FAULT_SHOW_STR_LEN + 1] = {0}; - u8 level; + const char * const level_str[FAULT_LEVEL_MAX + 1] = { + "fatal", "reset", "flr", "general", "suggestion", "Unknown"}; + u8 fault_level; int err; - level = event->event.chip.err_level; - if (level < FAULT_LEVEL_MAX) - strncpy(level_str, fault_level[level], strlen(fault_level[level])); - else - strncpy(level_str, "Unknown", strlen("Unknown")); - - if (level == FAULT_LEVEL_SERIOUS_FLR) { + fault_level = (event->event.chip.err_level < FAULT_LEVEL_MAX) ? + event->event.chip.err_level : FAULT_LEVEL_MAX; + if (fault_level == FAULT_LEVEL_SERIOUS_FLR) { err = devlink_fmsg_u32_pair_put(fmsg, "Function level err func_id", (u32)event->event.chip.func_id); if (err) @@ -361,7 +356,7 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, if (err) return err; - err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str); + err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str[fault_level]); if (err) return err; @@ -381,18 +376,15 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, static int fault_report_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_type[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { + const char * const type_str[FAULT_TYPE_MAX + 1] = { "chip", "ucode", "mem rd timeout", "mem wr timeout", - "reg rd timeout", "reg wr timeout", "phy fault"}; - char type_str[FAULT_SHOW_STR_LEN + 1] = {0}; + "reg rd timeout", "reg wr timeout", "phy fault", "Unknown"}; + u8 fault_type; int err; - if (event->type < FAULT_TYPE_MAX) - strncpy(type_str, fault_type[event->type], strlen(fault_type[event->type])); - else - strncpy(type_str, "Unknown", strlen("Unknown")); + fault_type = (event->type < FAULT_TYPE_MAX) ? event->type : FAULT_TYPE_MAX; - err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str); + err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str[fault_type]); if (err) return err; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index dc6e645f2689..701eb81e09a7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -504,8 +504,6 @@ enum hinic_fault_type { FAULT_TYPE_MAX, }; -#define FAULT_SHOW_STR_LEN 16 - enum hinic_fault_err_level { FAULT_LEVEL_FATAL, FAULT_LEVEL_SERIOUS_RESET, -- cgit From 26896f01467a28651f7a536143fe5ac8449d4041 Mon Sep 17 00:00:00 2001 From: Qingyu Li Date: Mon, 10 Aug 2020 09:51:00 +0800 Subject: net/nfc/rawsock.c: add CAP_NET_RAW check. When creating a raw AF_NFC socket, CAP_NET_RAW needs to be checked first. Signed-off-by: Qingyu Li Signed-off-by: David S. Miller --- net/nfc/rawsock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b2061b6746ea..955c195ae14b 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -328,10 +328,13 @@ static int rawsock_create(struct net *net, struct socket *sock, if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW) + if (sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + return -EPERM; sock->ops = &rawsock_raw_ops; - else + } else { sock->ops = &rawsock_ops; + } sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) -- cgit From e71642009cbd4a874c3af71a4b16b39499f58658 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 10 Aug 2020 02:38:07 +0000 Subject: ionic_lif: Use devm_kcalloc() in ionic_qcq_alloc() A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "devm_kcalloc". Signed-off-by: Xu Wang Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1944bf5264db..26988ad7ec97 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -412,7 +412,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->flags = flags; - new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs, + new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), GFP_KERNEL); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); @@ -462,7 +462,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; } - new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs, + new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), GFP_KERNEL); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); -- cgit From 50caa777a3a24d7027748e96265728ce748b41ef Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 10 Aug 2020 10:57:05 +0800 Subject: net: qcom/emac: add missed clk_disable_unprepare in error path of emac_clks_phase1_init Fix the missing clk_disable_unprepare() before return from emac_clks_phase1_init() in the error handling case. Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 20b1b43a0e39..1166b98d8bb2 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -474,13 +474,24 @@ static int emac_clks_phase1_init(struct platform_device *pdev, ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); if (ret) - return ret; + goto disable_clk_axi; ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); if (ret) - return ret; + goto disable_clk_cfg_ahb; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + if (ret) + goto disable_clk_cfg_ahb; - return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + return 0; + +disable_clk_cfg_ahb: + clk_disable_unprepare(adpt->clk[EMAC_CLK_CFG_AHB]); +disable_clk_axi: + clk_disable_unprepare(adpt->clk[EMAC_CLK_AXI]); + + return ret; } /* Enable clocks; needs emac_clks_phase1_init to be called before */ -- cgit From c1e2b8422bf946c80e832cee22b3399634f87a2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Aug 2020 11:59:50 +0800 Subject: block: fix double account of flush request's driver tag In case of none scheduler, we share data request's driver tag for flush request, so have to mark the flush request as INFLIGHT for avoiding double account of this driver tag. Fixes: 568f27006577 ("blk-mq: centralise related handling into blk_mq_get_driver_tag") Reported-by: Matthew Wilcox Signed-off-by: Ming Lei Tested-by: Matthew Wilcox Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 6e1543c10493..53abb5c73d99 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -308,9 +308,16 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; - if (!q->elevator) + if (!q->elevator) { flush_rq->tag = first_rq->tag; - else + + /* + * We borrow data request's driver tag, so have to mark + * this flush request as INFLIGHT for avoiding double + * account of this driver tag + */ + flush_rq->rq_flags |= RQF_MQ_INFLIGHT; + } else flush_rq->internal_tag = first_rq->internal_tag; flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; -- cgit From 0f5907af39137f8183ed536aaa00f322d7365130 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 10 Aug 2020 08:16:58 -0400 Subject: net: Fix potential memory leak in proto_register() If we failed to assign proto idx, we free the twsk_slab_name but forget to free the twsk_slab. Add a helper function tw_prot_cleanup() to free these together and also use this helper function in proto_unregister(). Fixes: b45ce32135d1 ("sock: fix potential memory leak in proto_register()") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/sock.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 49cd5ffe673e..c9083ad44ea1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3406,6 +3406,16 @@ static void sock_inuse_add(struct net *net, int val) } #endif +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) +{ + if (!twsk_prot) + return; + kfree(twsk_prot->twsk_slab_name); + twsk_prot->twsk_slab_name = NULL; + kmem_cache_destroy(twsk_prot->twsk_slab); + twsk_prot->twsk_slab = NULL; +} + static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) @@ -3476,7 +3486,7 @@ int proto_register(struct proto *prot, int alloc_slab) prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } } @@ -3484,15 +3494,15 @@ int proto_register(struct proto *prot, int alloc_slab) ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; -out_free_timewait_sock_slab_name: +out_free_timewait_sock_slab: if (alloc_slab && prot->twsk_prot) - kfree(prot->twsk_prot->twsk_slab_name); + tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); @@ -3516,12 +3526,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); - - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(prot->twsk_prot->twsk_slab_name); - prot->twsk_prot->twsk_slab = NULL; - } + tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); -- cgit From 1b8ef1423dbfd34de2439a2db457b84480b7c8a8 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 10 Aug 2020 17:01:58 +0200 Subject: net: phy: marvell10g: fix null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") added a check for PHY ID via phydev->drv->phy_id in a function which is called by devres at a time when phydev->drv is already set to null by phy_remove function. This null pointer dereference can be triggered via SFP subsystem with a SFP module containing this Marvell PHY. When the SFP interface is put down, the SFP subsystem removes the PHY. Fixes: c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") Signed-off-by: Marek Behún Cc: Maxime Chevallier Cc: Andrew Lunn Cc: Baruch Siach Cc: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index a7610eb55f30..1901ba277413 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -208,13 +208,6 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) MV_V2_TEMP_CTRL_MASK, val); } -static void mv3310_hwmon_disable(void *data) -{ - struct phy_device *phydev = data; - - mv3310_hwmon_config(phydev, false); -} - static int mv3310_hwmon_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -238,10 +231,6 @@ static int mv3310_hwmon_probe(struct phy_device *phydev) if (ret) return ret; - ret = devm_add_action_or_reset(dev, mv3310_hwmon_disable, phydev); - if (ret) - return ret; - priv->hwmon_dev = devm_hwmon_device_register_with_info(dev, priv->hwmon_name, phydev, &mv3310_hwmon_chip_info, NULL); @@ -426,6 +415,11 @@ static int mv3310_probe(struct phy_device *phydev) return phy_sfp_probe(phydev, &mv3310_sfp_ops); } +static void mv3310_remove(struct phy_device *phydev) +{ + mv3310_hwmon_config(phydev, false); +} + static int mv3310_suspend(struct phy_device *phydev) { return mv3310_power_down(phydev); @@ -784,6 +778,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, { .phy_id = MARVELL_PHY_ID_88E2110, @@ -798,6 +793,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, }; -- cgit From 62ffc589abb176821662efc4525ee4ac0b9c3894 Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:23 +0200 Subject: net: refactor bind_bucket fastreuse into helper Refactor the fastreuse update code in inet_csk_get_port into a small helper function that can be called from other places. Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++ net/ipv4/inet_connection_sock.c | 97 +++++++++++++++++++++----------------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1e209ce7d1bd..aa8893c68c50 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); +/* update the fast reuse flag when adding a socket */ +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk); + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d1a3913eebe0..b457dd2d6c75 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -296,6 +296,57 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, ipv6_only_sock(sk), true, false); } +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk) +{ + kuid_t uid = sock_i_uid(sk); + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + + if (hlist_empty(&tb->owners)) { + tb->fastreuse = reuse; + if (sk->sk_reuseport) { + tb->fastreuseport = FASTREUSEPORT_ANY; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } else { + tb->fastreuseport = 0; + } + } else { + if (!reuse) + tb->fastreuse = 0; + if (sk->sk_reuseport) { + /* We didn't match or we don't have fastreuseport set on + * the tb, but we have sk_reuseport set on this socket + * and we know that there are no bind conflicts with + * this socket in this tb, so reset our tb's reuseport + * settings so that any subsequent sockets that match + * our current socket will be put on the fast path. + * + * If we reset we need to set FASTREUSEPORT_STRICT so we + * do extra checking for all subsequent sk_reuseport + * socks. + */ + if (!sk_reuseport_match(tb, sk)) { + tb->fastreuseport = FASTREUSEPORT_STRICT; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } + } else { + tb->fastreuseport = 0; + } + } +} + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. * We try to allocate an odd port (and leave even ports for connect()) @@ -308,7 +359,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); struct inet_bind_bucket *tb = NULL; - kuid_t uid = sock_i_uid(sk); int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); @@ -345,49 +395,8 @@ tb_found: goto fail_unlock; } success: - if (hlist_empty(&tb->owners)) { - tb->fastreuse = reuse; - if (sk->sk_reuseport) { - tb->fastreuseport = FASTREUSEPORT_ANY; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } else { - tb->fastreuseport = 0; - } - } else { - if (!reuse) - tb->fastreuse = 0; - if (sk->sk_reuseport) { - /* We didn't match or we don't have fastreuseport set on - * the tb, but we have sk_reuseport set on this socket - * and we know that there are no bind conflicts with - * this socket in this tb, so reset our tb's reuseport - * settings so that any subsequent sockets that match - * our current socket will be put on the fast path. - * - * If we reset we need to set FASTREUSEPORT_STRICT so we - * do extra checking for all subsequent sk_reuseport - * socks. - */ - if (!sk_reuseport_match(tb, sk)) { - tb->fastreuseport = FASTREUSEPORT_STRICT; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } - } else { - tb->fastreuseport = 0; - } - } + inet_csk_update_fastreuse(tb, sk); + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); -- cgit From d76f3351cea2d927fdf70dd7c06898235035e84e Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:24 +0200 Subject: net: initialize fastreuse on inet_inherit_port In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind behaviour or in the incorrect reuse of a bind. the kernel keeps track for each bind_bucket if all sockets in the bind_bucket support SO_REUSEADDR or SO_REUSEPORT in two fastreuse flags. These flags allow skipping the costly bind_conflict check when possible (meaning when all sockets have the proper SO_REUSE option). For every socket added to a bind_bucket, these flags need to be updated. As soon as a socket that does not support reuse is added, the flag is set to false and will never go back to true, unless the bind_bucket is deleted. Note that there is no mechanism to re-evaluate these flags when a socket is removed (this might make sense when removing a socket that would not allow reuse; this leaves room for a future patch). For this optimization to work, it is mandatory that these flags are properly initialized and updated. When a child socket is created from a listen socket in __inet_inherit_port, the TPROXY case could create a new bind bucket without properly initializing these flags, thus preventing the optimization to work. Alternatively, a socket not allowing reuse could be added to an existing bind bucket without updating the flags, causing bind_conflict to never be called as it should. Call inet_csk_update_fastreuse when __inet_inherit_port decides to create a new bind_bucket or use a different bind_bucket than the one of the listen socket. Fixes: 093d282321da ("tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()") Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4eb4cd8d20dd..239e54474b65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) return -ENOMEM; } } + inet_csk_update_fastreuse(tb, child); } inet_bind_hash(child, tb, port); spin_unlock(&head->lock); -- cgit From 2c7f8937ef91520a8a4bd700d5817b5e9c99803c Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Tue, 16 Jun 2020 11:18:07 +0900 Subject: exfat: remove EXFAT_SB_DIRTY flag This flag is set/reset in exfat_put_super()/exfat_sync_fs() to avoid sync_blockdev(). - exfat_put_super(): Before calling this, the VFS has already called sync_filesystem(), so sync is never performed here. - exfat_sync_fs(): After calling this, the VFS calls sync_blockdev(), so, it is meaningless to check EXFAT_SB_DIRTY or to bypass sync_blockdev() here. Remove the EXFAT_SB_DIRTY check to ensure synchronization. And remove the code related to the flag. Signed-off-by: Tetsuhiro Kohada Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/balloc.c | 4 ++-- fs/exfat/dir.c | 16 ++++++++-------- fs/exfat/exfat_fs.h | 5 +---- fs/exfat/fatent.c | 7 ++----- fs/exfat/misc.c | 3 +-- fs/exfat/namei.c | 12 ++++++------ fs/exfat/super.c | 14 ++++++-------- 7 files changed, 26 insertions(+), 35 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 4055eb00ea9b..a987919686c0 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -158,7 +158,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); set_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); return 0; } @@ -180,7 +180,7 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); clear_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); if (opts->discard) { int ret_discard; diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 119abf0d8dd6..97296bc2b241 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -470,7 +470,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, &ep->dentry.file.access_date, NULL); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); @@ -480,7 +480,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, exfat_init_stream_entry(ep, (type == TYPE_FILE) ? ALLOC_FAT_CHAIN : ALLOC_NO_FAT_CHAIN, start_clu, size); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); return 0; @@ -516,7 +516,7 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, } fep->dentry.file.checksum = cpu_to_le16(chksum); - exfat_update_bh(sb, fbh, IS_DIRSYNC(inode)); + exfat_update_bh(fbh, IS_DIRSYNC(inode)); release_fbh: brelse(fbh); return ret; @@ -538,7 +538,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; ep->dentry.file.num_ext = (unsigned char)(num_entries - 1); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, §or); @@ -547,7 +547,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, ep->dentry.stream.name_len = p_uniname->name_len; ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) { @@ -556,7 +556,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_init_name_entry(ep, uniname); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); uniname += EXFAT_FILE_NAME_LEN; } @@ -580,7 +580,7 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_set_entry_type(ep, TYPE_DELETED); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); } @@ -610,7 +610,7 @@ void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) for (i = 0; i < es->num_bh; i++) { if (es->modified) - exfat_update_bh(es->sb, es->bh[i], sync); + exfat_update_bh(es->bh[i], sync); brelse(es->bh[i]); } kfree(es); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 75c7bdbeba6d..05553b2924ee 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -13,8 +13,6 @@ #define EXFAT_SUPER_MAGIC 0x2011BAB0UL #define EXFAT_ROOT_INO 1 -#define EXFAT_SB_DIRTY 0 - #define EXFAT_CLUSTERS_UNTRACKED (~0u) /* @@ -238,7 +236,6 @@ struct exfat_sb_info { unsigned int clu_srch_ptr; /* cluster search pointer */ unsigned int used_clusters; /* number of used clusters */ - unsigned long s_state; struct mutex s_lock; /* superblock lock */ struct exfat_mount_options options; struct nls_table *nls_io; /* Charset used for input and display */ @@ -515,7 +512,7 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync); +void exfat_update_bh(struct buffer_head *bh, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 4e5c5c9c0f2d..82ee8246c080 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -75,7 +75,7 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc, fat_entry = (__le32 *)&(bh->b_data[off]); *fat_entry = cpu_to_le32(content); - exfat_update_bh(sb, bh, sb->s_flags & SB_SYNCHRONOUS); + exfat_update_bh(bh, sb->s_flags & SB_SYNCHRONOUS); exfat_mirror_bh(sb, sec, bh); brelse(bh); return 0; @@ -174,7 +174,6 @@ int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) return -EIO; } - set_bit(EXFAT_SB_DIRTY, &sbi->s_state); clu = p_chain->dir; if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { @@ -274,7 +273,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) goto release_bhs; } memset(bhs[n]->b_data, 0, sb->s_blocksize); - exfat_update_bh(sb, bhs[n], 0); + exfat_update_bh(bhs[n], 0); n++; blknr++; @@ -358,8 +357,6 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, } } - set_bit(EXFAT_SB_DIRTY, &sbi->s_state); - p_chain->dir = EXFAT_EOF_CLUSTER; while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) != diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 17d41f3d3709..8a3dde59052b 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -163,9 +163,8 @@ u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type) return chksum; } -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync) +void exfat_update_bh(struct buffer_head *bh, int sync) { - set_bit(EXFAT_SB_DIRTY, &EXFAT_SB(sb)->s_state); set_buffer_uptodate(bh); mark_buffer_dirty(bh); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2b9e21094a96..126ed3ba8f47 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -387,7 +387,7 @@ static int exfat_find_empty_entry(struct inode *inode, ep->dentry.stream.valid_size = cpu_to_le64(size); ep->dentry.stream.size = ep->dentry.stream.valid_size; ep->dentry.stream.flags = p_dir->flags; - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); if (exfat_update_dir_chksum(inode, &(ei->dir), ei->entry)) @@ -1071,7 +1071,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1087,7 +1087,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, } memcpy(epnew, epold, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1104,7 +1104,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epold->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, old_bh, sync); + exfat_update_bh(old_bh, sync); brelse(old_bh); ret = exfat_init_ext_entry(inode, p_dir, oldentry, num_new_entries, p_uniname); @@ -1159,7 +1159,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); @@ -1175,7 +1175,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, } memcpy(epnew, epmov, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 253a92460d52..b5bf6dedbe11 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -45,9 +45,6 @@ static void exfat_put_super(struct super_block *sb) struct exfat_sb_info *sbi = EXFAT_SB(sb); mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) - sync_blockdev(sb->s_bdev); - exfat_set_vol_flags(sb, VOL_CLEAN); exfat_free_bitmap(sbi); brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); @@ -60,13 +57,14 @@ static int exfat_sync_fs(struct super_block *sb, int wait) struct exfat_sb_info *sbi = EXFAT_SB(sb); int err = 0; + if (!wait) + return 0; + /* If there are some dirty buffers in the bdev inode */ mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) { - sync_blockdev(sb->s_bdev); - if (exfat_set_vol_flags(sb, VOL_CLEAN)) - err = -EIO; - } + sync_blockdev(sb->s_bdev); + if (exfat_set_vol_flags(sb, VOL_CLEAN)) + err = -EIO; mutex_unlock(&sbi->s_lock); return err; } -- cgit From 3db3c3fb840ed4a6c7666d1464959edd40fe54f1 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Tue, 23 Jun 2020 15:22:19 +0900 Subject: exfat: write multiple sectors at once Write multiple sectors at once when updating dir-entries. Add exfat_update_bhs() for that. It wait for write completion once instead of sector by sector. It's only effective if sync enabled. Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 15 +++++++++------ fs/exfat/exfat_fs.h | 1 + fs/exfat/misc.c | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 97296bc2b241..542f1a5ab56f 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -606,13 +606,16 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) { - int i; + int i, err = 0; - for (i = 0; i < es->num_bh; i++) { - if (es->modified) - exfat_update_bh(es->bh[i], sync); - brelse(es->bh[i]); - } + if (es->modified) + err = exfat_update_bhs(es->bh, es->num_bh, sync); + + for (i = 0; i < es->num_bh; i++) + if (err) + bforget(es->bh[i]); + else + brelse(es->bh[i]); kfree(es); } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 05553b2924ee..af0f526eece7 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -513,6 +513,7 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); void exfat_update_bh(struct buffer_head *bh, int sync); +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 8a3dde59052b..d34e6193258d 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -172,6 +172,25 @@ void exfat_update_bh(struct buffer_head *bh, int sync) sync_dirty_buffer(bh); } +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync) +{ + int i, err = 0; + + for (i = 0; i < nr_bhs; i++) { + set_buffer_uptodate(bhs[i]); + mark_buffer_dirty(bhs[i]); + if (sync) + write_dirty_buffer(bhs[i], 0); + } + + for (i = 0; i < nr_bhs && sync; i++) { + wait_on_buffer(bhs[i]); + if (!err && !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags) { -- cgit From 8b0c471773819c8201dc0b0123e1580639ee1570 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Wed, 24 Jun 2020 09:54:54 +0900 Subject: exfat: add error check when updating dir-entries Add error check when synchronously updating dir-entries. Suggested-by: Sungjong Seo Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 3 ++- fs/exfat/exfat_fs.h | 2 +- fs/exfat/file.c | 5 ++++- fs/exfat/inode.c | 9 +++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 542f1a5ab56f..573659bfbc55 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -604,7 +604,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) es->modified = true; } -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) { int i, err = 0; @@ -617,6 +617,7 @@ void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) else brelse(es->bh[i]); kfree(es); + return err; } static int exfat_walk_fat_chain(struct super_block *sb, diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index af0f526eece7..cb51d6e83199 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -460,7 +460,7 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned int type); -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a6a063830edc..6bdabfd4b134 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -154,6 +154,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) struct timespec64 ts; struct exfat_dentry *ep, *ep2; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -188,7 +189,9 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) } exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* cut off from the FAT chain */ diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index cf9ca6c4d046..f0160a7892a8 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -77,8 +77,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) ep2->dentry.stream.size = ep2->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, sync); - return 0; + return exfat_free_dentry_set(es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -222,6 +221,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, if (ei->dir.dir != DIR_DELETED && modified) { struct exfat_dentry *ep; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -240,8 +240,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, ep->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); - + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* end of if != DIR_DELETED */ inode->i_blocks += -- cgit From 4dc7d35e09ba78aa0a3bcaa9fad1c19952e018a7 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Wed, 24 Jun 2020 11:30:40 +0900 Subject: exfat: optimize exfat_zeroed_cluster() Replace part of exfat_zeroed_cluster() with exfat_update_bhs(). And remove exfat_sync_bhs(). Signed-off-by: Tetsuhiro Kohada Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/fatent.c | 53 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 82ee8246c080..c3c9afee7418 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -229,21 +229,6 @@ int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, return 0; } -static inline int exfat_sync_bhs(struct buffer_head **bhs, int nr_bhs) -{ - int i, err = 0; - - for (i = 0; i < nr_bhs; i++) - write_dirty_buffer(bhs[i], 0); - - for (i = 0; i < nr_bhs; i++) { - wait_on_buffer(bhs[i]); - if (!err && !buffer_uptodate(bhs[i])) - err = -EIO; - } - return err; -} - int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) { struct super_block *sb = dir->i_sb; @@ -265,41 +250,23 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) } /* Zeroing the unused blocks on this cluster */ - n = 0; while (blknr < last_blknr) { - bhs[n] = sb_getblk(sb, blknr); - if (!bhs[n]) { - err = -ENOMEM; - goto release_bhs; - } - memset(bhs[n]->b_data, 0, sb->s_blocksize); - exfat_update_bh(bhs[n], 0); - - n++; - blknr++; - - if (n == nr_bhs) { - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); - if (err) - goto release_bhs; + for (n = 0; n < nr_bhs && blknr < last_blknr; n++, blknr++) { + bhs[n] = sb_getblk(sb, blknr); + if (!bhs[n]) { + err = -ENOMEM; + goto release_bhs; } - - for (i = 0; i < n; i++) - brelse(bhs[i]); - n = 0; + memset(bhs[n]->b_data, 0, sb->s_blocksize); } - } - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); + err = exfat_update_bhs(bhs, n, IS_DIRSYNC(dir)); if (err) goto release_bhs; - } - - for (i = 0; i < n; i++) - brelse(bhs[i]); + for (i = 0; i < n; i++) + brelse(bhs[i]); + } return 0; release_bhs: -- cgit From 7018ec68f08249de17cb131b324d5a48e89ed898 Mon Sep 17 00:00:00 2001 From: Tetsuhiro Kohada Date: Fri, 31 Jul 2020 14:58:26 +0900 Subject: exfat: retain 'VolumeFlags' properly MediaFailure and VolumeDirty should be retained if these are set before mounting. In '3.1.13.3 Media Failure Field' of exfat specification describe: If, upon mounting a volume, the value of this field is 1, implementations which scan the entire volume for media failures and record all failures as "bad" clusters in the FAT (or otherwise resolve media failures) may clear the value of this field to 0. Therefore, We should not clear MediaFailure without scanning volume. In '8.1 Recommended Write Ordering' of exfat specification describe: Clear the value of the VolumeDirty field to 0, if its value prior to the first step was 0. Therefore, We should not clear VolumeDirty after mounting. Also rename ERR_MEDIUM to MEDIA_FAILURE. Signed-off-by: Tetsuhiro Kohada Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 6 ++++-- fs/exfat/exfat_raw.h | 5 ++--- fs/exfat/file.c | 4 ++-- fs/exfat/inode.c | 4 ++-- fs/exfat/namei.c | 20 ++++++++++---------- fs/exfat/super.c | 36 +++++++++++++++++++++++++++--------- 6 files changed, 47 insertions(+), 28 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index cb51d6e83199..95d717f8620c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -224,7 +224,8 @@ struct exfat_sb_info { unsigned int num_FAT_sectors; /* num of FAT sectors */ unsigned int root_dir; /* root dir cluster */ unsigned int dentries_per_clu; /* num of dentries per cluster */ - unsigned int vol_flag; /* volume dirty flag */ + unsigned int vol_flags; /* volume flags */ + unsigned int vol_flags_persistent; /* volume flags to retain */ struct buffer_head *boot_bh; /* buffer_head of BOOT sector */ unsigned int map_clu; /* allocation bitmap start cluster */ @@ -380,7 +381,8 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, } /* super.c */ -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag); +int exfat_set_volume_dirty(struct super_block *sb); +int exfat_clear_volume_dirty(struct super_block *sb); /* fatent.c */ #define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h index 350ce59cc324..6aec6288e1f2 100644 --- a/fs/exfat/exfat_raw.h +++ b/fs/exfat/exfat_raw.h @@ -14,9 +14,8 @@ #define EXFAT_MAX_FILE_LEN 255 -#define VOL_CLEAN 0x0000 -#define VOL_DIRTY 0x0002 -#define ERR_MEDIUM 0x0004 +#define VOLUME_DIRTY 0x0002 +#define MEDIA_FAILURE 0x0004 #define EXFAT_EOF_CLUSTER 0xFFFFFFFFu #define EXFAT_BAD_CLUSTER 0xFFFFFFF7u diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 6bdabfd4b134..f41f523a58ad 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -106,7 +106,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) return -EPERM; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); num_clusters_phys = @@ -220,7 +220,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (exfat_free_cluster(inode, &clu)) return -EIO; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); return 0; } diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index f0160a7892a8..7f90204adef5 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -39,7 +39,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) return 0; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -167,7 +167,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, } if (*clu == EXFAT_EOF_CLUSTER) { - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? EXFAT_EOF_CLUSTER : last_clu + 1; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 126ed3ba8f47..e73f20f66cb2 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -562,10 +562,10 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if (err) goto unlock; @@ -834,7 +834,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* update the directory entry */ if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) { err = -EIO; @@ -843,7 +843,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) /* This doesn't modify ei */ ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -873,10 +873,10 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if (err) goto unlock; @@ -1001,14 +1001,14 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries); if (err) { exfat_err(sb, "failed to exfat_remove_entries : err(%d)", err); goto unlock; } ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -1300,7 +1300,7 @@ static int __exfat_rename(struct inode *old_parent_inode, if (ret) goto out; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); if (olddir.dir == newdir.dir) ret = exfat_rename_file(new_parent_inode, &olddir, dentry, @@ -1355,7 +1355,7 @@ del_out: */ new_ei->dir.dir = DIR_DELETED; } - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); out: return ret; } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index b5bf6dedbe11..3b6a1659892f 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -63,7 +63,7 @@ static int exfat_sync_fs(struct super_block *sb, int wait) /* If there are some dirty buffers in the bdev inode */ mutex_lock(&sbi->s_lock); sync_blockdev(sb->s_bdev); - if (exfat_set_vol_flags(sb, VOL_CLEAN)) + if (exfat_clear_volume_dirty(sb)) err = -EIO; mutex_unlock(&sbi->s_lock); return err; @@ -96,17 +96,20 @@ static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) +static int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flags) { struct exfat_sb_info *sbi = EXFAT_SB(sb); struct boot_sector *p_boot = (struct boot_sector *)sbi->boot_bh->b_data; bool sync; + /* retain persistent-flags */ + new_flags |= sbi->vol_flags_persistent; + /* flags are not changed */ - if (sbi->vol_flag == new_flag) + if (sbi->vol_flags == new_flags) return 0; - sbi->vol_flag = new_flag; + sbi->vol_flags = new_flags; /* skip updating volume dirty flag, * if this volume has been mounted with read-only @@ -114,9 +117,9 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) if (sb_rdonly(sb)) return 0; - p_boot->vol_flags = cpu_to_le16(new_flag); + p_boot->vol_flags = cpu_to_le16(new_flags); - if (new_flag == VOL_DIRTY && !buffer_dirty(sbi->boot_bh)) + if ((new_flags & VOLUME_DIRTY) && !buffer_dirty(sbi->boot_bh)) sync = true; else sync = false; @@ -129,6 +132,20 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) return 0; } +int exfat_set_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY); +} + +int exfat_clear_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY); +} + static int exfat_show_options(struct seq_file *m, struct dentry *root) { struct super_block *sb = root->d_sb; @@ -457,7 +474,8 @@ static int exfat_read_boot_sector(struct super_block *sb) sbi->dentries_per_clu = 1 << (sbi->cluster_size_bits - DENTRY_SIZE_BITS); - sbi->vol_flag = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags_persistent = sbi->vol_flags & (VOLUME_DIRTY | MEDIA_FAILURE); sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; @@ -472,9 +490,9 @@ static int exfat_read_boot_sector(struct super_block *sb) exfat_err(sb, "bogus data start sector"); return -EINVAL; } - if (sbi->vol_flag & VOL_DIRTY) + if (sbi->vol_flags & VOLUME_DIRTY) exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); - if (sbi->vol_flag & ERR_MEDIUM) + if (sbi->vol_flags & MEDIA_FAILURE) exfat_warn(sb, "Medium has reported failures. Some data may be lost."); /* exFAT file size is limited by a disk volume size */ -- cgit From 5b24993c21cbf2de11aff077a48c5cb0505a0450 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 11 Aug 2020 18:19:19 +0200 Subject: parisc: mask out enable and reserved bits from sba imask When using kexec the SBA IOMMU IBASE might still have the RE bit set. This triggers a WARN_ON when trying to write back the IBASE register later, and it also makes some mask calculations fail. Cc: Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- drivers/parisc/sba_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 7e112829d250..00785fa81ff7 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1270,7 +1270,7 @@ sba_ioc_init_pluto(struct parisc_device *sba, struct ioc *ioc, int ioc_num) ** (one that doesn't overlap memory or LMMIO space) in the ** IBASE and IMASK registers. */ - ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE); + ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1fffffULL; iova_space_size = ~(READ_REG(ioc->ioc_hpa + IOC_IMASK) & 0xFFFFFFFFUL) + 1; if ((ioc->ibase < 0xfed00000UL) && ((ioc->ibase + iova_space_size) > 0xfee00000UL)) { -- cgit From 62975d27d647a40c58d3b96c29b911fc4f33c310 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 12 Aug 2020 13:03:49 +1000 Subject: drm/ttm: revert "drm/ttm: make TT creation purely optional v3" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2ddef17678bc2ea1d20517dd2b4ed4aa967ffa8b. As it turned out VMWGFX needs a much wider audit to fix this. Signed-off-by: Christian König Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20200811092400.188124-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 37 ++++++++++++++++++++++++++++--------- drivers/gpu/drm/ttm/ttm_bo_util.c | 7 ++----- drivers/gpu/drm/ttm/ttm_bo_vm.c | 5 ----- drivers/gpu/drm/ttm/ttm_tt.c | 4 +--- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f297fd5e02d4..cc6a4e7551e3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -287,11 +287,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, */ if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) { - bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED); - - ret = ttm_tt_create(bo, zero); - if (ret) - goto out_err; + if (bo->ttm == NULL) { + bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED); + ret = ttm_tt_create(bo, zero); + if (ret) + goto out_err; + } ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement); if (ret) @@ -652,8 +653,13 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, placement.num_busy_placement = 0; bdev->driver->evict_flags(bo, &placement); - if (!placement.num_placement && !placement.num_busy_placement) - return ttm_bo_pipeline_gutting(bo); + if (!placement.num_placement && !placement.num_busy_placement) { + ret = ttm_bo_pipeline_gutting(bo); + if (ret) + return ret; + + return ttm_tt_create(bo, false); + } evict_mem = bo->mem; evict_mem.mm_node = NULL; @@ -1192,8 +1198,13 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, /* * Remove the backing store if no placement is given. */ - if (!placement->num_placement && !placement->num_busy_placement) - return ttm_bo_pipeline_gutting(bo); + if (!placement->num_placement && !placement->num_busy_placement) { + ret = ttm_bo_pipeline_gutting(bo); + if (ret) + return ret; + + return ttm_tt_create(bo, false); + } /* * Check whether we need to move buffer. @@ -1210,6 +1221,14 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, ttm_flag_masked(&bo->mem.placement, new_flags, ~TTM_PL_MASK_MEMTYPE); } + /* + * We might need to add a TTM. + */ + if (bo->mem.mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { + ret = ttm_tt_create(bo, true); + if (ret) + return ret; + } return 0; } EXPORT_SYMBOL(ttm_bo_validate); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7fb3e0bcbab4..e6c8bd254055 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -531,15 +531,12 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, .interruptible = false, .no_wait_gpu = false }; - struct ttm_tt *ttm; + struct ttm_tt *ttm = bo->ttm; pgprot_t prot; int ret; - ret = ttm_tt_create(bo, true); - if (ret) - return ret; + BUG_ON(!ttm); - ttm = bo->ttm; ret = ttm_tt_populate(ttm, &ctx); if (ret) return ret; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index dd60b96b7522..33526c5df0e8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -351,11 +351,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, }; - if (ttm_tt_create(bo, true)) { - ret = VM_FAULT_OOM; - goto out_io_unlock; - } - ttm = bo->ttm; if (ttm_tt_populate(bo->ttm, &ctx)) { ret = VM_FAULT_OOM; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9d1c7177384c..3437711ddb43 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -50,9 +50,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) dma_resv_assert_held(bo->base.resv); - if (bo->ttm) - return 0; - if (bdev->need_dma32) page_flags |= TTM_PAGE_FLAG_DMA32; @@ -70,6 +67,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) page_flags |= TTM_PAGE_FLAG_SG; break; default: + bo->ttm = NULL; pr_err("Illegal buffer object type\n"); return -EINVAL; } -- cgit From e5b1d9776ad3817a8c90336038bf7a219425b57f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 12 Aug 2020 09:02:56 +0200 Subject: ALSA: hda/realtek - Fix unused variable warning The previous fix forgot to remove the unused variable that triggers a compile warning now: sound/pci/hda/patch_realtek.c: In function 'alc285_fixup_hp_gpio_led': sound/pci/hda/patch_realtek.c:4163:19: warning: unused variable 'spec' [-Wunused-variable] Fix it. Fixes: 404690649e6a ("ALSA: hda - reverse the setting value in the micmute_led_set") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20200812070256.32145-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 073029aeaf3c..7f9d35273734 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4160,8 +4160,6 @@ static void alc269_fixup_hp_gpio_led(struct hda_codec *codec, static void alc285_fixup_hp_gpio_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { - struct alc_spec *spec = codec->spec; - alc_fixup_hp_gpio_led(codec, action, 0x04, 0x01); } -- cgit From c57dd1f2f6a7cd1bb61802344f59ccdc5278c983 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 31 Jul 2020 19:29:11 +0800 Subject: btrfs: trim: fix underflow in trim length to prevent access beyond device boundary [BUG] The following script can lead to tons of beyond device boundary access: mkfs.btrfs -f $dev -b 10G mount $dev $mnt trimfs $mnt btrfs filesystem resize 1:-1G $mnt trimfs $mnt [CAUSE] Since commit 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit"), we try to avoid trimming ranges that's already trimmed. So we check device->alloc_state by finding the first range which doesn't have CHUNK_TRIMMED and CHUNK_ALLOCATED not set. But if we shrunk the device, that bits are not cleared, thus we could easily got a range starts beyond the shrunk device size. This results the returned @start and @end are all beyond device size, then we call "end = min(end, device->total_bytes -1);" making @end smaller than device size. Then finally we goes "len = end - start + 1", totally underflow the result, and lead to the beyond-device-boundary access. [FIX] This patch will fix the problem in two ways: - Clear CHUNK_TRIMMED | CHUNK_ALLOCATED bits when shrinking device This is the root fix - Add extra safety check when trimming free device extents We check and warn if the returned range is already beyond current device. Link: https://github.com/kdave/btrfs-progs/issues/282 Fixes: 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-io-tree.h | 2 ++ fs/btrfs/extent-tree.c | 14 ++++++++++++++ fs/btrfs/volumes.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index f39d47a2d01a..219a09a2b734 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -34,6 +34,8 @@ struct io_failure_record; */ #define CHUNK_ALLOCATED EXTENT_DIRTY #define CHUNK_TRIMMED EXTENT_DEFRAG +#define CHUNK_STATE_MASK (CHUNK_ALLOCATED | \ + CHUNK_TRIMMED) enum { IO_TREE_FS_PINNED_EXTENTS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61ede335f6c3..de6fe176fdfb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "discard.h" +#include "rcu-string.h" #undef SCRAMBLE_DELAYED_REFS @@ -5668,6 +5669,19 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) &start, &end, CHUNK_TRIMMED | CHUNK_ALLOCATED); + /* Check if there are any CHUNK_* bits left */ + if (start > device->total_bytes) { + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + btrfs_warn_in_rcu(fs_info, +"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", + start, end - start + 1, + rcu_str_deref(device->name), + device->total_bytes); + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + /* Ensure we skip the reserved area in the first 1M */ start = max_t(u64, start, SZ_1M); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d7670e2a9f39..ee96c5869f57 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4720,6 +4720,10 @@ again: } mutex_lock(&fs_info->chunk_mutex); + /* Clear all state bits beyond the shrunk device size */ + clear_extent_bits(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK); + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) list_add_tail(&device->post_commit_list, -- cgit From 985b30dbe2cf58c27dd81da85410439ced8ce6d7 Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Mon, 29 Jun 2020 22:42:10 -0400 Subject: rtc: pcf2127: add pca2129 device id The PCA2129 is the automotive grade version of the PCF2129. add it to the list of compatibles. Signed-off-by: Liam Beguin Signed-off-by: Alexandre Belloni Reviewed-by: Bruno Thomsen Link: https://lore.kernel.org/r/20200630024211.12782-2-liambeguin@gmail.com --- Documentation/devicetree/bindings/rtc/trivial-rtc.yaml | 2 ++ drivers/rtc/rtc-pcf2127.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 18cb456752f6..c7d14de214c4 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -52,6 +52,8 @@ properties: - nxp,pcf2127 # Real-time clock - nxp,pcf2129 + # Real-time clock + - nxp,pca2129 # Real-time Clock Module - pericom,pt7c4338 # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9c5670776c68..4accee09bfad 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -553,6 +553,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, static const struct of_device_id pcf2127_of_match[] = { { .compatible = "nxp,pcf2127" }, { .compatible = "nxp,pcf2129" }, + { .compatible = "nxp,pca2129" }, {} }; MODULE_DEVICE_TABLE(of, pcf2127_of_match); @@ -664,6 +665,7 @@ static int pcf2127_i2c_probe(struct i2c_client *client, static const struct i2c_device_id pcf2127_i2c_id[] = { { "pcf2127", 1 }, { "pcf2129", 0 }, + { "pca2129", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); @@ -729,6 +731,7 @@ static int pcf2127_spi_probe(struct spi_device *spi) static const struct spi_device_id pcf2127_spi_id[] = { { "pcf2127", 1 }, { "pcf2129", 0 }, + { "pca2129", 0 }, { } }; MODULE_DEVICE_TABLE(spi, pcf2127_spi_id); -- cgit From 8a914bac44be33623cfcf27688b18b6f81a5c7d5 Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Mon, 29 Jun 2020 22:42:11 -0400 Subject: rtc: pcf2127: add alarm support Add alarm support for the pcf2127 RTC chip family. Tested on pca2129. Signed-off-by: Liam Beguin Signed-off-by: Alexandre Belloni Reviewed-by: Bruno Thomsen Link: https://lore.kernel.org/r/20200630024211.12782-3-liambeguin@gmail.com --- drivers/rtc/rtc-pcf2127.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 4accee09bfad..4e99c45a87d7 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,9 @@ #define PCF2127_BIT_CTRL1_TSF1 BIT(4) /* Control register 2 */ #define PCF2127_REG_CTRL2 0x01 +#define PCF2127_BIT_CTRL2_AIE BIT(1) #define PCF2127_BIT_CTRL2_TSIE BIT(2) +#define PCF2127_BIT_CTRL2_AF BIT(4) #define PCF2127_BIT_CTRL2_TSF2 BIT(5) /* Control register 3 */ #define PCF2127_REG_CTRL3 0x02 @@ -46,6 +49,12 @@ #define PCF2127_REG_DW 0x07 #define PCF2127_REG_MO 0x08 #define PCF2127_REG_YR 0x09 +/* Alarm registers */ +#define PCF2127_REG_ALARM_SC 0x0A +#define PCF2127_REG_ALARM_MN 0x0B +#define PCF2127_REG_ALARM_HR 0x0C +#define PCF2127_REG_ALARM_DM 0x0D +#define PCF2127_REG_ALARM_DW 0x0E /* Watchdog registers */ #define PCF2127_REG_WD_CTL 0x10 #define PCF2127_BIT_WD_CTL_TF0 BIT(0) @@ -324,6 +333,114 @@ static const struct watchdog_ops pcf2127_watchdog_ops = { .set_timeout = pcf2127_wdt_set_timeout, }; +/* Alarm */ +static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + unsigned int buf[5], ctrl2; + int ret; + + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); + if (ret) + return ret; + + ret = pcf2127_wdt_active_ping(&pcf2127->wdd); + if (ret) + return ret; + + ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf, + sizeof(buf)); + if (ret) + return ret; + + alrm->enabled = ctrl2 & PCF2127_BIT_CTRL2_AIE; + alrm->pending = ctrl2 & PCF2127_BIT_CTRL2_AF; + + alrm->time.tm_sec = bcd2bin(buf[0] & 0x7F); + alrm->time.tm_min = bcd2bin(buf[1] & 0x7F); + alrm->time.tm_hour = bcd2bin(buf[2] & 0x3F); + alrm->time.tm_mday = bcd2bin(buf[3] & 0x3F); + alrm->time.tm_wday = buf[4] & 0x07; + + return 0; +} + +static int pcf2127_rtc_alarm_irq_enable(struct device *dev, u32 enable) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2, + PCF2127_BIT_CTRL2_AIE, + enable ? PCF2127_BIT_CTRL2_AIE : 0); + if (ret) + return ret; + + return pcf2127_wdt_active_ping(&pcf2127->wdd); +} + +static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + uint8_t buf[5]; + int ret; + + ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2, + PCF2127_BIT_CTRL2_AF, 0); + if (ret) + return ret; + + ret = pcf2127_wdt_active_ping(&pcf2127->wdd); + if (ret) + return ret; + + buf[0] = bin2bcd(alrm->time.tm_sec); + buf[1] = bin2bcd(alrm->time.tm_min); + buf[2] = bin2bcd(alrm->time.tm_hour); + buf[3] = bin2bcd(alrm->time.tm_mday); + buf[4] = (alrm->time.tm_wday & 0x07); + + ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf, + sizeof(buf)); + if (ret) + return ret; + + return pcf2127_rtc_alarm_irq_enable(dev, alrm->enabled); +} + +static irqreturn_t pcf2127_rtc_irq(int irq, void *dev) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + unsigned int ctrl2 = 0; + int ret = 0; + + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); + if (ret) + return IRQ_NONE; + + if (ctrl2 & PCF2127_BIT_CTRL2_AF) { + regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2, + ctrl2 & ~PCF2127_BIT_CTRL2_AF); + + rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF); + } + + ret = pcf2127_wdt_active_ping(&pcf2127->wdd); + if (ret) + return IRQ_NONE; + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops pcf2127_rtc_alrm_ops = { + .ioctl = pcf2127_rtc_ioctl, + .read_time = pcf2127_rtc_read_time, + .set_time = pcf2127_rtc_set_time, + .read_alarm = pcf2127_rtc_read_alarm, + .set_alarm = pcf2127_rtc_set_alarm, + .alarm_irq_enable = pcf2127_rtc_alarm_irq_enable, +}; + /* sysfs interface */ static ssize_t timestamp0_store(struct device *dev, @@ -419,6 +536,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, const char *name, bool has_nvmem) { struct pcf2127 *pcf2127; + int alarm_irq = 0; u32 wdd_timeout; int ret = 0; @@ -441,6 +559,22 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099; pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */ + alarm_irq = of_irq_get_byname(dev->of_node, "alarm"); + if (alarm_irq >= 0) { + ret = devm_request_irq(dev, alarm_irq, pcf2127_rtc_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev_name(dev), dev); + if (ret) { + dev_err(dev, "failed to request alarm irq\n"); + return ret; + } + } + + if (alarm_irq >= 0 || device_property_read_bool(dev, "wakeup-source")) { + device_init_wakeup(dev, true); + pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; + } + pcf2127->wdd.parent = dev; pcf2127->wdd.info = &pcf2127_wdt_info; pcf2127->wdd.ops = &pcf2127_watchdog_ops; -- cgit From 27006416be16b7887fb94b3b445f32453defb3f1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Aug 2020 10:51:14 +0200 Subject: rtc: pcf2127: fix alarm handling Fix multiple issues when handling alarms: - Use threaded interrupt to avoid scheduling when atomic - Stop matching on week day as it may not be set correctly - Avoid parsing the DT interrupt and use what is provided by the i2c or spi subsystem - Avoid returning IRQ_NONE in case of error in the interrupt handler - Never write WDTF as specified in the datasheet - Set uie_unsupported, as for the pcf85063, setting alarms every seconds is not working correctly and confuses the RTC. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200812085114.474903-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf2127.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 4e99c45a87d7..ed6316992cbb 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -33,6 +33,7 @@ #define PCF2127_BIT_CTRL2_TSIE BIT(2) #define PCF2127_BIT_CTRL2_AF BIT(4) #define PCF2127_BIT_CTRL2_TSF2 BIT(5) +#define PCF2127_BIT_CTRL2_WDTF BIT(6) /* Control register 3 */ #define PCF2127_REG_CTRL3 0x02 #define PCF2127_BIT_CTRL3_BLIE BIT(0) @@ -55,6 +56,7 @@ #define PCF2127_REG_ALARM_HR 0x0C #define PCF2127_REG_ALARM_DM 0x0D #define PCF2127_REG_ALARM_DW 0x0E +#define PCF2127_BIT_ALARM_AE BIT(7) /* Watchdog registers */ #define PCF2127_REG_WD_CTL 0x10 #define PCF2127_BIT_WD_CTL_TF0 BIT(0) @@ -360,7 +362,6 @@ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_min = bcd2bin(buf[1] & 0x7F); alrm->time.tm_hour = bcd2bin(buf[2] & 0x3F); alrm->time.tm_mday = bcd2bin(buf[3] & 0x3F); - alrm->time.tm_wday = buf[4] & 0x07; return 0; } @@ -398,7 +399,7 @@ static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) buf[1] = bin2bcd(alrm->time.tm_min); buf[2] = bin2bcd(alrm->time.tm_hour); buf[3] = bin2bcd(alrm->time.tm_mday); - buf[4] = (alrm->time.tm_wday & 0x07); + buf[4] = PCF2127_BIT_ALARM_AE; /* Do not match on week day */ ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf, sizeof(buf)); @@ -418,16 +419,15 @@ static irqreturn_t pcf2127_rtc_irq(int irq, void *dev) if (ret) return IRQ_NONE; - if (ctrl2 & PCF2127_BIT_CTRL2_AF) { - regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2, - ctrl2 & ~PCF2127_BIT_CTRL2_AF); + if (!(ctrl2 & PCF2127_BIT_CTRL2_AF)) + return IRQ_NONE; - rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF); - } + regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2, + ctrl2 & ~(PCF2127_BIT_CTRL2_AF | PCF2127_BIT_CTRL2_WDTF)); - ret = pcf2127_wdt_active_ping(&pcf2127->wdd); - if (ret) - return IRQ_NONE; + rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF); + + pcf2127_wdt_active_ping(&pcf2127->wdd); return IRQ_HANDLED; } @@ -533,10 +533,9 @@ static const struct attribute_group pcf2127_attr_group = { }; static int pcf2127_probe(struct device *dev, struct regmap *regmap, - const char *name, bool has_nvmem) + int alarm_irq, const char *name, bool has_nvmem) { struct pcf2127 *pcf2127; - int alarm_irq = 0; u32 wdd_timeout; int ret = 0; @@ -558,12 +557,13 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099; pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */ + pcf2127->rtc->uie_unsupported = 1; - alarm_irq = of_irq_get_byname(dev->of_node, "alarm"); if (alarm_irq >= 0) { - ret = devm_request_irq(dev, alarm_irq, pcf2127_rtc_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - dev_name(dev), dev); + ret = devm_request_threaded_irq(dev, alarm_irq, NULL, + pcf2127_rtc_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev_name(dev), dev); if (ret) { dev_err(dev, "failed to request alarm irq\n"); return ret; @@ -792,7 +792,7 @@ static int pcf2127_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - return pcf2127_probe(&client->dev, regmap, + return pcf2127_probe(&client->dev, regmap, client->irq, pcf2127_i2c_driver.driver.name, id->driver_data); } @@ -858,7 +858,8 @@ static int pcf2127_spi_probe(struct spi_device *spi) return PTR_ERR(regmap); } - return pcf2127_probe(&spi->dev, regmap, pcf2127_spi_driver.driver.name, + return pcf2127_probe(&spi->dev, regmap, spi->irq, + pcf2127_spi_driver.driver.name, spi_get_device_id(spi)->driver_data); } -- cgit From 58277f502f429baee11725ca2d8cb3c43c7a6429 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2020 08:30:06 -0300 Subject: perf trace beauty: Add script to autogenerate socket families table To use with 'perf trace', to convert the protocol families to strings, e.g: $ tools/perf/trace/beauty/socket.sh static const char *socket_families[] = { [0] = "UNSPEC", [1] = "LOCAL", [2] = "INET", [3] = "AX25", [4] = "IPX", [5] = "APPLETALK", [6] = "NETROM", [7] = "BRIDGE", [8] = "ATMPVC", [9] = "X25", [10] = "INET6", [11] = "ROSE", [12] = "DECnet", [13] = "NETBEUI", [14] = "SECURITY", [15] = "KEY", [16] = "NETLINK", [17] = "PACKET", [18] = "ASH", [19] = "ECONET", [20] = "ATMSVC", [21] = "RDS", [22] = "SNA", [23] = "IRDA", [24] = "PPPOX", [25] = "WANPIPE", [26] = "LLC", [27] = "IB", [28] = "MPLS", [29] = "CAN", [30] = "TIPC", [31] = "BLUETOOTH", [32] = "IUCV", [33] = "RXRPC", [34] = "ISDN", [35] = "PHONET", [36] = "IEEE802154", [37] = "CAIF", [38] = "ALG", [39] = "NFC", [40] = "VSOCK", [41] = "KCM", [42] = "QIPCRTR", [43] = "SMC", [44] = "XDP", }; $ This uses a copy of include/linux/socket.h that is kept in a directory to be used just for these table generation scripts and for checking if the kernel has a new file that maybe gets something new for these tables. This allows us to: - Avoid accessing files outside tools/, in the kernel sources, that may be changed in unexpected ways and thus break these scripts. - Notice when those files change and thus check if the changes don't break those scripts, update them to automatically get the new definitions, a new socket family, for instance. - Not add then to the tools/include/ where it may end up used while building the tools and end up requiring dragging yet more stuff from the kernel or plain break the build in some of the myriad environments where perf may be built. This will replace the previous static array in tools/perf/ that was dated and was already missing the AF_KCM, AF_QIPCRTR, AF_SMC and AF_XDP families. The next cset will wire this up to the perf build process. At some point this must be made into a library to be used in places such as libtraceevent, bpftrace, etc. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 442 +++++++++++++++++++++++++ tools/perf/trace/beauty/socket.sh | 24 ++ 2 files changed, 466 insertions(+) create mode 100644 tools/perf/trace/beauty/include/linux/socket.h create mode 100755 tools/perf/trace/beauty/socket.sh diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h new file mode 100644 index 000000000000..e9cb30d8cbfb --- /dev/null +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SOCKET_H +#define _LINUX_SOCKET_H + + +#include /* arch-dependent defines */ +#include /* the SIOCxxx I/O controls */ +#include /* iovec support */ +#include /* pid_t */ +#include /* __user */ +#include + +struct file; +struct pid; +struct cred; +struct socket; + +#define __sockaddr_check_size(size) \ + BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) + +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + +typedef __kernel_sa_family_t sa_family_t; + +/* + * 1003.1g requires sa_family_t and that sa_data is char. + */ + +struct sockaddr { + sa_family_t sa_family; /* address family, AF_xxx */ + char sa_data[14]; /* 14 bytes of protocol address */ +}; + +struct linger { + int l_onoff; /* Linger active */ + int l_linger; /* How long to linger for */ +}; + +#define sockaddr_storage __kernel_sockaddr_storage + +/* + * As we do 4.4BSD message passing we use a 4.4BSD message passing + * system, not 4.3. Thus msg_accrights(len) are now missing. They + * belong in an obscure libc emulation or the bin. + */ + +struct msghdr { + void *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iov_iter msg_iter; /* data */ + + /* + * Ancillary data. msg_control_user is the user buffer used for the + * recv* side when msg_control_is_user is set, msg_control is the kernel + * buffer used for all other cases. + */ + union { + void *msg_control; + void __user *msg_control_user; + }; + bool msg_control_is_user : 1; + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ +}; + +struct user_msghdr { + void __user *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iovec __user *msg_iov; /* scatter/gather array */ + __kernel_size_t msg_iovlen; /* # elements in msg_iov */ + void __user *msg_control; /* ancillary data */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ +}; + +/* For recvmmsg/sendmmsg */ +struct mmsghdr { + struct user_msghdr msg_hdr; + unsigned int msg_len; +}; + +/* + * POSIX 1003.1g - ancillary data object information + * Ancillary data consits of a sequence of pairs of + * (cmsghdr, cmsg_data[]) + */ + +struct cmsghdr { + __kernel_size_t cmsg_len; /* data byte count, including hdr */ + int cmsg_level; /* originating protocol */ + int cmsg_type; /* protocol-specific type */ +}; + +/* + * Ancillary data object information MACROS + * Table 5-14 of POSIX 1003.1g + */ + +#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg)) +#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg)) + +#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) + +#define CMSG_DATA(cmsg) \ + ((void *)(cmsg) + sizeof(struct cmsghdr)) +#define CMSG_USER_DATA(cmsg) \ + ((void __user *)(cmsg) + sizeof(struct cmsghdr)) +#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len)) +#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len)) + +#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \ + (struct cmsghdr *)(ctl) : \ + (struct cmsghdr *)NULL) +#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) +#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \ + (cmsg)->cmsg_len <= (unsigned long) \ + ((mhdr)->msg_controllen - \ + ((char *)(cmsg) - (char *)(mhdr)->msg_control))) +#define for_each_cmsghdr(cmsg, msg) \ + for (cmsg = CMSG_FIRSTHDR(msg); \ + cmsg; \ + cmsg = CMSG_NXTHDR(msg, cmsg)) + +/* + * Get the next cmsg header + * + * PLEASE, do not touch this function. If you think, that it is + * incorrect, grep kernel sources and think about consequences + * before trying to improve it. + * + * Now it always returns valid, not truncated ancillary object + * HEADER. But caller still MUST check, that cmsg->cmsg_len is + * inside range, given by msg->msg_controllen before using + * ancillary object DATA. --ANK (980731) + */ + +static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr *__cmsg) +{ + struct cmsghdr * __ptr; + + __ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len)); + if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) + return (struct cmsghdr *)0; + + return __ptr; +} + +static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) +{ + return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); +} + +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + +/* "Socket"-level control message types: */ + +#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ +#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ +#define SCM_SECURITY 0x03 /* rw: security label */ + +struct ucred { + __u32 pid; + __u32 uid; + __u32 gid; +}; + +/* Supported address families. */ +#define AF_UNSPEC 0 +#define AF_UNIX 1 /* Unix domain sockets */ +#define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +#define AF_INET 2 /* Internet IP Protocol */ +#define AF_AX25 3 /* Amateur Radio AX.25 */ +#define AF_IPX 4 /* Novell IPX */ +#define AF_APPLETALK 5 /* AppleTalk DDP */ +#define AF_NETROM 6 /* Amateur Radio NET/ROM */ +#define AF_BRIDGE 7 /* Multiprotocol bridge */ +#define AF_ATMPVC 8 /* ATM PVCs */ +#define AF_X25 9 /* Reserved for X.25 project */ +#define AF_INET6 10 /* IP version 6 */ +#define AF_ROSE 11 /* Amateur Radio X.25 PLP */ +#define AF_DECnet 12 /* Reserved for DECnet project */ +#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ +#define AF_SECURITY 14 /* Security callback pseudo AF */ +#define AF_KEY 15 /* PF_KEY key management API */ +#define AF_NETLINK 16 +#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ +#define AF_PACKET 17 /* Packet family */ +#define AF_ASH 18 /* Ash */ +#define AF_ECONET 19 /* Acorn Econet */ +#define AF_ATMSVC 20 /* ATM SVCs */ +#define AF_RDS 21 /* RDS sockets */ +#define AF_SNA 22 /* Linux SNA Project (nutters!) */ +#define AF_IRDA 23 /* IRDA sockets */ +#define AF_PPPOX 24 /* PPPoX sockets */ +#define AF_WANPIPE 25 /* Wanpipe API Sockets */ +#define AF_LLC 26 /* Linux LLC */ +#define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ +#define AF_CAN 29 /* Controller Area Network */ +#define AF_TIPC 30 /* TIPC sockets */ +#define AF_BLUETOOTH 31 /* Bluetooth sockets */ +#define AF_IUCV 32 /* IUCV sockets */ +#define AF_RXRPC 33 /* RxRPC sockets */ +#define AF_ISDN 34 /* mISDN sockets */ +#define AF_PHONET 35 /* Phonet sockets */ +#define AF_IEEE802154 36 /* IEEE802154 sockets */ +#define AF_CAIF 37 /* CAIF sockets */ +#define AF_ALG 38 /* Algorithm sockets */ +#define AF_NFC 39 /* NFC sockets */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_KCM 41 /* Kernel Connection Multiplexor*/ +#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ +#define AF_SMC 43 /* smc sockets: reserve number for + * PF_SMC protocol family that + * reuses AF_INET address family + */ +#define AF_XDP 44 /* XDP sockets */ + +#define AF_MAX 45 /* For now.. */ + +/* Protocol families, same as address families. */ +#define PF_UNSPEC AF_UNSPEC +#define PF_UNIX AF_UNIX +#define PF_LOCAL AF_LOCAL +#define PF_INET AF_INET +#define PF_AX25 AF_AX25 +#define PF_IPX AF_IPX +#define PF_APPLETALK AF_APPLETALK +#define PF_NETROM AF_NETROM +#define PF_BRIDGE AF_BRIDGE +#define PF_ATMPVC AF_ATMPVC +#define PF_X25 AF_X25 +#define PF_INET6 AF_INET6 +#define PF_ROSE AF_ROSE +#define PF_DECnet AF_DECnet +#define PF_NETBEUI AF_NETBEUI +#define PF_SECURITY AF_SECURITY +#define PF_KEY AF_KEY +#define PF_NETLINK AF_NETLINK +#define PF_ROUTE AF_ROUTE +#define PF_PACKET AF_PACKET +#define PF_ASH AF_ASH +#define PF_ECONET AF_ECONET +#define PF_ATMSVC AF_ATMSVC +#define PF_RDS AF_RDS +#define PF_SNA AF_SNA +#define PF_IRDA AF_IRDA +#define PF_PPPOX AF_PPPOX +#define PF_WANPIPE AF_WANPIPE +#define PF_LLC AF_LLC +#define PF_IB AF_IB +#define PF_MPLS AF_MPLS +#define PF_CAN AF_CAN +#define PF_TIPC AF_TIPC +#define PF_BLUETOOTH AF_BLUETOOTH +#define PF_IUCV AF_IUCV +#define PF_RXRPC AF_RXRPC +#define PF_ISDN AF_ISDN +#define PF_PHONET AF_PHONET +#define PF_IEEE802154 AF_IEEE802154 +#define PF_CAIF AF_CAIF +#define PF_ALG AF_ALG +#define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK +#define PF_KCM AF_KCM +#define PF_QIPCRTR AF_QIPCRTR +#define PF_SMC AF_SMC +#define PF_XDP AF_XDP +#define PF_MAX AF_MAX + +/* Maximum queue length specifiable by listen. */ +#define SOMAXCONN 4096 + +/* Flags we can use with send/ and recv. + Added those for 1003.1g not all are supported yet + */ + +#define MSG_OOB 1 +#define MSG_PEEK 2 +#define MSG_DONTROUTE 4 +#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC 8 +#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC 0x20 +#define MSG_DONTWAIT 0x40 /* Nonblocking io */ +#define MSG_EOR 0x80 /* End of record */ +#define MSG_WAITALL 0x100 /* Wait for a full request */ +#define MSG_FIN 0x200 +#define MSG_SYN 0x400 +#define MSG_CONFIRM 0x800 /* Confirm path validity */ +#define MSG_RST 0x1000 +#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ +#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ +#define MSG_MORE 0x8000 /* Sender will send more */ +#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ +#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ +#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ +#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ +#define MSG_EOF MSG_FIN +#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ +#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry + * plain text and require encryption + */ + +#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ +#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file + descriptor received through + SCM_RIGHTS */ +#if defined(CONFIG_COMPAT) +#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ +#else +#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ +#endif + + +/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ +#define SOL_IP 0 +/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */ +#define SOL_TCP 6 +#define SOL_UDP 17 +#define SOL_IPV6 41 +#define SOL_ICMPV6 58 +#define SOL_SCTP 132 +#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */ +#define SOL_RAW 255 +#define SOL_IPX 256 +#define SOL_AX25 257 +#define SOL_ATALK 258 +#define SOL_NETROM 259 +#define SOL_ROSE 260 +#define SOL_DECNET 261 +#define SOL_X25 262 +#define SOL_PACKET 263 +#define SOL_ATM 264 /* ATM layer (cell level) */ +#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */ +#define SOL_IRDA 266 +#define SOL_NETBEUI 267 +#define SOL_LLC 268 +#define SOL_DCCP 269 +#define SOL_NETLINK 270 +#define SOL_TIPC 271 +#define SOL_RXRPC 272 +#define SOL_PPPOL2TP 273 +#define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 +#define SOL_RDS 276 +#define SOL_IUCV 277 +#define SOL_CAIF 278 +#define SOL_ALG 279 +#define SOL_NFC 280 +#define SOL_KCM 281 +#define SOL_TLS 282 +#define SOL_XDP 283 + +/* IPX options */ +#define IPX_TYPE 1 + +extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); +extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); + +struct timespec64; +struct __kernel_timespec; +struct old_timespec32; + +struct scm_timestamping_internal { + struct timespec64 ts[3]; +}; + +extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); +extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss); + +/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff + * forbid_cmsg_compat==false + */ +extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct __kernel_timespec __user *timeout, + struct old_timespec32 __user *timeout32); +extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + bool forbid_cmsg_compat); +extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, + unsigned int flags); +extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, + struct user_msghdr __user *umsg, + struct sockaddr __user *uaddr, + unsigned int flags); +extern int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov); +extern int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov); +extern int __copy_msghdr_from_user(struct msghdr *kmsg, + struct user_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec __user **uiov, size_t *nsegs); + +/* helpers which do the actual work for syscalls */ +extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned int flags, struct sockaddr __user *addr, + int __user *addr_len); +extern int __sys_sendto(int fd, void __user *buff, size_t len, + unsigned int flags, struct sockaddr __user *addr, + int addr_len); +extern int __sys_accept4_file(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags, + unsigned long nofile); +extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); +extern int __sys_socket(int family, int type, int protocol); +extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, + int addrlen, int file_flags); +extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen); +extern int __sys_listen(int fd, int backlog); +extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); +extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); +extern int __sys_socketpair(int family, int type, int protocol, + int __user *usockvec); +extern int __sys_shutdown(int fd, int how); + +extern struct ns_common *get_net_ns(struct ns_common *ns); +#endif /* _LINUX_SOCKET_H */ diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh new file mode 100755 index 000000000000..3820e5c82293 --- /dev/null +++ b/tools/perf/trace/beauty/socket.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +# This one uses a copy from the kernel sources headers that is in a +# place used just for these tools/perf/beauty/ usage, we shouldn't not +# put it in tools/include/linux otherwise they would be used in the +# normal compiler building process and would drag needless stuff from the +# kernel. + +# When what these scripts need is already in tools/include/ then use it, +# otherwise grab and check the copy from the kernel sources just for these +# string table building scripts. + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/ + +printf "static const char *socket_families[] = {\n" +# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*' + +egrep $regex ${header_dir}/socket.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s] = \"%s\",\n" | \ + egrep -v "\"(UNIX|MAX)\"" +printf "};\n" -- cgit From f3cf7fa963c46391da54a0e50f54c089c9b4c450 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2020 08:43:51 -0300 Subject: perf trace beauty: Use the autogenerated protocol family table That helps us not to lose new protocol families when they are introduced, replacing that hardcoded, dated family->string table. To recap what this allows us to do: # perf trace -e syscalls:sys_enter_socket/max-stack=10/ --filter=family==INET --max-events=1 0.000 fetchmail/41097 syscalls:sys_enter_socket(family: INET, type: DGRAM|CLOEXEC|NONBLOCK, protocol: IP) __GI___socket (inlined) reopen (/usr/lib64/libresolv-2.31.so) send_dg (/usr/lib64/libresolv-2.31.so) __res_context_send (/usr/lib64/libresolv-2.31.so) __GI___res_context_query (inlined) __GI___res_context_search (inlined) _nss_dns_gethostbyname4_r (/usr/lib64/libnss_dns-2.31.so) gaih_inet.constprop.0 (/usr/lib64/libc-2.31.so) __GI_getaddrinfo (inlined) [0x15cb2] (/usr/bin/fetchmail) # More work is still needed to allow for the more natura strace-like syscall name usage instead of the trace event name: # perf trace -e socket/max-stack=10,family==INET/ --max-events=1 I.e. to allow for modifiers to follow the syscall name and for logical expressions to be accepted as filters to use with that syscall, be it as trace event filters or BPF based ones. Using -v we can see how the trace event filter is built: # perf trace -v -e syscalls:sys_enter_socket/call-graph=dwarf/ --filter=family==INET --max-events=2 New filter for syscalls:sys_enter_socket: (family==0x2) && (common_pid != 41384 && common_pid != 2836) $ tools/perf/trace/beauty/socket.sh | grep -w 2 [2] = "INET", $ Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++++ tools/perf/check-headers.sh | 3 +++ tools/perf/trace/beauty/sockaddr.c | 9 +-------- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 86dbb51bb272..e339e225410c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -418,6 +418,7 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ @@ -501,6 +502,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh $(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl) $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@ +socket_arrays := $(beauty_outdir)/socket_arrays.c +socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh + +$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl) + $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@ + vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -697,6 +704,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(kcmp_type_array) \ $(kvm_ioctl_array) \ $(socket_ipproto_array) \ + $(socket_arrays) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ @@ -1006,6 +1014,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(kvm_ioctl_array) \ $(OUTPUT)$(kcmp_type_array) \ $(OUTPUT)$(socket_ipproto_array) \ + $(OUTPUT)$(socket_arrays) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) \ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 94c2bc22c2bb..0b4d6431b072 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -128,6 +128,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +# These will require a beauty_check when we get some more like that +check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h + # check duplicated library files check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index e0c13e6a5788..cd110634ab09 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -7,14 +7,7 @@ #include #include -static const char *socket_families[] = { - "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", - "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", - "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", - "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", - "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", - "ALG", "NFC", "VSOCK", -}; +#include "trace/beauty/generated/socket_arrays.c" DEFINE_STRARRAY(socket_families, "PF_"); static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size) -- cgit From 23db762be2eca6d3a0ee673a2bad2ce5411900f3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2020 08:52:32 -0300 Subject: tools headers kvm s390: Sync headers with the kernel sources To pick the changes in: 23a60f834406 ("s390/kvm: diagnose 0x318 sync and reset") None of them trigger any changes in tooling, this time this is just to silence these perf build warnings: Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/kvm.h' differs from latest version at 'arch/s390/include/uapi/asm/kvm.h' diff -u tools/arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Christian Borntraeger Cc: Collin Walling Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/kvm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 436ec7636927..7a6b14874d65 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) +#define KVM_SYNC_DIAG318 (1UL << 12) #define KVM_SYNC_S390_VALID_FIELDS \ (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ - KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ + KVM_SYNC_DIAG318) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 @@ -264,7 +266,8 @@ struct kvm_sync_regs { __u8 reserved2 : 7; __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ - __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + __u64 diag318; /* diagnose 0x318 info */ + __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ union { __u8 sdnx[SDNXL]; /* state description annex */ struct { -- cgit From fe452fb84329b655324d55220a8da886fc9ba6e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2020 08:57:07 -0300 Subject: tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: 25abc060d282 ("vhost-vdpa: support IOTLB batching hints") This doesn't result in any changes in tooling, no new ioctls to be picked up by the id->string table generators, etc. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: Jason Wang Cc: Jiri Olsa Cc: Michael S. Tsirkin Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 0c2349612e77..75232185324a 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -91,6 +91,8 @@ /* Use message type V2 */ #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) -- cgit From 6016e0348792bb65719abaf4424e1e44ed09aeb2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2020 09:02:40 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 3edd68399dc1 ("KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support") 1aa561b1a4c0 ("kvm: x86: Add "last CPU" to some KVM_EXIT information") 23a60f834406 ("s390/kvm: diagnose 0x318 sync and reset") That do not result in any change in tooling, as the additions are not being used in any table generator. This silences these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Christian Borntraeger Cc: Collin Walling Cc: Jim Mattson Cc: Jiri Olsa Cc: Mohammed Gamal Cc: Namhyung Kim Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 4fdf30316582..f6d86033c4fa 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -289,6 +289,7 @@ struct kvm_run { /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { @@ -1031,6 +1032,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_LAST_CPU 184 +#define KVM_CAP_SMALLER_MAXPHYADDR 185 +#define KVM_CAP_S390_DIAG318 186 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From 72d69c2a4ee1fe2a66c3296ad42e60d563ba9a36 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 10 Aug 2020 08:21:59 +0200 Subject: perf bench numa: Fix number of processes in "2x3-convergence" test Signed-off-by: Alexander Gordeev Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/d949f5f48e17fc816f3beecf8479f1b2480345e4.1597004831.git.agordeev@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5797253b9700..3aad1fc6cbca 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1755,7 +1755,7 @@ static const char *tests[][MAX_ARGS] = { { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, - { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, + { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, { " 4x4-convergence-NOTHP,", -- cgit From 85372c6974aad0e40b97513434694abe84c1017e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 10 Aug 2020 08:22:00 +0200 Subject: perf bench numa: Fix benchmark names Standard benchmark names let users know the tests specifics. For example "2x1-bw-process" name tells that two processes one thread each are run and the RAM bandwidth is measured. Several benchmarks names do not correspond to their actual running configuration. Fix that and also some whitespace and comment inconsistencies. Signed-off-by: Alexander Gordeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/6b6f2084f132ee8e9203dc7c32f9deb209b87a68.1597004831.git.agordeev@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 3aad1fc6cbca..31e2601d39c8 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -813,12 +813,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val } } } else if (!g->p.data_backwards || (nr + loop) & 1) { + /* Process data forwards: */ d0 = data + off; d = data + off + 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d >= d1)) d = data; @@ -836,7 +836,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val d = data + off - 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d < data)) d = data + words-1; @@ -1733,12 +1732,12 @@ err: */ static const char *tests[][MAX_ARGS] = { /* Basic single-stream NUMA bandwidth measurements: */ - { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM }, { "RAM-bw-local-NOTHP,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, - { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "1", OPT_BW_RAM }, /* 2-stream NUMA bandwidth measurements: */ @@ -1780,24 +1779,24 @@ static const char *tests[][MAX_ARGS] = { "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, - { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, - { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, - { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, - { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, + { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, + { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, + { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, + { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, - { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, - { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, - { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, - { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, - { " 4x8-bw-thread-NOTHP,", + { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, + { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, + { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, + { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, + { " 4x8-bw-process-NOTHP,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, - { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, - { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, + { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, + { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, - { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, - { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, + { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, + { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, - { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, + { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, { "numa01-bw-thread-NOTHP,", -- cgit From f9f95068266d875d1840ad90abf51773a765f41a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 12 Aug 2020 07:46:47 +0100 Subject: perf bench: Fix a couple of spelling mistakes in options text There are a couple of spelling mistakes in the text. Fix these. Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20200812064647.200132-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/find-bit-bench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index fa90f3e9d368..73b5bcc5946a 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -17,9 +17,9 @@ static unsigned int inner_iterations = 100000; static const struct option options[] = { OPT_UINTEGER('i', "outer-iterations", &outer_iterations, - "Number of outerer iterations used"), + "Number of outer iterations used"), OPT_UINTEGER('j', "inner-iterations", &inner_iterations, - "Number of outerer iterations used"), + "Number of inner iterations used"), OPT_END() }; -- cgit From e96ebd589debd9a6a793608c4ec7019c38785dea Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 30 Jul 2020 08:59:12 -0400 Subject: parisc: Implement __smp_store_release and __smp_load_acquire barriers This patch implements the __smp_store_release and __smp_load_acquire barriers using ordered stores and loads. This avoids the sync instruction present in the generic implementation. Cc: # 4.14+ Signed-off-by: Dave Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/barrier.h | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h index dbaaca84f27f..640d46edf32e 100644 --- a/arch/parisc/include/asm/barrier.h +++ b/arch/parisc/include/asm/barrier.h @@ -26,6 +26,67 @@ #define __smp_rmb() mb() #define __smp_wmb() mb() +#define __smp_store_release(p, v) \ +do { \ + typeof(p) __p = (p); \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("stb,ma %0,0(%1)" \ + : : "r"(*(__u8 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("sth,ma %0,0(%1)" \ + : : "r"(*(__u16 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("stw,ma %0,0(%1)" \ + : : "r"(*(__u32 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("std,ma %0,0(%1)" \ + : : "r"(*(__u64 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("ldb,ma 0(%1),%0" \ + : "=r"(*(__u8 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("ldh,ma 0(%1),%0" \ + : "=r"(*(__u16 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("ldw,ma 0(%1),%0" \ + : "=r"(*(__u32 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("ldd,ma 0(%1),%0" \ + : "=r"(*(__u64 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + } \ + __u.__val; \ +}) #include #endif /* !__ASSEMBLY__ */ -- cgit From 54898f70a8549492f2268f7cdd855fa1d8c5e0ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Aug 2020 22:16:06 -0400 Subject: NFS: Add layout segment info to pnfs read/write/commit tracepoints Allow the pnfs I/O tracepoints to trace which layout segment is being used. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 61c33536dcf3..b4f852d4d099 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1727,6 +1727,13 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid); DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name); DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); +#ifdef CONFIG_NFS_V4_1 +#define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) \ + (lseg ? nfs_stateid_hash(&lseg->pls_layout->plh_stateid) : 0) +#else +#define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) (0) +#endif + DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( const struct nfs_pgio_header *hdr, @@ -1745,6 +1752,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) + __field(int, layoutstateid_seq) + __field(u32, layoutstateid_hash) ), TP_fast_assign( @@ -1754,6 +1763,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + const struct pnfs_layout_segment *lseg = hdr->lseg; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; @@ -1766,11 +1776,15 @@ DECLARE_EVENT_CLASS(nfs4_read_event, be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = nfs_stateid_hash(&state->stateid); + __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0; + __entry->layoutstateid_hash = + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x " + "layoutstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1778,7 +1792,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, - __entry->stateid_seq, __entry->stateid_hash + __entry->stateid_seq, __entry->stateid_hash, + __entry->layoutstateid_seq, __entry->layoutstateid_hash ) ); #define DEFINE_NFS4_READ_EVENT(name) \ @@ -1811,6 +1826,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) + __field(int, layoutstateid_seq) + __field(u32, layoutstateid_hash) ), TP_fast_assign( @@ -1820,6 +1837,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + const struct pnfs_layout_segment *lseg = hdr->lseg; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; @@ -1832,11 +1850,15 @@ DECLARE_EVENT_CLASS(nfs4_write_event, be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = nfs_stateid_hash(&state->stateid); + __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0; + __entry->layoutstateid_hash = + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x " + "layoutstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1844,7 +1866,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, - __entry->stateid_seq, __entry->stateid_hash + __entry->stateid_seq, __entry->stateid_hash, + __entry->layoutstateid_seq, __entry->layoutstateid_hash ) ); @@ -1875,6 +1898,8 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(unsigned long, error) __field(loff_t, offset) __field(u32, count) + __field(int, layoutstateid_seq) + __field(u32, layoutstateid_hash) ), TP_fast_assign( @@ -1882,6 +1907,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = data->args.fh ? data->args.fh : &nfsi->fh; + const struct pnfs_layout_segment *lseg = data->lseg; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; @@ -1889,18 +1915,22 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->error = error < 0 ? -error : 0; + __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0; + __entry->layoutstateid_hash = + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u", + "offset=%lld count=%u layoutstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count + __entry->count, + __entry->layoutstateid_seq, __entry->layoutstateid_hash ) ); #define DEFINE_NFS4_COMMIT_EVENT(name) \ -- cgit From ff041727e9e029845857cac41aae118ead5e261b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Aug 2020 16:30:30 -0400 Subject: NFS: Don't move layouts to plh_return_segs list while in use If the layout segment is still in use for a read or a write, we should not move it to the layout plh_return_segs list. If we do, we can end up returning the layout while I/O is still in progress. Fixes: e0b7d420f72a ("pNFS: Don't discard layout segments that are marked for return") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d8cdb94c6668..262ce01c7abe 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2392,16 +2392,6 @@ out_forget: return ERR_PTR(-EAGAIN); } -static int -mark_lseg_invalid_or_return(struct pnfs_layout_segment *lseg, - struct list_head *tmp_list) -{ - if (!mark_lseg_invalid(lseg, tmp_list)) - return 0; - pnfs_cache_lseg_for_layoutreturn(lseg->pls_layout, lseg); - return 1; -} - /** * pnfs_mark_matching_lsegs_return - Free or return matching layout segments * @lo: pointer to layout header @@ -2438,7 +2428,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); - if (mark_lseg_invalid_or_return(lseg, tmp_list)) + if (mark_lseg_invalid(lseg, tmp_list)) continue; remaining++; set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); -- cgit From d474f96104bd4377573526ebae2ee212205a6839 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Aug 2020 09:03:56 -0400 Subject: NFS: Don't return layout segments that are in use If the NFS_LAYOUT_RETURN_REQUESTED flag is set, we want to return the layout as soon as possible, meaning that the affected layout segments should be marked as invalid, and should no longer be in use for I/O. Fixes: f0b429819b5f ("pNFS: Ignore non-recalled layouts in pnfs_layout_need_return()") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 262ce01c7abe..b5baf36d4de5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1226,31 +1226,27 @@ out: return status; } +static bool +pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, + enum pnfs_iomode iomode, + u32 seq) +{ + struct pnfs_layout_range recall_range = { + .length = NFS4_MAX_UINT64, + .iomode = iomode, + }; + return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &recall_range, seq) != -EBUSY; +} + /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo) { - struct pnfs_layout_segment *s; - enum pnfs_iomode iomode; - u32 seq; - if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) return false; - - seq = lo->plh_return_seq; - iomode = lo->plh_return_iomode; - - /* Defer layoutreturn until all recalled lsegs are done */ - list_for_each_entry(s, &lo->plh_segs, pls_list) { - if (seq && pnfs_seqid_is_newer(s->pls_seq, seq)) - continue; - if (iomode != IOMODE_ANY && s->pls_range.iomode != iomode) - continue; - if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) - return false; - } - - return true; + return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, + lo->plh_return_seq); } static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) -- cgit From b4487b93545214a9db8cbf32e86411677b0cca21 Mon Sep 17 00:00:00 2001 From: Jeffrey Mitchell Date: Wed, 5 Aug 2020 12:23:19 -0500 Subject: nfs: Fix getxattr kernel panic and memory overflow Move the buffer size check to decode_attr_security_label() before memcpy() Only call memcpy() if the buffer is large enough Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS") Signed-off-by: Jeffrey Mitchell [Trond: clean up duplicate test of label->len != 0] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 -- fs/nfs/nfs4xdr.c | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 113e0d6dd3d3..5db240688d4e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5856,8 +5856,6 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - if (buflen < label.len) - return -ERANGE; return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 388ac520b104..79018938d6b3 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4166,7 +4166,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, return -EIO; if (len < NFS4_MAXLABELLEN) { if (label) { - memcpy(label->label, p, len); + if (label->len) { + if (label->len < len) + return -ERANGE; + memcpy(label->label, p, len); + } label->len = len; label->pi = pi; label->lfs = lfs; -- cgit From 8fe5db97c9d1166838538a4437a14665e8f79300 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 10 Aug 2020 02:46:01 +0000 Subject: rpc_pipefs: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Xu Wang Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/rpc_pipefs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 9fb067a6f7e0..ef9db135c649 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -79,7 +79,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, goto out_free_data; bl_msg = msg->data; - bl_msg->type = BL_DEVICE_MOUNT, + bl_msg->type = BL_DEVICE_MOUNT; bl_msg->totallen = b->simple.len; nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); -- cgit From a5032910c5e542dddedeaa6500e5eab3f1175384 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 10 Aug 2020 19:18:35 -0700 Subject: fs: nfs: delete repeated words in comments Drop duplicated words {the, and} in comments. Signed-off-by: Randy Dunlap Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index ccc88be88d6a..66949da0e827 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -982,7 +982,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc, /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the - * the kernel. To continue to support that functionality we + * kernel. To continue to support that functionality we * have a touch of selinux knowledge here in the NFS code. The * userspace code converted context=blah to just blah so we are * converting back to the full string selinux understands. diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 79018938d6b3..0b3510f62623 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5256,7 +5256,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) * The XDR encode routine has set things up so that * the link text will be copied directly into the * buffer. We just have to do overflow-checking, - * and and null-terminate the text (the VFS expects + * and null-terminate the text (the VFS expects * null-termination). */ xdr_terminate_string(rcvbuf, len); -- cgit From a36da65c46565d2527eec3efdb546251e38253fd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Aug 2020 09:50:19 -0600 Subject: io_uring: fail poll arm on queue proc failure Check the ipt.error value, it must have been either cleared to zero or set to another error than the default -EINVAL if we don't go through the waitqueue proc addition. Just give up on poll at that point and return failure, this will fallback to async work. io_poll_add() doesn't suffer from this failure case, as it returns the error value directly. Cc: stable@vger.kernel.org # v5.7+ Reported-by: syzbot+a730016dc0bdce4f6ff5@syzkaller.appspotmail.com Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 99582cf5106b..8a2afd8c33c9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4883,7 +4883,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); - if (ret) { + if (ret || ipt.error) { io_poll_remove_double(req, apoll->double_poll); spin_unlock_irq(&ctx->completion_lock); kfree(apoll->double_poll); -- cgit From 563c53e73b8b6ec842828736f77e633f7b0911e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Aug 2020 13:36:32 -0400 Subject: NFS: Fix flexfiles read failover The current mirrored read failover code is correctly resetting the mirror index between failed reads, however it is not able to actually flip the RPC call over to the next RPC client. The end result is that we keep resending the RPC call to the same client over and over. The fix is to use the pnfs_read_resend_pnfs() mechanism to schedule a new RPC call, but we need to add the ability to pass in a mirror index so that we always retry the next mirror in the list. Fixes: 166bd5b889ac ("pNFS/flexfiles: Fix layoutstats handling during read failovers") Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 50 ++++++++++++++++++++++++---------- fs/nfs/pnfs.c | 4 ++- fs/nfs/pnfs.h | 2 +- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b26173d72735..965145592750 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -790,6 +790,19 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); } +static struct nfs4_pnfs_ds * +ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) +{ + struct pnfs_layout_segment *lseg = pgio->pg_lseg; + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); + if (ds || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); +} + static void ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, @@ -840,7 +853,7 @@ retry: goto out_nolseg; } - ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); if (!ds) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; @@ -1022,11 +1035,24 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) } } +static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) +{ + u32 idx = hdr->pgio_mirror_idx + 1; + int new_idx = 0; + + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) + ff_layout_send_layouterror(hdr->lseg); + else + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); +} + static void ff_layout_reset_read(struct nfs_pgio_header *hdr) { struct rpc_task *task = &hdr->task; pnfs_layoutcommit_inode(hdr->inode, false); + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " @@ -1228,6 +1254,12 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); + /* + * Don't return the layout if this is a read and we still + * have layouts to try + */ + if (opnum == OP_READ) + break; /* Fallthrough */ default: pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, @@ -1241,7 +1273,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - int new_idx = hdr->pgio_mirror_idx; int err; if (task->tk_status < 0) { @@ -1261,10 +1292,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task, clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: - if (ff_layout_choose_best_ds_for_read(hdr->lseg, - hdr->pgio_mirror_idx + 1, - &new_idx)) - goto out_layouterror; set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: @@ -1275,10 +1302,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task, } return 0; -out_layouterror: - ff_layout_read_record_layoutstats_done(task, hdr); - ff_layout_send_layouterror(hdr->lseg); - hdr->pgio_mirror_idx = new_idx; out_eagain: rpc_restart_call_prepare(task); return -EAGAIN; @@ -1405,10 +1428,9 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); - if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) { - ff_layout_send_layouterror(hdr->lseg); - pnfs_read_resend_pnfs(hdr); - } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + ff_layout_resend_pnfs_read(hdr); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b5baf36d4de5..40332c758d84 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2939,7 +2939,8 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, } /* Resend all requests through pnfs. */ -void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) +void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr, + unsigned int mirror_idx) { struct nfs_pageio_descriptor pgio; @@ -2950,6 +2951,7 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); + pgio.pg_mirror_idx = mirror_idx; hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); } } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8e0ada581b92..2661c44c62db 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -311,7 +311,7 @@ int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_pgio_header *); void pnfs_ld_read_done(struct nfs_pgio_header *); -void pnfs_read_resend_pnfs(struct nfs_pgio_header *); +void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, -- cgit From d96f27c80b65437a7b572647ecb4717ec9a50c98 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Aug 2020 17:53:34 +0800 Subject: ALSA: hda/hdmi: Use force connectivity quirk on another HP desktop There's another HP desktop has buggy BIOS which flags the Port Connectivity bit as no connection. Apply force connectivity quirk to enable DP/HDMI audio. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200811095336.32396-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4bbd12d3b1b5..b8c8490e568b 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1863,6 +1863,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), {} }; -- cgit From 24fb33d40d60bd7d196400e7d5b26ff566fd98b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Aug 2020 05:15:18 +0100 Subject: fix breakage in do_rmdir() syzbot reported and bisected a use-after-free due to the recent init cleanups. The putname() should happen only after we'd *not* branched to retry, same as it's done in do_unlinkat(). Reported-by: syzbot+bbeb1c88016c7db4aa24@syzkaller.appspotmail.com Fixes: e24ab0ef689d "fs: push the getname from do_rmdir into the callers" Cc: Christoph Hellwig Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index fde8fe086c09..9fa10c614de7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3770,11 +3770,11 @@ exit2: mnt_drop_write(path.mnt); exit1: path_put(&path); - putname(name); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } + putname(name); return error; } -- cgit From 5b32af91b5de6f95ad99e4eaaf57777376af124f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:14 -0700 Subject: percpu: return number of released bytes from pcpu_free_area() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: memcg accounting of percpu memory", v3. This patchset adds percpu memory accounting to memory cgroups. It's based on the rework of the slab controller and reuses concepts and features introduced for the per-object slab accounting. Percpu memory is becoming more and more widely used by various subsystems, and the total amount of memory controlled by the percpu allocator can make a good part of the total memory. As an example, bpf maps can consume a lot of percpu memory, and they are created by a user. Also, some cgroup internals (e.g. memory controller statistics) can be quite large. On a machine with many CPUs and big number of cgroups they can consume hundreds of megabytes. So the lack of memcg accounting is creating a breach in the memory isolation. Similar to the slab memory, percpu memory should be accounted by default. Percpu allocations by their nature are scattered over multiple pages, so they can't be tracked on the per-page basis. So the per-object tracking introduced by the new slab controller is reused. The patchset implements charging of percpu allocations, adds memcg-level statistics, enables accounting for percpu allocations made by memory cgroup internals and provides some basic tests. To implement the accounting of percpu memory without a significant memory and performance overhead the following approach is used: all accounted allocations are placed into a separate percpu chunk (or chunks). These chunks are similar to default chunks, except that they do have an attached vector of pointers to obj_cgroup objects, which is big enough to save a pointer for each allocated object. On the allocation, if the allocation has to be accounted (__GFP_ACCOUNT is passed, the allocating process belongs to a non-root memory cgroup, etc), the memory cgroup is getting charged and if the maximum limit is not exceeded the allocation is performed using a memcg-aware chunk. Otherwise -ENOMEM is returned or the allocation is forced over the limit, depending on gfp (as any other kernel memory allocation). The memory cgroup information is saved in the obj_cgroup vector at the corresponding offset. On the release time the memcg information is restored from the vector and the cgroup is getting uncharged. Unaccounted allocations (at this point the absolute majority of all percpu allocations) are performed in the old way, so no additional overhead is expected. To avoid pinning dying memory cgroups by outstanding allocations, obj_cgroup API is used instead of directly saving memory cgroup pointers. obj_cgroup is basically a pointer to a memory cgroup with a standalone reference counter. The trick is that it can be atomically swapped to point at the parent cgroup, so that the original memory cgroup can be released prior to all objects, which has been charged to it. Because all charges and statistics are fully recursive, it's perfectly correct to uncharge the parent cgroup instead. This scheme is used in the slab memory accounting, and percpu memory can just follow the scheme. This patch (of 5): To implement accounting of percpu memory we need the information about the size of freed object. Return it from pcpu_free_area(). Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Johannes Weiner Cc: Michal Hocko Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Pekka Enberg Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long cC: Michal Koutnýutny@suse.com> Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200623184515.4132564-1-guro@fb.com Link: http://lkml.kernel.org/r/20200608230819.832349-1-guro@fb.com Link: http://lkml.kernel.org/r/20200608230819.832349-2-guro@fb.com Signed-off-by: Linus Torvalds --- mm/percpu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index b626766160ce..5e9eefdce21f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1211,11 +1211,14 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits, * * This function determines the size of an allocation to free using * the boundary bitmap and clears the allocation map. + * + * RETURNS: + * Number of freed bytes. */ -static void pcpu_free_area(struct pcpu_chunk *chunk, int off) +static int pcpu_free_area(struct pcpu_chunk *chunk, int off) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; - int bit_off, bits, end, oslot; + int bit_off, bits, end, oslot, freed; lockdep_assert_held(&pcpu_lock); pcpu_stats_area_dealloc(chunk); @@ -1230,8 +1233,10 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off) bits = end - bit_off; bitmap_clear(chunk->alloc_map, bit_off, bits); + freed = bits * PCPU_MIN_ALLOC_SIZE; + /* update metadata */ - chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE; + chunk->free_bytes += freed; /* update first free bit */ chunk_md->first_free = min(chunk_md->first_free, bit_off); @@ -1239,6 +1244,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off) pcpu_block_update_hint_free(chunk, bit_off, bits); pcpu_chunk_relocate(chunk, oslot); + + return freed; } static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits) -- cgit From 3c7be18ac9a06bc67196bfdabb7c21e1bbacdc13 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:17 -0700 Subject: mm: memcg/percpu: account percpu memory to memory cgroups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Percpu memory is becoming more and more widely used by various subsystems, and the total amount of memory controlled by the percpu allocator can make a good part of the total memory. As an example, bpf maps can consume a lot of percpu memory, and they are created by a user. Also, some cgroup internals (e.g. memory controller statistics) can be quite large. On a machine with many CPUs and big number of cgroups they can consume hundreds of megabytes. So the lack of memcg accounting is creating a breach in the memory isolation. Similar to the slab memory, percpu memory should be accounted by default. To implement the perpcu accounting it's possible to take the slab memory accounting as a model to follow. Let's introduce two types of percpu chunks: root and memcg. What makes memcg chunks different is an additional space allocated to store memcg membership information. If __GFP_ACCOUNT is passed on allocation, a memcg chunk should be be used. If it's possible to charge the corresponding size to the target memory cgroup, allocation is performed, and the memcg ownership data is recorded. System-wide allocations are performed using root chunks, so there is no additional memory overhead. To implement a fast reparenting of percpu memory on memcg removal, we don't store mem_cgroup pointers directly: instead we use obj_cgroup API, introduced for slab accounting. [akpm@linux-foundation.org: fix CONFIG_MEMCG_KMEM=n build errors and warning] [akpm@linux-foundation.org: move unreachable code, per Roman] [cuibixuan@huawei.com: mm/percpu: fix 'defined but not used' warning] Link: http://lkml.kernel.org/r/6d41b939-a741-b521-a7a2-e7296ec16219@huawei.com Signed-off-by: Roman Gushchin Signed-off-by: Bixuan Cui Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200623184515.4132564-3-guro@fb.com Signed-off-by: Linus Torvalds --- mm/percpu-internal.h | 55 ++++++++++++++- mm/percpu-km.c | 5 +- mm/percpu-stats.c | 36 ++++++---- mm/percpu-vm.c | 5 +- mm/percpu.c | 185 +++++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 246 insertions(+), 40 deletions(-) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 0468ba500bd4..18b768ac7dca 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -5,6 +5,25 @@ #include #include +/* + * There are two chunk types: root and memcg-aware. + * Chunks of each type have separate slots list. + * + * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is + * used to store memcg membership data of a percpu object. Obj_cgroups are + * ref-counted pointers to a memory cgroup with an ability to switch dynamically + * to the parent memory cgroup. This allows to reclaim a deleted memory cgroup + * without reclaiming of all outstanding objects, which hold a reference at it. + */ +enum pcpu_chunk_type { + PCPU_CHUNK_ROOT, +#ifdef CONFIG_MEMCG_KMEM + PCPU_CHUNK_MEMCG, +#endif + PCPU_NR_CHUNK_TYPES, + PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES +}; + /* * pcpu_block_md is the metadata block struct. * Each chunk's bitmap is split into a number of full blocks. @@ -54,6 +73,9 @@ struct pcpu_chunk { int end_offset; /* additional area required to have the region end page aligned */ +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ +#endif int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ @@ -63,7 +85,7 @@ struct pcpu_chunk { extern spinlock_t pcpu_lock; -extern struct list_head *pcpu_slot; +extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; extern int pcpu_nr_empty_pop_pages; @@ -106,6 +128,37 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } +#ifdef CONFIG_MEMCG_KMEM +static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) +{ + if (chunk->obj_cgroups) + return PCPU_CHUNK_MEMCG; + return PCPU_CHUNK_ROOT; +} + +static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) +{ + return chunk_type == PCPU_CHUNK_MEMCG; +} + +#else +static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) +{ + return PCPU_CHUNK_ROOT; +} + +static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) +{ + return false; +} +#endif + +static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type) +{ + return &pcpu_chunk_lists[pcpu_nr_slots * + pcpu_is_memcg_chunk(chunk_type)]; +} + #ifdef CONFIG_PERCPU_STATS #include diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 20d2b69a13b0..35c9941077ee 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -44,7 +44,8 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, /* nada */ } -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; struct pcpu_chunk *chunk; @@ -52,7 +53,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) unsigned long flags; int i; - chunk = pcpu_alloc_chunk(gfp); + chunk = pcpu_alloc_chunk(type, gfp); if (!chunk) return NULL; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 32558063c3f9..c8400a2adbc2 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -34,11 +34,15 @@ static int find_max_nr_alloc(void) { struct pcpu_chunk *chunk; int slot, max_nr_alloc; + enum pcpu_chunk_type type; max_nr_alloc = 0; - for (slot = 0; slot < pcpu_nr_slots; slot++) - list_for_each_entry(chunk, &pcpu_slot[slot], list) - max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc); + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + for (slot = 0; slot < pcpu_nr_slots; slot++) + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], + list) + max_nr_alloc = max(max_nr_alloc, + chunk->nr_alloc); return max_nr_alloc; } @@ -129,6 +133,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, P("cur_min_alloc", cur_min_alloc); P("cur_med_alloc", cur_med_alloc); P("cur_max_alloc", cur_max_alloc); +#ifdef CONFIG_MEMCG_KMEM + P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))); +#endif seq_putc(m, '\n'); } @@ -137,6 +144,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) struct pcpu_chunk *chunk; int slot, max_nr_alloc; int *buffer; + enum pcpu_chunk_type type; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -202,18 +210,18 @@ alloc_buffer: chunk_map_stats(m, pcpu_reserved_chunk, buffer); } - for (slot = 0; slot < pcpu_nr_slots; slot++) { - list_for_each_entry(chunk, &pcpu_slot[slot], list) { - if (chunk == pcpu_first_chunk) { - seq_puts(m, "Chunk: <- First Chunk\n"); - chunk_map_stats(m, chunk, buffer); - - - } else { - seq_puts(m, "Chunk:\n"); - chunk_map_stats(m, chunk, buffer); + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) { + for (slot = 0; slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], + list) { + if (chunk == pcpu_first_chunk) { + seq_puts(m, "Chunk: <- First Chunk\n"); + chunk_map_stats(m, chunk, buffer); + } else { + seq_puts(m, "Chunk:\n"); + chunk_map_stats(m, chunk, buffer); + } } - } } diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index a2b395acef89..e46f7a6917f9 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -328,12 +328,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, pcpu_free_pages(chunk, pages, page_start, page_end); } -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp) { struct pcpu_chunk *chunk; struct vm_struct **vms; - chunk = pcpu_alloc_chunk(gfp); + chunk = pcpu_alloc_chunk(type, gfp); if (!chunk) return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 5e9eefdce21f..dc1a213293aa 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -37,9 +37,14 @@ * takes care of normal allocations. * * The allocator organizes chunks into lists according to free size and - * tries to allocate from the fullest chunk first. Each chunk is managed - * by a bitmap with metadata blocks. The allocation map is updated on - * every allocation and free to reflect the current state while the boundary + * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT + * flag should be passed. All memcg-aware allocations are sharing one set + * of chunks and all unaccounted allocations and allocations performed + * by processes belonging to the root memory cgroup are using the second set. + * + * The allocator tries to allocate from the fullest chunk first. Each chunk + * is managed by a bitmap with metadata blocks. The allocation map is updated + * on every allocation and free to reflect the current state while the boundary * map is only updated on allocation. Each metadata block contains * information to help mitigate the need to iterate over large portions * of the bitmap. The reverse mapping from page to chunk is stored in @@ -81,6 +86,7 @@ #include #include #include +#include #include #include @@ -160,7 +166,7 @@ struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ -struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ +struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ /* chunks which need their map areas extended, protected by pcpu_lock */ static LIST_HEAD(pcpu_map_extend_chunks); @@ -500,6 +506,9 @@ static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot, bool move_front) { if (chunk != pcpu_reserved_chunk) { + struct list_head *pcpu_slot; + + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); if (move_front) list_move(&chunk->list, &pcpu_slot[slot]); else @@ -1341,6 +1350,10 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); +#ifdef CONFIG_MEMCG_KMEM + /* first chunk isn't memcg-aware */ + chunk->obj_cgroups = NULL; +#endif pcpu_init_md_blocks(chunk); /* manage populated page bitmap */ @@ -1380,7 +1393,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, return chunk; } -static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp) { struct pcpu_chunk *chunk; int region_bits; @@ -1408,6 +1421,16 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) if (!chunk->md_blocks) goto md_blocks_fail; +#ifdef CONFIG_MEMCG_KMEM + if (pcpu_is_memcg_chunk(type)) { + chunk->obj_cgroups = + pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * + sizeof(struct obj_cgroup *), gfp); + if (!chunk->obj_cgroups) + goto objcg_fail; + } +#endif + pcpu_init_md_blocks(chunk); /* init metadata */ @@ -1415,6 +1438,10 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) return chunk; +#ifdef CONFIG_MEMCG_KMEM +objcg_fail: + pcpu_mem_free(chunk->md_blocks); +#endif md_blocks_fail: pcpu_mem_free(chunk->bound_map); bound_map_fail: @@ -1429,6 +1456,9 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; +#ifdef CONFIG_MEMCG_KMEM + pcpu_mem_free(chunk->obj_cgroups); +#endif pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); @@ -1505,7 +1535,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end, gfp_t gfp); static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end); -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); @@ -1547,6 +1578,77 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_get_page_chunk(pcpu_addr_to_page(addr)); } +#ifdef CONFIG_MEMCG_KMEM +static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, + struct obj_cgroup **objcgp) +{ + struct obj_cgroup *objcg; + + if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) || + memcg_kmem_bypass()) + return PCPU_CHUNK_ROOT; + + objcg = get_obj_cgroup_from_current(); + if (!objcg) + return PCPU_CHUNK_ROOT; + + if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) { + obj_cgroup_put(objcg); + return PCPU_FAIL_ALLOC; + } + + *objcgp = objcg; + return PCPU_CHUNK_MEMCG; +} + +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, + struct pcpu_chunk *chunk, int off, + size_t size) +{ + if (!objcg) + return; + + if (chunk) { + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; + } else { + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); + obj_cgroup_put(objcg); + } +} + +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ + struct obj_cgroup *objcg; + + if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))) + return; + + objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; + + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); + + obj_cgroup_put(objcg); +} + +#else /* CONFIG_MEMCG_KMEM */ +static enum pcpu_chunk_type +pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp) +{ + return PCPU_CHUNK_ROOT; +} + +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, + struct pcpu_chunk *chunk, int off, + size_t size) +{ +} + +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ + /** * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes @@ -1568,6 +1670,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t pcpu_gfp; bool is_atomic; bool do_warn; + enum pcpu_chunk_type type; + struct list_head *pcpu_slot; + struct obj_cgroup *objcg = NULL; static int warn_limit = 10; struct pcpu_chunk *chunk, *next; const char *err; @@ -1602,16 +1707,23 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, return NULL; } + type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg); + if (unlikely(type == PCPU_FAIL_ALLOC)) + return NULL; + pcpu_slot = pcpu_chunk_list(type); + if (!is_atomic) { /* * pcpu_balance_workfn() allocates memory under this mutex, * and it may wait for memory reclaim. Allow current task * to become OOM victim, in case of memory pressure. */ - if (gfp & __GFP_NOFAIL) + if (gfp & __GFP_NOFAIL) { mutex_lock(&pcpu_alloc_mutex); - else if (mutex_lock_killable(&pcpu_alloc_mutex)) + } else if (mutex_lock_killable(&pcpu_alloc_mutex)) { + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); return NULL; + } } spin_lock_irqsave(&pcpu_lock, flags); @@ -1666,7 +1778,7 @@ restart: } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { - chunk = pcpu_create_chunk(pcpu_gfp); + chunk = pcpu_create_chunk(type, pcpu_gfp); if (!chunk) { err = "failed to allocate new chunk"; goto fail; @@ -1723,6 +1835,8 @@ area_found: trace_percpu_alloc_percpu(reserved, is_atomic, size, align, chunk->base_addr, off, ptr); + pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); + return ptr; fail_unlock: @@ -1744,6 +1858,9 @@ fail: } else { mutex_unlock(&pcpu_alloc_mutex); } + + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); + return NULL; } @@ -1803,8 +1920,8 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) } /** - * pcpu_balance_workfn - manage the amount of free chunks and populated pages - * @work: unused + * __pcpu_balance_workfn - manage the amount of free chunks and populated pages + * @type: chunk type * * Reclaim all fully free chunks except for the first one. This is also * responsible for maintaining the pool of empty populated pages. However, @@ -1813,11 +1930,12 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) * allocation causes the failure as it is possible that requests can be * serviced from already backed regions. */ -static void pcpu_balance_workfn(struct work_struct *work) +static void __pcpu_balance_workfn(enum pcpu_chunk_type type) { /* gfp flags passed to underlying allocators */ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; LIST_HEAD(to_free); + struct list_head *pcpu_slot = pcpu_chunk_list(type); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; int slot, nr_to_pop, ret; @@ -1915,7 +2033,7 @@ retry_pop: if (nr_to_pop) { /* ran out of chunks to populate, create a new one and retry */ - chunk = pcpu_create_chunk(gfp); + chunk = pcpu_create_chunk(type, gfp); if (chunk) { spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); @@ -1927,6 +2045,20 @@ retry_pop: mutex_unlock(&pcpu_alloc_mutex); } +/** + * pcpu_balance_workfn - manage the amount of free chunks and populated pages + * @work: unused + * + * Call __pcpu_balance_workfn() for each chunk type. + */ +static void pcpu_balance_workfn(struct work_struct *work) +{ + enum pcpu_chunk_type type; + + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + __pcpu_balance_workfn(type); +} + /** * free_percpu - free percpu area * @ptr: pointer to area to free @@ -1941,8 +2073,9 @@ void free_percpu(void __percpu *ptr) void *addr; struct pcpu_chunk *chunk; unsigned long flags; - int off; + int size, off; bool need_balance = false; + struct list_head *pcpu_slot; if (!ptr) return; @@ -1956,7 +2089,11 @@ void free_percpu(void __percpu *ptr) chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->base_addr; - pcpu_free_area(chunk, off); + size = pcpu_free_area(chunk, off); + + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); + + pcpu_memcg_free_hook(chunk, off, size); /* if there are more than one fully free chunks, wake up grim reaper */ if (chunk->free_bytes == pcpu_unit_size) { @@ -2267,6 +2404,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, int map_size; unsigned long tmp_addr; size_t alloc_size; + enum pcpu_chunk_type type; #define PCPU_SETUP_BUG_ON(cond) do { \ if (unlikely(cond)) { \ @@ -2384,13 +2522,18 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. */ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]), - SMP_CACHE_BYTES); - if (!pcpu_slot) + pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots * + sizeof(pcpu_chunk_lists[0]) * + PCPU_NR_CHUNK_TYPES, + SMP_CACHE_BYTES); + if (!pcpu_chunk_lists) panic("%s: Failed to allocate %zu bytes\n", __func__, - pcpu_nr_slots * sizeof(pcpu_slot[0])); - for (i = 0; i < pcpu_nr_slots; i++) - INIT_LIST_HEAD(&pcpu_slot[i]); + pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) * + PCPU_NR_CHUNK_TYPES); + + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + for (i = 0; i < pcpu_nr_slots; i++) + INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]); /* * The end of the static region needs to be aligned with the -- cgit From 772616b031f06e05846488b01dab46a7c832da13 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:21 -0700 Subject: mm: memcg/percpu: per-memcg percpu memory statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Percpu memory can represent a noticeable chunk of the total memory consumption, especially on big machines with many CPUs. Let's track percpu memory usage for each memcg and display it in memory.stat. A percpu allocation is usually scattered over multiple pages (and nodes), and can be significantly smaller than a page. So let's add a byte-sized counter on the memcg level: MEMCG_PERCPU_B. Byte-sized vmstat infra created for slabs can be perfectly reused for percpu case. [guro@fb.com: v3] Link: http://lkml.kernel.org/r/20200623184515.4132564-4-guro@fb.com Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200608230819.832349-4-guro@fb.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/cgroup-v2.rst | 4 ++++ include/linux/memcontrol.h | 8 ++++++++ mm/memcontrol.c | 4 +++- mm/percpu.c | 10 ++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index fa4018afa5a4..6be43781ec7f 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1274,6 +1274,10 @@ PAGE_SIZE multiple when read back. Amount of memory used for storing in-kernel data structures. + percpu + Amount of memory used for storing per-cpu kernel + data structures. + sock Amount of memory used in network transmission buffers diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1bb49b600310..2c2d301eac33 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,7 @@ struct kmem_cache; enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, + MEMCG_PERCPU_B, MEMCG_NR_STAT, }; @@ -339,6 +340,13 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +static __always_inline bool memcg_stat_item_in_bytes(int idx) +{ + if (idx == MEMCG_PERCPU_B) + return true; + return vmstat_item_in_bytes(idx); +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8d9ceea7fe4d..36d5300f9b69 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -781,7 +781,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) if (mem_cgroup_disabled()) return; - if (vmstat_item_in_bytes(idx)) + if (memcg_stat_item_in_bytes(idx)) threshold <<= PAGE_SHIFT; x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]); @@ -1488,6 +1488,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "slab %llu\n", (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B))); + seq_buf_printf(&s, "percpu %llu\n", + (u64)memcg_page_state(memcg, MEMCG_PERCPU_B)); seq_buf_printf(&s, "sock %llu\n", (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE); diff --git a/mm/percpu.c b/mm/percpu.c index dc1a213293aa..f4709629e6de 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1610,6 +1610,11 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, if (chunk) { chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; + + rcu_read_lock(); + mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, + size * num_possible_cpus()); + rcu_read_unlock(); } else { obj_cgroup_uncharge(objcg, size * num_possible_cpus()); obj_cgroup_put(objcg); @@ -1628,6 +1633,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) obj_cgroup_uncharge(objcg, size * num_possible_cpus()); + rcu_read_lock(); + mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, + -(size * num_possible_cpus())); + rcu_read_unlock(); + obj_cgroup_put(objcg); } -- cgit From 3e38e0aaca9eafb12b1c4b731d1c10975cbe7974 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:25 -0700 Subject: mm: memcg: charge memcg percpu memory to the parent cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory cgroups are using large chunks of percpu memory to store vmstat data. Yet this memory is not accounted at all, so in the case when there are many (dying) cgroups, it's not exactly clear where all the memory is. Because the size of memory cgroup internal structures can dramatically exceed the size of object or page which is pinning it in the memory, it's not a good idea to simply ignore it. It actually breaks the isolation between cgroups. Let's account the consumed percpu memory to the parent cgroup. [guro@fb.com: add WARN_ON_ONCE()s, per Johannes] Link: http://lkml.kernel.org/r/20200811170611.GB1507044@carbon.DHCP.thefacebook.com Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200623184515.4132564-5-guro@fb.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36d5300f9b69..f1fd265b9f9e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5131,13 +5131,18 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { kfree(pn); return 1; } - pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); + pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_cpu) { free_percpu(pn->lruvec_stat_local); kfree(pn); @@ -5211,11 +5216,16 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) goto fail; - memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu); + memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_percpu) goto fail; @@ -5264,7 +5274,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) struct mem_cgroup *memcg; long error = -ENOMEM; + memalloc_use_memcg(parent); memcg = mem_cgroup_alloc(); + memalloc_unuse_memcg(); if (IS_ERR(memcg)) return ERR_CAST(memcg); -- cgit From 90631e1dea55bfc14a8ee8c594aa136b243d4c88 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:29 -0700 Subject: kselftests: cgroup: add perpcu memory accounting test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a simple test to check the percpu memory accounting. The test creates a cgroup tree with 1000 child cgroups and checks values of memory.current and memory.stat::percpu. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Cc: Christoph Lameter Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Shakeel Butt Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Michal Koutný Cc: Bixuan Cui Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200608230819.832349-6-guro@fb.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/cgroup/test_kmem.c | 70 +++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 5224dae216e5..0941aa16157e 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -18,6 +18,15 @@ #include "cgroup_util.h" +/* + * Memory cgroup charging and vmstat data aggregation is performed using + * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum + * discrepancy between charge and vmstat entries is number of cpus multiplied + * by 32 pages multiplied by 2. + */ +#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs()) + + static int alloc_dcache(const char *cgroup, void *arg) { unsigned long i; @@ -180,7 +189,7 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; sum = slab + anon + file + kernel_stack; - if (abs(sum - current) < 4096 * 32 * 2 * get_nprocs()) { + if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); @@ -331,6 +340,64 @@ cleanup: return ret; } +/* + * This test creates a sub-tree with 1000 memory cgroups. + * Then it checks that the memory.current on the parent level + * is greater than 0 and approximates matches the percpu value + * from memory.stat. + */ +static int test_percpu_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + long current, percpu; + int i; + + parent = cg_name(root, "percpu_basic_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) + goto cleanup_children; + + free(child); + } + + current = cg_read_long(parent, "memory.current"); + percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + + if (current > 0 && percpu > 0 && abs(current - percpu) < + MAX_VMSTAT_ERROR) + ret = KSFT_PASS; + else + printf("memory.current %ld\npercpu %ld\n", + current, percpu); + +cleanup_children: + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + cg_destroy(child); + free(child); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + #define T(x) { x, #x } struct kmem_test { int (*fn)(const char *root); @@ -341,6 +408,7 @@ struct kmem_test { T(test_kmem_proc_kpagecgroup), T(test_kmem_kernel_stacks), T(test_kmem_dead_cgroups), + T(test_percpu_basic), }; #undef T -- cgit From 8ca39e6874f812a393bb66d9fdbb7598d5f0451c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 11 Aug 2020 18:30:32 -0700 Subject: mm/hugetlb: add mempolicy check in the reservation routine In the reservation routine, we only check whether the cpuset meets the memory allocation requirements. But we ignore the mempolicy of MPOL_BIND case. If someone mmap hugetlb succeeds, but the subsequent memory allocation may fail due to mempolicy restrictions and receives the SIGBUS signal. This can be reproduced by the follow steps. 1) Compile the test case. cd tools/testing/selftests/vm/ gcc map_hugetlb.c -o map_hugetlb 2) Pre-allocate huge pages. Suppose there are 2 numa nodes in the system. Each node will pre-allocate one huge page. echo 2 > /proc/sys/vm/nr_hugepages 3) Run test case(mmap 4MB). We receive the SIGBUS signal. numactl --membind=3D0 ./map_hugetlb 4 With this patch applied, the mmap will fail in the step 3) and throw "mmap: Cannot allocate memory". [akpm@linux-foundation.org: include sched.h for `current'] Reported-by: Jianchao Guo Suggested-by: Michal Hocko Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: David Rientjes Cc: Mel Gorman Cc: Michel Lespinasse Cc: Baoquan He Link: http://lkml.kernel.org/r/20200728034938.14993-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 16 +++++++++++++++- mm/hugetlb.c | 22 ++++++++++++++++++---- mm/mempolicy.c | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ea9c15b60a96..5f1648c23e29 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -6,7 +6,7 @@ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 - +#include #include #include #include @@ -152,6 +152,15 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); +extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + struct mempolicy *mpol = get_task_policy(current); + + return policy_nodemask(gfp, mpol); +} + extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -281,5 +290,10 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, static inline void mpol_put_task_policy(struct task_struct *task) { } + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + return NULL; +} #endif /* CONFIG_NUMA */ #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e52c878940bb..dffafb5bf2ed 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3458,13 +3458,21 @@ static int __init default_hugepagesz_setup(char *s) } __setup("default_hugepagesz=", default_hugepagesz_setup); -static unsigned int cpuset_mems_nr(unsigned int *array) +static unsigned int allowed_mems_nr(struct hstate *h) { int node; unsigned int nr = 0; + nodemask_t *mpol_allowed; + unsigned int *array = h->free_huge_pages_node; + gfp_t gfp_mask = htlb_alloc_mask(h); + + mpol_allowed = policy_nodemask_current(gfp_mask); - for_each_node_mask(node, cpuset_current_mems_allowed) - nr += array[node]; + for_each_node_mask(node, cpuset_current_mems_allowed) { + if (!mpol_allowed || + (mpol_allowed && node_isset(node, *mpol_allowed))) + nr += array[node]; + } return nr; } @@ -3643,12 +3651,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta) * we fall back to check against current free page availability as * a best attempt and hopefully to minimize the impact of changing * semantics that cpuset has. + * + * Apart from cpuset, we also have memory policy mechanism that + * also determines from which node the kernel will allocate memory + * in a NUMA system. So similar to cpuset, we also should consider + * the memory policy of the current task. Similar to the description + * above. */ if (delta > 0) { if (gather_surplus_pages(h, delta) < 0) goto out; - if (delta > cpuset_mems_nr(h->free_huge_pages_node)) { + if (delta > allowed_mems_nr(h)) { return_unused_surplus_pages(h, delta); goto out; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b9e85d467352..7af44d7cdd11 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1890,7 +1890,7 @@ static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone) * Return a nodemask representing a mempolicy for filtering nodes for * page allocation */ -static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) +nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) { /* Lower zones don't get a nodemask applied for MPOL_BIND */ if (unlikely(policy->mode == MPOL_BIND) && -- cgit From ccc5dc67340c109e624e07e02790e9fbdec900d6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:36 -0700 Subject: mm/vmscan: make active/inactive ratio as 1:1 for anon lru Patch series "workingset protection/detection on the anonymous LRU list", v7. * PROBLEM In current implementation, newly created or swap-in anonymous page is started on the active list. Growing the active list results in rebalancing active/inactive list so old pages on the active list are demoted to the inactive list. Hence, hot page on the active list isn't protected at all. Following is an example of this situation. Assume that 50 hot pages on active list and system can contain total 100 pages. Numbers denote the number of pages on active/inactive list (active | inactive). (h) stands for hot pages and (uo) stands for used-once pages. 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(uo) | 50(h) 3. workload: another 50 newly created (used-once) pages 50(uo) | 50(uo), swap-out 50(h) As we can see, hot pages are swapped-out and it would cause swap-in later. * SOLUTION Since this is what we want to avoid, this patchset implements workingset protection. Like as the file LRU list, newly created or swap-in anonymous page is started on the inactive list. Also, like as the file LRU list, if enough reference happens, the page will be promoted. This simple modification changes the above example as following. 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(h) | 50(uo) 3. workload: another 50 newly created (used-once) pages 50(h) | 50(uo), swap-out 50(uo) hot pages remains in the active list. :) * EXPERIMENT I tested this scenario on my test bed and confirmed that this problem happens on current implementation. I also checked that it is fixed by this patchset. * SUBJECT workingset detection * PROBLEM Later part of the patchset implements the workingset detection for the anonymous LRU list. There is a corner case that workingset protection could cause thrashing. If we can avoid thrashing by workingset detection, we can get the better performance. Following is an example of thrashing due to the workingset protection. 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (will be hot) pages 50(h) | 50(wh) 3. workload: another 50 newly created (used-once) pages 50(h) | 50(uo), swap-out 50(wh) 4. workload: 50 (will be hot) pages 50(h) | 50(wh), swap-in 50(wh) 5. workload: another 50 newly created (used-once) pages 50(h) | 50(uo), swap-out 50(wh) 6. repeat 4, 5 Without workingset detection, this kind of workload cannot be promoted and thrashing happens forever. * SOLUTION Therefore, this patchset implements workingset detection. All the infrastructure for workingset detecion is already implemented, so there is not much work to do. First, extend workingset detection code to deal with the anonymous LRU list. Then, make swap cache handles the exceptional value for the shadow entry. Lastly, install/retrieve the shadow value into/from the swap cache and check the refault distance. * EXPERIMENT I made a test program to imitates above scenario and confirmed that problem exists. Then, I checked that this patchset fixes it. My test setup is a virtual machine with 8 cpus and 6100MB memory. But, the amount of the memory that the test program can use is about 280 MB. This is because the system uses large ram-backed swap and large ramdisk to capture the trace. Test scenario is like as below. 1. allocate cold memory (512MB) 2. allocate hot-1 memory (96MB) 3. activate hot-1 memory (96MB) 4. allocate another hot-2 memory (96MB) 5. access cold memory (128MB) 6. access hot-2 memory (96MB) 7. repeat 5, 6 Since hot-1 memory (96MB) is on the active list, the inactive list can contains roughly 190MB pages. hot-2 memory's re-access interval (96+128 MB) is more 190MB, so it cannot be promoted without workingset detection and swap-in/out happens repeatedly. With this patchset, workingset detection works and promotion happens. Therefore, swap-in/out occurs less. Here is the result. (average of 5 runs) type swap-in swap-out base 863240 989945 patch 681565 809273 As we can see, patched kernel do less swap-in/out. * OVERALL TEST (ebizzy using modified random function) ebizzy is the test program that main thread allocates lots of memory and child threads access them randomly during the given times. Swap-in will happen if allocated memory is larger than the system memory. The random function that represents the zipf distribution is used to make hot/cold memory. Hot/cold ratio is controlled by the parameter. If the parameter is high, hot memory is accessed much larger than cold one. If the parameter is low, the number of access on each memory would be similar. I uses various parameters in order to show the effect of patchset on various hot/cold ratio workload. My test setup is a virtual machine with 8 cpus, 1024 MB memory and 5120 MB ram swap. Result format is as following. param: 1-1024-0.1 - 1 (number of thread) - 1024 (allocated memory size, MB) - 0.1 (zipf distribution alpha, 0.1 works like as roughly uniform random, 1.3 works like as small portion of memory is hot and the others are cold) pswpin: smaller is better std: standard deviation improvement: negative is better * single thread param pswpin std improvement base 1-1024.0-0.1 14101983.40 79441.19 prot 1-1024.0-0.1 14065875.80 136413.01 ( -0.26 ) detect 1-1024.0-0.1 13910435.60 100804.82 ( -1.36 ) base 1-1024.0-0.7 7998368.80 43469.32 prot 1-1024.0-0.7 7622245.80 88318.74 ( -4.70 ) detect 1-1024.0-0.7 7618515.20 59742.07 ( -4.75 ) base 1-1024.0-1.3 1017400.80 38756.30 prot 1-1024.0-1.3 940464.60 29310.69 ( -7.56 ) detect 1-1024.0-1.3 945511.40 24579.52 ( -7.07 ) base 1-1280.0-0.1 22895541.40 50016.08 prot 1-1280.0-0.1 22860305.40 51952.37 ( -0.15 ) detect 1-1280.0-0.1 22705565.20 93380.35 ( -0.83 ) base 1-1280.0-0.7 13717645.60 46250.65 prot 1-1280.0-0.7 12935355.80 64754.43 ( -5.70 ) detect 1-1280.0-0.7 13040232.00 63304.00 ( -4.94 ) base 1-1280.0-1.3 1654251.40 4159.68 prot 1-1280.0-1.3 1522680.60 33673.50 ( -7.95 ) detect 1-1280.0-1.3 1599207.00 70327.89 ( -3.33 ) base 1-1536.0-0.1 31621775.40 31156.28 prot 1-1536.0-0.1 31540355.20 62241.36 ( -0.26 ) detect 1-1536.0-0.1 31420056.00 123831.27 ( -0.64 ) base 1-1536.0-0.7 19620760.60 60937.60 prot 1-1536.0-0.7 18337839.60 56102.58 ( -6.54 ) detect 1-1536.0-0.7 18599128.00 75289.48 ( -5.21 ) base 1-1536.0-1.3 2378142.40 20994.43 prot 1-1536.0-1.3 2166260.60 48455.46 ( -8.91 ) detect 1-1536.0-1.3 2183762.20 16883.24 ( -8.17 ) base 1-1792.0-0.1 40259714.80 90750.70 prot 1-1792.0-0.1 40053917.20 64509.47 ( -0.51 ) detect 1-1792.0-0.1 39949736.40 104989.64 ( -0.77 ) base 1-1792.0-0.7 25704884.40 69429.68 prot 1-1792.0-0.7 23937389.00 79945.60 ( -6.88 ) detect 1-1792.0-0.7 24271902.00 35044.30 ( -5.57 ) base 1-1792.0-1.3 3129497.00 32731.86 prot 1-1792.0-1.3 2796994.40 19017.26 ( -10.62 ) detect 1-1792.0-1.3 2886840.40 33938.82 ( -7.75 ) base 1-2048.0-0.1 48746924.40 50863.88 prot 1-2048.0-0.1 48631954.40 24537.30 ( -0.24 ) detect 1-2048.0-0.1 48509419.80 27085.34 ( -0.49 ) base 1-2048.0-0.7 32046424.40 78624.22 prot 1-2048.0-0.7 29764182.20 86002.26 ( -7.12 ) detect 1-2048.0-0.7 30250315.80 101282.14 ( -5.60 ) base 1-2048.0-1.3 3916723.60 24048.55 prot 1-2048.0-1.3 3490781.60 33292.61 ( -10.87 ) detect 1-2048.0-1.3 3585002.20 44942.04 ( -8.47 ) * multi thread param pswpin std improvement base 8-1024.0-0.1 16219822.60 329474.01 prot 8-1024.0-0.1 15959494.00 654597.45 ( -1.61 ) detect 8-1024.0-0.1 15773790.80 502275.25 ( -2.75 ) base 8-1024.0-0.7 9174107.80 537619.33 prot 8-1024.0-0.7 8571915.00 385230.08 ( -6.56 ) detect 8-1024.0-0.7 8489484.20 364683.00 ( -7.46 ) base 8-1024.0-1.3 1108495.60 83555.98 prot 8-1024.0-1.3 1038906.20 63465.20 ( -6.28 ) detect 8-1024.0-1.3 941817.80 32648.80 ( -15.04 ) base 8-1280.0-0.1 25776114.20 450480.45 prot 8-1280.0-0.1 25430847.00 465627.07 ( -1.34 ) detect 8-1280.0-0.1 25282555.00 465666.55 ( -1.91 ) base 8-1280.0-0.7 15218968.00 702007.69 prot 8-1280.0-0.7 13957947.80 492643.86 ( -8.29 ) detect 8-1280.0-0.7 14158331.20 238656.02 ( -6.97 ) base 8-1280.0-1.3 1792482.80 30512.90 prot 8-1280.0-1.3 1577686.40 34002.62 ( -11.98 ) detect 8-1280.0-1.3 1556133.00 22944.79 ( -13.19 ) base 8-1536.0-0.1 33923761.40 575455.85 prot 8-1536.0-0.1 32715766.20 300633.51 ( -3.56 ) detect 8-1536.0-0.1 33158477.40 117764.51 ( -2.26 ) base 8-1536.0-0.7 20628907.80 303851.34 prot 8-1536.0-0.7 19329511.20 341719.31 ( -6.30 ) detect 8-1536.0-0.7 20013934.00 385358.66 ( -2.98 ) base 8-1536.0-1.3 2588106.40 130769.20 prot 8-1536.0-1.3 2275222.40 89637.06 ( -12.09 ) detect 8-1536.0-1.3 2365008.40 124412.55 ( -8.62 ) base 8-1792.0-0.1 43328279.20 946469.12 prot 8-1792.0-0.1 41481980.80 525690.89 ( -4.26 ) detect 8-1792.0-0.1 41713944.60 406798.93 ( -3.73 ) base 8-1792.0-0.7 27155647.40 536253.57 prot 8-1792.0-0.7 24989406.80 502734.52 ( -7.98 ) detect 8-1792.0-0.7 25524806.40 263237.87 ( -6.01 ) base 8-1792.0-1.3 3260372.80 137907.92 prot 8-1792.0-1.3 2879187.80 63597.26 ( -11.69 ) detect 8-1792.0-1.3 2892962.20 33229.13 ( -11.27 ) base 8-2048.0-0.1 50583989.80 710121.48 prot 8-2048.0-0.1 49599984.40 228782.42 ( -1.95 ) detect 8-2048.0-0.1 50578596.00 660971.66 ( -0.01 ) base 8-2048.0-0.7 33765479.60 812659.55 prot 8-2048.0-0.7 30767021.20 462907.24 ( -8.88 ) detect 8-2048.0-0.7 32213068.80 211884.24 ( -4.60 ) base 8-2048.0-1.3 3941675.80 28436.45 prot 8-2048.0-1.3 3538742.40 76856.08 ( -10.22 ) detect 8-2048.0-1.3 3579397.80 58630.95 ( -9.19 ) As we can see, all the cases show improvement. Especially, test case with zipf distribution 1.3 show more improvements. It means that if there is a hot/cold tendency in anon pages, this patchset works better. This patch (of 6): Current implementation of LRU management for anonymous page has some problems. Most important one is that it doesn't protect the workingset, that is, pages on the active LRU list. Although, this problem will be fixed in the following patchset, the preparation is required and this patch does it. What following patch does is to implement workingset protection. After the following patchset, newly created or swap-in pages will start their lifetime on the inactive list. If inactive list is too small, there is not enough chance to be referenced and the page cannot become the workingset. In order to provide the newly anonymous or swap-in pages enough chance to be referenced again, this patch makes active/inactive LRU ratio as 1:1. This is just a temporary measure. Later patch in the series introduces workingset detection for anonymous LRU that will be used to better decide if pages should start on the active and inactive list. Afterwards this patch is effectively reverted. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Hugh Dickins Cc: Minchan Kim Cc: Mel Gorman Cc: Matthew Wilcox Link: http://lkml.kernel.org/r/1595490560-15117-1-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1595490560-15117-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 72da290b171b..e34fc04b7045 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2208,7 +2208,7 @@ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) active = lruvec_page_state(lruvec, NR_LRU_BASE + active_lru); gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) + if (gb && is_file_lru(inactive_lru)) inactive_ratio = int_sqrt(10 * gb); else inactive_ratio = 1; -- cgit From b518154e59aab3ad0780a169c5cc84bd4ee4357e Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:40 -0700 Subject: mm/vmscan: protect the workingset on anonymous LRU In current implementation, newly created or swap-in anonymous page is started on active list. Growing active list results in rebalancing active/inactive list so old pages on active list are demoted to inactive list. Hence, the page on active list isn't protected at all. Following is an example of this situation. Assume that 50 hot pages on active list. Numbers denote the number of pages on active/inactive list (active | inactive). 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(uo) | 50(h) 3. workload: another 50 newly created (used-once) pages 50(uo) | 50(uo), swap-out 50(h) This patch tries to fix this issue. Like as file LRU, newly created or swap-in anonymous pages will be inserted to the inactive list. They are promoted to active list if enough reference happens. This simple modification changes the above example as following. 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(h) | 50(uo) 3. workload: another 50 newly created (used-once) pages 50(h) | 50(uo), swap-out 50(uo) As you can see, hot pages on active list would be protected. Note that, this implementation has a drawback that the page cannot be promoted and will be swapped-out if re-access interval is greater than the size of inactive list but less than the size of total(active+inactive). To solve this potential issue, following patch will apply workingset detection similar to the one that's already applied to file LRU. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- kernel/events/uprobes.c | 2 +- mm/huge_memory.c | 2 +- mm/khugepaged.c | 2 +- mm/memory.c | 9 ++++----- mm/migrate.c | 2 +- mm/swap.c | 13 +++++++------ mm/swapfile.c | 2 +- mm/userfaultfd.c | 2 +- mm/vmscan.c | 4 +--- 10 files changed, 19 insertions(+), 21 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7eb59bc552a5..51ec9cdb92c0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -352,7 +352,7 @@ extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void lru_cache_add_active_or_unevictable(struct page *page, +extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 25de10c904e6..49047d479c57 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, if (new_page) { get_page(new_page); page_add_new_anon_rmap(new_page, vma, addr, false); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); } else /* no new page, just dec_mm_counter for old_page */ dec_mm_counter(mm, MM_ANONPAGES); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 206f52b36ffb..863c495776d7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -640,7 +640,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, entry = mk_huge_pmd(page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr, true); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b52bd46ad146..15a9af791014 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1173,7 +1173,7 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address, true); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); diff --git a/mm/memory.c b/mm/memory.c index c39a13b09602..6fe8b5b22c57 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2715,7 +2715,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the @@ -3266,10 +3266,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) /* ksm created a completely new copy */ if (unlikely(page != swapcache && swapcache)) { page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } else { do_page_add_anon_rmap(page, vma, vmf->address, exclusive); - activate_page(page); } swap_free(entry); @@ -3414,7 +3413,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); setpte: set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); @@ -3672,7 +3671,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); diff --git a/mm/migrate.c b/mm/migrate.c index d179657f8685..819e55130134 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2830,7 +2830,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, inc_mm_counter(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, addr, false); if (!is_zone_device_page(page)) - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); get_page(page); if (flush) { diff --git a/mm/swap.c b/mm/swap.c index de257c0a89b1..9285e60c7d6e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -476,23 +476,24 @@ void lru_cache_add(struct page *page) EXPORT_SYMBOL(lru_cache_add); /** - * lru_cache_add_active_or_unevictable + * lru_cache_add_inactive_or_unevictable * @page: the page to be added to LRU * @vma: vma in which page is mapped for determining reclaimability * - * Place @page on the active or unevictable LRU list, depending on its + * Place @page on the inactive or unevictable LRU list, depending on its * evictability. Note that if the page is not evictable, it goes * directly back onto it's zone's unevictable list, it does NOT use a * per cpu pagevec. */ -void lru_cache_add_active_or_unevictable(struct page *page, +void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma) { + bool unevictable; + VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) - SetPageActive(page); - else if (!TestSetPageMlocked(page)) { + unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; + if (unlikely(unevictable) && !TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte diff --git a/mm/swapfile.c b/mm/swapfile.c index 6c26916e95fd..82183432fdd0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1915,7 +1915,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, page_add_anon_rmap(page, vma, addr, false); } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, addr, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } swap_free(entry); /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index b80419320c7d..9a3d451402d7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, inc_mm_counter(dst_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, dst_vma, dst_addr, false); - lru_cache_add_active_or_unevictable(page, dst_vma); + lru_cache_add_inactive_or_unevictable(page, dst_vma); set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); diff --git a/mm/vmscan.c b/mm/vmscan.c index e34fc04b7045..783cd7fdc61a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -998,8 +998,6 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageSwapBacked(page)) - return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table * references from the instantiating fault, so we need @@ -1022,7 +1020,7 @@ static enum page_references page_check_references(struct page *page, /* * Activate file-backed executable pages after first usage. */ - if (vm_flags & VM_EXEC) + if ((vm_flags & VM_EXEC) && !PageSwapBacked(page)) return PAGEREF_ACTIVATE; return PAGEREF_KEEP; -- cgit From 170b04b7ae49634df103810dad67b22cf8a99aa6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:43 -0700 Subject: mm/workingset: prepare the workingset detection infrastructure for anon LRU To prepare the workingset detection for anon LRU, this patch splits workingset event counters for refault, activate and restore into anon and file variants, as well as the refaults counter in struct lruvec. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-4-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 16 +++++++++++----- mm/memcontrol.c | 16 +++++++++++----- mm/vmscan.c | 15 ++++++++++----- mm/vmstat.c | 9 ++++++--- mm/workingset.c | 8 +++++--- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 635a96cd9b1f..efbd95dd3bbf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -173,9 +173,15 @@ enum node_stat_item { NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, - WORKINGSET_REFAULT, - WORKINGSET_ACTIVATE, - WORKINGSET_RESTORE, + WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_FILE, + WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_FILE, + WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. @@ -277,8 +283,8 @@ struct lruvec { unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; - /* Refaults at the time of last reclaim cycle */ - unsigned long refaults; + /* Refaults at the time of last reclaim cycle, anon=0, file=1 */ + unsigned long refaults[2]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_MEMCG diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f1fd265b9f9e..c6c579217855 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1530,12 +1530,18 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "workingset_refault %lu\n", - memcg_page_state(memcg, WORKINGSET_REFAULT)); - seq_buf_printf(&s, "workingset_activate %lu\n", - memcg_page_state(memcg, WORKINGSET_ACTIVATE)); + seq_buf_printf(&s, "workingset_refault_anon %lu\n", + memcg_page_state(memcg, WORKINGSET_REFAULT_ANON)); + seq_buf_printf(&s, "workingset_refault_file %lu\n", + memcg_page_state(memcg, WORKINGSET_REFAULT_FILE)); + seq_buf_printf(&s, "workingset_activate_anon %lu\n", + memcg_page_state(memcg, WORKINGSET_ACTIVATE_ANON)); + seq_buf_printf(&s, "workingset_activate_file %lu\n", + memcg_page_state(memcg, WORKINGSET_ACTIVATE_FILE)); seq_buf_printf(&s, "workingset_restore %lu\n", - memcg_page_state(memcg, WORKINGSET_RESTORE)); + memcg_page_state(memcg, WORKINGSET_RESTORE_ANON)); + seq_buf_printf(&s, "workingset_restore %lu\n", + memcg_page_state(memcg, WORKINGSET_RESTORE_FILE)); seq_buf_printf(&s, "workingset_nodereclaim %lu\n", memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); diff --git a/mm/vmscan.c b/mm/vmscan.c index 783cd7fdc61a..017f323318a3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2683,7 +2683,10 @@ again: if (!sc->force_deactivate) { unsigned long refaults; - if (inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) sc->may_deactivate |= DEACTIVATE_ANON; else sc->may_deactivate &= ~DEACTIVATE_ANON; @@ -2694,8 +2697,8 @@ again: * rid of any stale active pages quickly. */ refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE); - if (refaults != target_lruvec->refaults || + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) sc->may_deactivate |= DEACTIVATE_FILE; else @@ -2972,8 +2975,10 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) unsigned long refaults; target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE); - target_lruvec->refaults = refaults; + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); + target_lruvec->refaults[0] = refaults; + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE); + target_lruvec->refaults[1] = refaults; } /* diff --git a/mm/vmstat.c b/mm/vmstat.c index 2b866cbab11d..fef03463a0cf 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1167,9 +1167,12 @@ const char * const vmstat_text[] = { "nr_isolated_anon", "nr_isolated_file", "workingset_nodes", - "workingset_refault", - "workingset_activate", - "workingset_restore", + "workingset_refault_anon", + "workingset_refault_file", + "workingset_activate_anon", + "workingset_activate_file", + "workingset_restore_anon", + "workingset_restore_file", "workingset_nodereclaim", "nr_anon_pages", "nr_mapped", diff --git a/mm/workingset.c b/mm/workingset.c index b199726924dd..941bbaa6c262 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -280,6 +281,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) */ void workingset_refault(struct page *page, void *shadow) { + bool file = page_is_file_lru(page); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -346,7 +348,7 @@ void workingset_refault(struct page *page, void *shadow) memcg = page_memcg(page); lruvec = mem_cgroup_lruvec(memcg, pgdat); - inc_lruvec_state(lruvec, WORKINGSET_REFAULT); + inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); /* * Compare the distance to the existing workingset size. We @@ -366,7 +368,7 @@ void workingset_refault(struct page *page, void *shadow) SetPageActive(page); workingset_age_nonresident(lruvec, hpage_nr_pages(page)); - inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); /* Page was active prior to eviction */ if (workingset) { @@ -375,7 +377,7 @@ void workingset_refault(struct page *page, void *shadow) spin_lock_irq(&page_pgdat(page)->lru_lock); lru_note_cost_page(page); spin_unlock_irq(&page_pgdat(page)->lru_lock); - inc_lruvec_state(lruvec, WORKINGSET_RESTORE); + inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: rcu_read_unlock(); -- cgit From 3852f6768ede542ed48b9077bedf482c7ecb6327 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:47 -0700 Subject: mm/swapcache: support to handle the shadow entries Workingset detection for anonymous page will be implemented in the following patch and it requires to store the shadow entries into the swapcache. This patch implements an infrastructure to store the shadow entry in the swapcache. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/1595490560-15117-5-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 17 ++++++++++++---- mm/shmem.c | 3 ++- mm/swap_state.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++------ mm/swapfile.c | 2 ++ mm/vmscan.c | 2 +- 5 files changed, 69 insertions(+), 12 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 51ec9cdb92c0..8a4c592698a9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -414,9 +414,13 @@ extern struct address_space *swapper_spaces[]; extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); -extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); -extern void __delete_from_swap_cache(struct page *, swp_entry_t entry); +extern int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp); +extern void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow); extern void delete_from_swap_cache(struct page *); +extern void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -570,13 +574,13 @@ static inline int add_to_swap(struct page *page) } static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp_mask) + gfp_t gfp_mask, void **shadowp) { return -1; } static inline void __delete_from_swap_cache(struct page *page, - swp_entry_t entry) + swp_entry_t entry, void *shadow) { } @@ -584,6 +588,11 @@ static inline void delete_from_swap_cache(struct page *page) { } +static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ +} + static inline int page_swapcount(struct page *page) { return 0; diff --git a/mm/shmem.c b/mm/shmem.c index eb6b36d89722..49bde088a7ea 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1434,7 +1434,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) list_add(&info->swaplist, &shmem_swaplist); if (add_to_swap_cache(page, swap, - __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN) == 0) { + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, + NULL) == 0) { spin_lock_irq(&info->lock); shmem_recalc_inode(inode); info->swapped++; diff --git a/mm/swap_state.c b/mm/swap_state.c index e82f4f8b1f63..a29b33c7c236 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -110,12 +110,14 @@ void show_swap_cache_info(void) * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ -int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) +int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp) { struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swp_offset(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); unsigned long i, nr = hpage_nr_pages(page); + void *old; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); @@ -125,16 +127,25 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) SetPageSwapCache(page); do { + unsigned long nr_shadows = 0; + xas_lock_irq(&xas); xas_create_range(&xas); if (xas_error(&xas)) goto unlock; for (i = 0; i < nr; i++) { VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); + old = xas_load(&xas); + if (xa_is_value(old)) { + nr_shadows++; + if (shadowp) + *shadowp = old; + } set_page_private(page + i, entry.val + i); xas_store(&xas, page); xas_next(&xas); } + address_space->nrexceptional -= nr_shadows; address_space->nrpages += nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); ADD_CACHE_INFO(add_total, nr); @@ -154,7 +165,8 @@ unlock: * This must be called only on pages that have * been verified to be in the swap cache. */ -void __delete_from_swap_cache(struct page *page, swp_entry_t entry) +void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow) { struct address_space *address_space = swap_address_space(entry); int i, nr = hpage_nr_pages(page); @@ -166,12 +178,14 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(PageWriteback(page), page); for (i = 0; i < nr; i++) { - void *entry = xas_store(&xas, NULL); + void *entry = xas_store(&xas, shadow); VM_BUG_ON_PAGE(entry != page, entry); set_page_private(page + i, 0); xas_next(&xas); } ClearPageSwapCache(page); + if (shadow) + address_space->nrexceptional += nr; address_space->nrpages -= nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); ADD_CACHE_INFO(del_total, nr); @@ -208,7 +222,7 @@ int add_to_swap(struct page *page) * Add it to the swap cache. */ err = add_to_swap_cache(page, entry, - __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN); + __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); if (err) /* * add_to_swap_cache() doesn't return -EEXIST, so we can safely @@ -246,13 +260,44 @@ void delete_from_swap_cache(struct page *page) struct address_space *address_space = swap_address_space(entry); xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(page, entry); + __delete_from_swap_cache(page, entry, NULL); xa_unlock_irq(&address_space->i_pages); put_swap_page(page, entry); page_ref_sub(page, hpage_nr_pages(page)); } +void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ + unsigned long curr = begin; + void *old; + + for (;;) { + unsigned long nr_shadows = 0; + swp_entry_t entry = swp_entry(type, curr); + struct address_space *address_space = swap_address_space(entry); + XA_STATE(xas, &address_space->i_pages, curr); + + xa_lock_irq(&address_space->i_pages); + xas_for_each(&xas, old, end) { + if (!xa_is_value(old)) + continue; + xas_store(&xas, NULL); + nr_shadows++; + } + address_space->nrexceptional -= nr_shadows; + xa_unlock_irq(&address_space->i_pages); + + /* search the next swapcache until we meet end */ + curr >>= SWAP_ADDRESS_SPACE_SHIFT; + curr++; + curr <<= SWAP_ADDRESS_SPACE_SHIFT; + if (curr > end) + break; + } +} + /* * If we are the only user, then try to free up the swap cache. * @@ -429,7 +474,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, __SetPageSwapBacked(page); /* May fail (-ENOMEM) if XArray node allocation failed. */ - if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) { + if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, NULL)) { put_swap_page(page, entry); goto fail_unlock; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 82183432fdd0..e653eea1eb88 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -696,6 +696,7 @@ static void add_to_avail_list(struct swap_info_struct *p) static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { + unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); @@ -721,6 +722,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_slot_free_notify(si->bdev, offset); offset++; } + clear_shadow_from_swap_cache(si->type, begin, end); } static void set_cluster_next(struct swap_info_struct *si, unsigned long next) diff --git a/mm/vmscan.c b/mm/vmscan.c index 017f323318a3..e84c4dd08c4e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -896,7 +896,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; mem_cgroup_swapout(page, swap); - __delete_from_swap_cache(page, swap); + __delete_from_swap_cache(page, swap, NULL); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); workingset_eviction(page, target_memcg); -- cgit From aae466b0052e1888edd1d7f473d4310d64936196 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:50 -0700 Subject: mm/swap: implement workingset detection for anonymous LRU This patch implements workingset detection for anonymous LRU. All the infrastructure is implemented by the previous patches so this patch just activates the workingset detection by installing/retrieving the shadow entry and adding refault calculation. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ mm/memory.c | 11 ++++------- mm/swap_state.c | 23 ++++++++++++++++++----- mm/vmscan.c | 7 ++++--- mm/workingset.c | 15 +++++++++++---- 5 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 8a4c592698a9..661046994db4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -414,6 +414,7 @@ extern struct address_space *swapper_spaces[]; extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); +extern void *get_shadow_from_swap_cache(swp_entry_t entry); extern int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp, void **shadowp); extern void __delete_from_swap_cache(struct page *page, @@ -573,6 +574,11 @@ static inline int add_to_swap(struct page *page) return 0; } +static inline void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + return NULL; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask, void **shadowp) { diff --git a/mm/memory.c b/mm/memory.c index 6fe8b5b22c57..de311fc7639e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3098,6 +3098,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) int locked; int exclusive = 0; vm_fault_t ret = 0; + void *shadow = NULL; if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; @@ -3149,13 +3150,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_page; } - /* - * XXX: Move to lru_cache_add() when it - * supports new vs putback - */ - spin_lock_irq(&page_pgdat(page)->lru_lock); - lru_note_cost_page(page); - spin_unlock_irq(&page_pgdat(page)->lru_lock); + shadow = get_shadow_from_swap_cache(entry); + if (shadow) + workingset_refault(page, shadow); lru_cache_add(page); swap_readpage(page, true); diff --git a/mm/swap_state.c b/mm/swap_state.c index a29b33c7c236..b73aabdfd35a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -106,6 +106,20 @@ void show_swap_cache_info(void) printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); } +void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + struct address_space *address_space = swap_address_space(entry); + pgoff_t idx = swp_offset(entry); + struct page *page; + + page = find_get_entry(address_space, idx); + if (xa_is_value(page)) + return page; + if (page) + put_page(page); + return NULL; +} + /* * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. @@ -406,6 +420,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, { struct swap_info_struct *si; struct page *page; + void *shadow = NULL; *new_page_allocated = false; @@ -474,7 +489,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, __SetPageSwapBacked(page); /* May fail (-ENOMEM) if XArray node allocation failed. */ - if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, NULL)) { + if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) { put_swap_page(page, entry); goto fail_unlock; } @@ -484,10 +499,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, goto fail_unlock; } - /* XXX: Move to lru_cache_add() when it supports new vs putback */ - spin_lock_irq(&page_pgdat(page)->lru_lock); - lru_note_cost_page(page); - spin_unlock_irq(&page_pgdat(page)->lru_lock); + if (shadow) + workingset_refault(page, shadow); /* Caller will initiate read into locked page */ SetPageWorkingset(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index e84c4dd08c4e..66d73fea80e4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -854,6 +854,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, { unsigned long flags; int refcount; + void *shadow = NULL; BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); @@ -896,13 +897,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; mem_cgroup_swapout(page, swap); - __delete_from_swap_cache(page, swap, NULL); + if (reclaimed && !mapping_exiting(mapping)) + shadow = workingset_eviction(page, target_memcg); + __delete_from_swap_cache(page, swap, shadow); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); - workingset_eviction(page, target_memcg); } else { void (*freepage)(struct page *); - void *shadow = NULL; freepage = mapping->a_ops->freepage; /* diff --git a/mm/workingset.c b/mm/workingset.c index 941bbaa6c262..8cbe4e3cbe5c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -353,15 +353,22 @@ void workingset_refault(struct page *page, void *shadow) /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if - * all the memory was available to the page cache. Whether - * cache can compete with anon or not depends on having swap. + * all the memory was available to the workingset. Whether + * workingset competition needs to consider anon or not depends + * on having swap. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); - if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { + if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, - NR_INACTIVE_ANON); + NR_INACTIVE_FILE); + } + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); + if (file) { + workingset_size += lruvec_page_state(eviction_lruvec, + NR_INACTIVE_ANON); + } } if (refault_distance > workingset_size) goto out; -- cgit From 4002570c5c585c4c9a889e45dd104ed663257eec Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:54 -0700 Subject: mm/vmscan: restore active/inactive ratio for anonymous LRU Now that workingset detection is implemented for anonymous LRU, we don't need large inactive list to allow detecting frequently accessed pages before they are reclaimed, anymore. This effectively reverts the temporary measure put in by commit "mm/vmscan: make active/inactive ratio as 1:1 for anon lru". Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-7-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 66d73fea80e4..846b7e9b8d2e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2207,7 +2207,7 @@ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) active = lruvec_page_state(lruvec, NR_LRU_BASE + active_lru); gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb && is_file_lru(inactive_lru)) + if (gb) inactive_ratio = int_sqrt(10 * gb); else inactive_ratio = 1; -- cgit From 471e78cc7687337abd10626196f5addb30b01824 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 11 Aug 2020 18:30:57 -0700 Subject: /proc/PID/smaps: consistent whitespace output format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The keys in smaps output are padded to fixed width with spaces. All except for THPeligible that uses tabs (only since commit c06306696f83 ("mm: thp: fix false negative of shmem vma's THP eligibility")). Unify the output formatting to save time debugging some naïve parsers. (Part of the unification is also aligning FilePmdMapped with others.) Signed-off-by: Michal Koutný Signed-off-by: Andrew Morton Acked-by: Yang Shi Cc: Alexey Dobriyan Cc: Matthew Wilcox Link: http://lkml.kernel.org/r/20200728083207.17531-1-mkoutny@suse.com Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbda4499a859..5066b0251ed8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -786,7 +786,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); - SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); @@ -816,7 +816,7 @@ static int show_smap(struct seq_file *m, void *v) __show_smap(m, &mss, false); - seq_printf(m, "THPeligible: %d\n", + seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); if (arch_pkeys_enabled()) -- cgit From facdaa917c4d5a376d09d25865f5a863f906234a Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 11 Aug 2020 18:31:00 -0700 Subject: mm: proactive compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some applications, we need to allocate almost all memory as hugepages. However, on a running system, higher-order allocations can fail if the memory is fragmented. Linux kernel currently does on-demand compaction as we request more hugepages, but this style of compaction incurs very high latency. Experiments with one-time full memory compaction (followed by hugepage allocations) show that kernel is able to restore a highly fragmented memory state to a fairly compacted memory state within <1 sec for a 32G system. Such data suggests that a more proactive compaction can help us allocate a large fraction of memory as hugepages keeping allocation latencies low. For a more proactive compaction, the approach taken here is to define a new sysctl called 'vm.compaction_proactiveness' which dictates bounds for external fragmentation which kcompactd tries to maintain. The tunable takes a value in range [0, 100], with a default of 20. Note that a previous version of this patch [1] was found to introduce too many tunables (per-order extfrag{low, high}), but this one reduces them to just one sysctl. Also, the new tunable is an opaque value instead of asking for specific bounds of "external fragmentation", which would have been difficult to estimate. The internal interpretation of this opaque value allows for future fine-tuning. Currently, we use a simple translation from this tunable to [low, high] "fragmentation score" thresholds (low=100-proactiveness, high=low+10%). The score for a node is defined as weighted mean of per-zone external fragmentation. A zone's present_pages determines its weight. To periodically check per-node score, we reuse per-node kcompactd threads, which are woken up every 500 milliseconds to check the same. If a node's score exceeds its high threshold (as derived from user-provided proactiveness value), proactive compaction is started until its score reaches its low threshold value. By default, proactiveness is set to 20, which implies threshold values of low=80 and high=90. This patch is largely based on ideas from Michal Hocko [2]. See also the LWN article [3]. Performance data ================ System: x64_64, 1T RAM, 80 CPU threads. Kernel: 5.6.0-rc3 + this patch echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag Before starting the driver, the system was fragmented from a userspace program that allocates all memory and then for each 2M aligned section, frees 3/4 of base pages using munmap. The workload is mainly anonymous userspace pages, which are easy to move around. I intentionally avoided unmovable pages in this test to see how much latency we incur when hugepage allocations hit direct compaction. 1. Kernel hugepage allocation latencies With the system in such a fragmented state, a kernel driver then allocates as many hugepages as possible and measures allocation latency: (all latency values are in microseconds) - With vanilla 5.6.0-rc3 percentile latency –––––––––– ––––––– 5 7894 10 9496 25 12561 30 15295 40 18244 50 21229 60 27556 75 30147 80 31047 90 32859 95 33799 Total 2M hugepages allocated = 383859 (749G worth of hugepages out of 762G total free => 98% of free memory could be allocated as hugepages) - With 5.6.0-rc3 + this patch, with proactiveness=20 sysctl -w vm.compaction_proactiveness=20 percentile latency –––––––––– ––––––– 5 2 10 2 25 3 30 3 40 3 50 4 60 4 75 4 80 4 90 5 95 429 Total 2M hugepages allocated = 384105 (750G worth of hugepages out of 762G total free => 98% of free memory could be allocated as hugepages) 2. JAVA heap allocation In this test, we first fragment memory using the same method as for (1). Then, we start a Java process with a heap size set to 700G and request the heap to be allocated with THP hugepages. We also set THP to madvise to allow hugepage backing of this heap. /usr/bin/time java -Xms700G -Xmx700G -XX:+UseTransparentHugePages -XX:+AlwaysPreTouch The above command allocates 700G of Java heap using hugepages. - With vanilla 5.6.0-rc3 17.39user 1666.48system 27:37.89elapsed - With 5.6.0-rc3 + this patch, with proactiveness=20 8.35user 194.58system 3:19.62elapsed Elapsed time remains around 3:15, as proactiveness is further increased. Note that proactive compaction happens throughout the runtime of these workloads. The situation of one-time compaction, sufficient to supply hugepages for following allocation stream, can probably happen for more extreme proactiveness values, like 80 or 90. In the above Java workload, proactiveness is set to 20. The test starts with a node's score of 80 or higher, depending on the delay between the fragmentation step and starting the benchmark, which gives more-or-less time for the initial round of compaction. As t he benchmark consumes hugepages, node's score quickly rises above the high threshold (90) and proactive compaction starts again, which brings down the score to the low threshold level (80). Repeat. bpftrace also confirms proactive compaction running 20+ times during the runtime of this Java benchmark. kcompactd threads consume 100% of one of the CPUs while it tries to bring a node's score within thresholds. Backoff behavior ================ Above workloads produce a memory state which is easy to compact. However, if memory is filled with unmovable pages, proactive compaction should essentially back off. To test this aspect: - Created a kernel driver that allocates almost all memory as hugepages followed by freeing first 3/4 of each hugepage. - Set proactiveness=40 - Note that proactive_compact_node() is deferred maximum number of times with HPAGE_FRAG_CHECK_INTERVAL_MSEC of wait between each check (=> ~30 seconds between retries). [1] https://patchwork.kernel.org/patch/11098289/ [2] https://lore.kernel.org/linux-mm/20161230131412.GI13301@dhcp22.suse.cz/ [3] https://lwn.net/Articles/817905/ Signed-off-by: Nitin Gupta Signed-off-by: Andrew Morton Tested-by: Oleksandr Natalenko Reviewed-by: Vlastimil Babka Reviewed-by: Khalid Aziz Reviewed-by: Oleksandr Natalenko Cc: Vlastimil Babka Cc: Khalid Aziz Cc: Michal Hocko Cc: Mel Gorman Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Joonsoo Kim Cc: David Rientjes Cc: Nitin Gupta Cc: Oleksandr Natalenko Link: http://lkml.kernel.org/r/20200616204527.19185-1-nigupta@nvidia.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/vm.rst | 15 +++ include/linux/compaction.h | 2 + kernel/sysctl.c | 9 ++ mm/compaction.c | 183 +++++++++++++++++++++++++++++++- mm/internal.h | 1 + mm/vmstat.c | 18 ++++ 6 files changed, 223 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index d997cc3c26d0..4b9d2e8e9142 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -119,6 +119,21 @@ all zones are compacted such that free memory is available in contiguous blocks where possible. This can be important for example in the allocation of huge pages although processes will also directly compact memory as required. +compaction_proactiveness +======================== + +This tunable takes a value in the range [0, 100] with a default value of +20. This tunable determines how aggressively compaction is done in the +background. Setting it to 0 disables proactive compaction. + +Note that compaction has a non-trivial system-wide impact as pages +belonging to different processes are moved around, which could also lead +to latency spikes in unsuspecting applications. The kernel employs +various heuristics to avoid wasting CPU cycles if it detects that +proactive compaction is not being effective. + +Be careful when setting it to extreme values like 100, as that may +cause excessive background compaction activity. compact_unevictable_allowed =========================== diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6fa0eea3f530..7a242d46454e 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,11 +85,13 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; +extern int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; +extern int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f785de3caac0..ab72e52f8a7b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2851,6 +2851,15 @@ static struct ctl_table vm_table[] = { .mode = 0200, .proc_handler = sysctl_compaction_handler, }, + { + .procname = "compaction_proactiveness", + .data = &sysctl_compaction_proactiveness, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &one_hundred, + }, { .procname = "extfrag_threshold", .data = &sysctl_extfrag_threshold, diff --git a/mm/compaction.c b/mm/compaction.c index 86375605faa9..544a98811c82 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -50,6 +50,24 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order) #define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order) +/* + * Fragmentation score check interval for proactive compaction purposes. + */ +static const int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; + +/* + * Page order with-respect-to which proactive compaction + * calculates external fragmentation, which is used as + * the "fragmentation score" of a node/zone. + */ +#if defined CONFIG_TRANSPARENT_HUGEPAGE +#define COMPACTION_HPAGE_ORDER HPAGE_PMD_ORDER +#elif defined HUGETLB_PAGE_ORDER +#define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER +#else +#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) +#endif + static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -1857,6 +1875,76 @@ static inline bool is_via_compact_memory(int order) return order == -1; } +static bool kswapd_is_running(pg_data_t *pgdat) +{ + return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING); +} + +/* + * A zone's fragmentation score is the external fragmentation wrt to the + * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value + * in the range [0, 100]. + * + * The scaling factor ensures that proactive compaction focuses on larger + * zones like ZONE_NORMAL, rather than smaller, specialized zones like + * ZONE_DMA32. For smaller zones, the score value remains close to zero, + * and thus never exceeds the high threshold for proactive compaction. + */ +static int fragmentation_score_zone(struct zone *zone) +{ + unsigned long score; + + score = zone->present_pages * + extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); + return div64_ul(score, zone->zone_pgdat->node_present_pages + 1); +} + +/* + * The per-node proactive (background) compaction process is started by its + * corresponding kcompactd thread when the node's fragmentation score + * exceeds the high threshold. The compaction process remains active till + * the node's score falls below the low threshold, or one of the back-off + * conditions is met. + */ +static int fragmentation_score_node(pg_data_t *pgdat) +{ + unsigned long score = 0; + int zoneid; + + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + struct zone *zone; + + zone = &pgdat->node_zones[zoneid]; + score += fragmentation_score_zone(zone); + } + + return score; +} + +static int fragmentation_score_wmark(pg_data_t *pgdat, bool low) +{ + int wmark_low; + + /* + * Cap the low watermak to avoid excessive compaction + * activity in case a user sets the proactivess tunable + * close to 100 (maximum). + */ + wmark_low = max(100 - sysctl_compaction_proactiveness, 5); + return low ? wmark_low : min(wmark_low + 10, 100); +} + +static bool should_proactive_compact_node(pg_data_t *pgdat) +{ + int wmark_high; + + if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat)) + return false; + + wmark_high = fragmentation_score_wmark(pgdat, false); + return fragmentation_score_node(pgdat) > wmark_high; +} + static enum compact_result __compact_finished(struct compact_control *cc) { unsigned int order; @@ -1883,6 +1971,25 @@ static enum compact_result __compact_finished(struct compact_control *cc) return COMPACT_PARTIAL_SKIPPED; } + if (cc->proactive_compaction) { + int score, wmark_low; + pg_data_t *pgdat; + + pgdat = cc->zone->zone_pgdat; + if (kswapd_is_running(pgdat)) + return COMPACT_PARTIAL_SKIPPED; + + score = fragmentation_score_zone(cc->zone); + wmark_low = fragmentation_score_wmark(pgdat, true); + + if (score > wmark_low) + ret = COMPACT_CONTINUE; + else + ret = COMPACT_SUCCESS; + + goto out; + } + if (is_via_compact_memory(cc->order)) return COMPACT_CONTINUE; @@ -1941,6 +2048,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) } } +out: if (cc->contended || fatal_signal_pending(current)) ret = COMPACT_CONTENDED; @@ -2421,6 +2529,41 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, return rc; } +/* + * Compact all zones within a node till each zone's fragmentation score + * reaches within proactive compaction thresholds (as determined by the + * proactiveness tunable). + * + * It is possible that the function returns before reaching score targets + * due to various back-off conditions, such as, contention on per-node or + * per-zone locks. + */ +static void proactive_compact_node(pg_data_t *pgdat) +{ + int zoneid; + struct zone *zone; + struct compact_control cc = { + .order = -1, + .mode = MIGRATE_SYNC_LIGHT, + .ignore_skip_hint = true, + .whole_zone = true, + .gfp_mask = GFP_KERNEL, + .proactive_compaction = true, + }; + + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + cc.zone = zone; + + compact_zone(&cc, NULL); + + VM_BUG_ON(!list_empty(&cc.freepages)); + VM_BUG_ON(!list_empty(&cc.migratepages)); + } +} /* Compact all zones within a node */ static void compact_node(int nid) @@ -2467,6 +2610,13 @@ static void compact_nodes(void) /* The written value is actually unused, all memory is compacted */ int sysctl_compact_memory; +/* + * Tunable for proactive compaction. It determines how + * aggressively the kernel should compact memory in the + * background. It takes values in the range [0, 100]. + */ +int __read_mostly sysctl_compaction_proactiveness = 20; + /* * This is the entry point for compacting all nodes via * /proc/sys/vm/compact_memory @@ -2646,6 +2796,7 @@ static int kcompactd(void *p) { pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; + unsigned int proactive_defer = 0; const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); @@ -2661,12 +2812,34 @@ static int kcompactd(void *p) unsigned long pflags; trace_mm_compaction_kcompactd_sleep(pgdat->node_id); - wait_event_freezable(pgdat->kcompactd_wait, - kcompactd_work_requested(pgdat)); + if (wait_event_freezable_timeout(pgdat->kcompactd_wait, + kcompactd_work_requested(pgdat), + msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) { + + psi_memstall_enter(&pflags); + kcompactd_do_work(pgdat); + psi_memstall_leave(&pflags); + continue; + } - psi_memstall_enter(&pflags); - kcompactd_do_work(pgdat); - psi_memstall_leave(&pflags); + /* kcompactd wait timeout */ + if (should_proactive_compact_node(pgdat)) { + unsigned int prev_score, score; + + if (proactive_defer) { + proactive_defer--; + continue; + } + prev_score = fragmentation_score_node(pgdat); + proactive_compact_node(pgdat); + score = fragmentation_score_node(pgdat); + /* + * Defer proactive compaction if the fragmentation + * score did not go down i.e. no progress made. + */ + proactive_defer = score < prev_score ? + 0 : 1 << COMPACT_MAX_DEFER_SHIFT; + } } return 0; diff --git a/mm/internal.h b/mm/internal.h index 9886db20d94f..42cf0b610847 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -239,6 +239,7 @@ struct compact_control { bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ + bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock or sched contention */ bool rescan; /* Rescanning the same pageblock */ diff --git a/mm/vmstat.c b/mm/vmstat.c index fef03463a0cf..f183aa37994e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1096,6 +1096,24 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); } +/* + * Calculates external fragmentation within a zone wrt the given order. + * It is defined as the percentage of pages found in blocks of size + * less than 1 << order. It returns values in range [0, 100]. + */ +int extfrag_for_order(struct zone *zone, unsigned int order) +{ + struct contig_page_info info; + + fill_contig_page_info(zone, order, &info); + if (info.free_pages == 0) + return 0; + + return div_u64((info.free_pages - + (info.free_blocks_suitable << order)) * 100, + info.free_pages); +} + /* Same as __fragmentation index but allocs contig_page_info on stack */ int fragmentation_index(struct zone *zone, unsigned int order) { -- cgit From 25788738eb9ce46fe6a0fd84a3ceef5c795d41f0 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 11 Aug 2020 18:31:04 -0700 Subject: mm: fix compile error due to COMPACTION_HPAGE_ORDER Fix compile error when COMPACTION_HPAGE_ORDER is assigned to HUGETLB_PAGE_ORDER. The correct way to check if this constant is defined is to check for CONFIG_HUGETLBFS. Reported-by: Nathan Chancellor Signed-off-by: Nitin Gupta Signed-off-by: Andrew Morton Tested-by: Nathan Chancellor Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200623064544.25766-1-nigupta@nvidia.com Signed-off-by: Linus Torvalds --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index 544a98811c82..ed8ea1511634 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -62,7 +62,7 @@ static const int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; */ #if defined CONFIG_TRANSPARENT_HUGEPAGE #define COMPACTION_HPAGE_ORDER HPAGE_PMD_ORDER -#elif defined HUGETLB_PAGE_ORDER +#elif defined CONFIG_HUGETLBFS #define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER #else #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) -- cgit From d34c0a7599ea8c301bc471dfa1eb2bf2db6752d1 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 11 Aug 2020 18:31:07 -0700 Subject: mm: use unsigned types for fragmentation score Proactive compaction uses per-node/zone "fragmentation score" which is always in range [0, 100], so use unsigned type of these scores as well as for related constants. Signed-off-by: Nitin Gupta Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Vlastimil Babka Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200618010319.13159-1-nigupta@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- kernel/sysctl.c | 2 +- mm/compaction.c | 18 +++++++++--------- mm/vmstat.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 7a242d46454e..25a521d299c1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,13 +85,13 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; -extern int sysctl_compaction_proactiveness; +extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; -extern int extfrag_for_order(struct zone *zone, unsigned int order); +extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ab72e52f8a7b..287862f91717 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2854,7 +2854,7 @@ static struct ctl_table vm_table[] = { { .procname = "compaction_proactiveness", .data = &sysctl_compaction_proactiveness, - .maxlen = sizeof(int), + .maxlen = sizeof(sysctl_compaction_proactiveness), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, diff --git a/mm/compaction.c b/mm/compaction.c index ed8ea1511634..b7d433f1706a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -53,7 +53,7 @@ static inline void count_compact_events(enum vm_event_item item, long delta) /* * Fragmentation score check interval for proactive compaction purposes. */ -static const int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; +static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; /* * Page order with-respect-to which proactive compaction @@ -1890,7 +1890,7 @@ static bool kswapd_is_running(pg_data_t *pgdat) * ZONE_DMA32. For smaller zones, the score value remains close to zero, * and thus never exceeds the high threshold for proactive compaction. */ -static int fragmentation_score_zone(struct zone *zone) +static unsigned int fragmentation_score_zone(struct zone *zone) { unsigned long score; @@ -1906,9 +1906,9 @@ static int fragmentation_score_zone(struct zone *zone) * the node's score falls below the low threshold, or one of the back-off * conditions is met. */ -static int fragmentation_score_node(pg_data_t *pgdat) +static unsigned int fragmentation_score_node(pg_data_t *pgdat) { - unsigned long score = 0; + unsigned int score = 0; int zoneid; for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { @@ -1921,17 +1921,17 @@ static int fragmentation_score_node(pg_data_t *pgdat) return score; } -static int fragmentation_score_wmark(pg_data_t *pgdat, bool low) +static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low) { - int wmark_low; + unsigned int wmark_low; /* * Cap the low watermak to avoid excessive compaction * activity in case a user sets the proactivess tunable * close to 100 (maximum). */ - wmark_low = max(100 - sysctl_compaction_proactiveness, 5); - return low ? wmark_low : min(wmark_low + 10, 100); + wmark_low = max(100U - sysctl_compaction_proactiveness, 5U); + return low ? wmark_low : min(wmark_low + 10, 100U); } static bool should_proactive_compact_node(pg_data_t *pgdat) @@ -2615,7 +2615,7 @@ int sysctl_compact_memory; * aggressively the kernel should compact memory in the * background. It takes values in the range [0, 100]. */ -int __read_mostly sysctl_compaction_proactiveness = 20; +unsigned int __read_mostly sysctl_compaction_proactiveness = 20; /* * This is the entry point for compacting all nodes via diff --git a/mm/vmstat.c b/mm/vmstat.c index f183aa37994e..3bb034c99887 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1101,7 +1101,7 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in * It is defined as the percentage of pages found in blocks of size * less than 1 << order. It returns values in range [0, 100]. */ -int extfrag_for_order(struct zone *zone, unsigned int order) +unsigned int extfrag_for_order(struct zone *zone, unsigned int order) { struct contig_page_info info; -- cgit From 860b32729a21e0565585a9e2ecea2e5244d65acd Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 11 Aug 2020 18:31:10 -0700 Subject: mm/compaction: correct the comments of compact_defer_shift There is no compact_defer_limit. It should be compact_defer_shift in use. and add compact_order_failed explanation. Signed-off-by: Alex Shi Signed-off-by: Andrew Morton Reviewed-by: Alexander Duyck Link: http://lkml.kernel.org/r/3bd60e1b-a74e-050d-ade4-6e8f54e00b92@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + mm/compaction.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index efbd95dd3bbf..8379432f4f2f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -536,6 +536,7 @@ struct zone { * On compaction failure, 1< Date: Tue, 11 Aug 2020 18:31:13 -0700 Subject: mm: mempolicy: fix kerneldoc of numa_map_to_online_node() Fix W=1 compile warnings (invalid kerneldoc): mm/mempolicy.c:137: warning: Function parameter or member 'node' not described in 'numa_map_to_online_node' mm/mempolicy.c:137: warning: Excess function parameter 'nid' description in 'numa_map_to_online_node' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200728171109.28687-3-krzk@kernel.org Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7af44d7cdd11..92030ead2506 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -129,7 +129,7 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /** * numa_map_to_online_node - Find closest online node - * @nid: Node id to start the search + * @node: Node id to start the search * * Lookup the next closest node by distance if @nid is not online. */ -- cgit From 4605f057aace9291e80bf9982cc7c8babc917f56 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Tue, 11 Aug 2020 18:31:16 -0700 Subject: mm/mempolicy.c: check parameters first in kernel_get_mempolicy Previous implementatoin calls untagged_addr() before error check, while if the error check failed and return EINVAL, the untagged_addr() call is just useless work. Signed-off-by: Wenchao Hao Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200801090825.5597-1-haowenchao22@gmail.com Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 92030ead2506..25b7e412c20b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1632,11 +1632,11 @@ static int kernel_get_mempolicy(int __user *policy, int pval; nodemask_t nodes; - addr = untagged_addr(addr); - if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; + addr = untagged_addr(addr); + err = do_get_mempolicy(&pval, &nodes, addr, flags); if (err) -- cgit From f3f3416c2234c0a9372316bec5c48cbd58b565c5 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Tue, 11 Aug 2020 18:31:19 -0700 Subject: include/linux/mempolicy.h: fix typo Change "interlave" to "interleave". Signed-off-by: Yanfei Xu Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200810063454.9357-1-yanfei.xu@windriver.com Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f1648c23e29..5f1c74df264d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -28,7 +28,7 @@ struct mm_struct; * the process policy is used. Interrupts ignore the memory policy * of the current process. * - * Locking policy for interlave: + * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. -- cgit From 9066e5cfb73cdbcdbb49e87999482ab615e9fc76 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 11 Aug 2020 18:31:22 -0700 Subject: mm, oom: make the calculation of oom badness more accurate Recently we found an issue on our production environment that when memcg oom is triggered the oom killer doesn't chose the process with largest resident memory but chose the first scanned process. Note that all processes in this memcg have the same oom_score_adj, so the oom killer should chose the process with largest resident memory. Bellow is part of the oom info, which is enough to analyze this issue. [7516987.983223] memory: usage 16777216kB, limit 16777216kB, failcnt 52843037 [7516987.983224] memory+swap: usage 16777216kB, limit 9007199254740988kB, failcnt 0 [7516987.983225] kmem: usage 301464kB, limit 9007199254740988kB, failcnt 0 [...] [7516987.983293] [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name [7516987.983510] [ 5740] 0 5740 257 1 32768 0 -998 pause [7516987.983574] [58804] 0 58804 4594 771 81920 0 -998 entry_point.bas [7516987.983577] [58908] 0 58908 7089 689 98304 0 -998 cron [7516987.983580] [58910] 0 58910 16235 5576 163840 0 -998 supervisord [7516987.983590] [59620] 0 59620 18074 1395 188416 0 -998 sshd [7516987.983594] [59622] 0 59622 18680 6679 188416 0 -998 python [7516987.983598] [59624] 0 59624 1859266 5161 548864 0 -998 odin-agent [7516987.983600] [59625] 0 59625 707223 9248 983040 0 -998 filebeat [7516987.983604] [59627] 0 59627 416433 64239 774144 0 -998 odin-log-agent [7516987.983607] [59631] 0 59631 180671 15012 385024 0 -998 python3 [7516987.983612] [61396] 0 61396 791287 3189 352256 0 -998 client [7516987.983615] [61641] 0 61641 1844642 29089 946176 0 -998 client [7516987.983765] [ 9236] 0 9236 2642 467 53248 0 -998 php_scanner [7516987.983911] [42898] 0 42898 15543 838 167936 0 -998 su [7516987.983915] [42900] 1000 42900 3673 867 77824 0 -998 exec_script_vr2 [7516987.983918] [42925] 1000 42925 36475 19033 335872 0 -998 python [7516987.983921] [57146] 1000 57146 3673 848 73728 0 -998 exec_script_J2p [7516987.983925] [57195] 1000 57195 186359 22958 491520 0 -998 python2 [7516987.983928] [58376] 1000 58376 275764 14402 290816 0 -998 rosmaster [7516987.983931] [58395] 1000 58395 155166 4449 245760 0 -998 rosout [7516987.983935] [58406] 1000 58406 18285584 3967322 37101568 0 -998 data_sim [7516987.984221] oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=3aa16c9482ae3a6f6b78bda68a55d32c87c99b985e0f11331cddf05af6c4d753,mems_allowed=0-1,oom_memcg=/kubepods/podf1c273d3-9b36-11ea-b3df-246e9693c184,task_memcg=/kubepods/podf1c273d3-9b36-11ea-b3df-246e9693c184/1f246a3eeea8f70bf91141eeaf1805346a666e225f823906485ea0b6c37dfc3d,task=pause,pid=5740,uid=0 [7516987.984254] Memory cgroup out of memory: Killed process 5740 (pause) total-vm:1028kB, anon-rss:4kB, file-rss:0kB, shmem-rss:0kB [7516988.092344] oom_reaper: reaped process 5740 (pause), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB We can find that the first scanned process 5740 (pause) was killed, but its rss is only one page. That is because, when we calculate the oom badness in oom_badness(), we always ignore the negtive point and convert all of these negtive points to 1. Now as oom_score_adj of all the processes in this targeted memcg have the same value -998, the points of these processes are all negtive value. As a result, the first scanned process will be killed. The oom_socre_adj (-998) in this memcg is set by kubelet, because it is a a Guaranteed pod, which has higher priority to prevent from being killed by system oom. To fix this issue, we should make the calculation of oom point more accurate. We can achieve it by convert the chosen_point from 'unsigned long' to 'long'. [cai@lca.pw: reported a issue in the previous version] [mhocko@suse.com: fixed the issue reported by Cai] [mhocko@suse.com: add the comment in proc_oom_score()] [laoar.shao@gmail.com: v3] Link: http://lkml.kernel.org/r/1594396651-9931-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Tested-by: Naresh Kamboju Acked-by: Michal Hocko Cc: David Rientjes Cc: Qian Cai Link: http://lkml.kernel.org/r/1594309987-9919-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- fs/proc/base.c | 11 ++++++++++- include/linux/oom.h | 4 ++-- mm/oom_kill.c | 22 ++++++++++------------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index a333caeca291..617db4e0faa0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -551,8 +551,17 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, { unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; + long badness; + + badness = oom_badness(task, totalpages); + /* + * Special case OOM_SCORE_ADJ_MIN for all others scale the + * badness value into [0, 2000] range which we have been + * exporting for a long time so userspace might depend on it. + */ + if (badness != LONG_MIN) + points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; - points = oom_badness(task, totalpages) * 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; diff --git a/include/linux/oom.h b/include/linux/oom.h index c696c265f019..f022f581ac29 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -48,7 +48,7 @@ struct oom_control { /* Used by oom implementation, do not set */ unsigned long totalpages; struct task_struct *chosen; - unsigned long chosen_points; + long chosen_points; /* Used to print the constraint info. */ enum oom_constraint constraint; @@ -107,7 +107,7 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) bool __oom_reap_task_mm(struct mm_struct *mm); -extern unsigned long oom_badness(struct task_struct *p, +long oom_badness(struct task_struct *p, unsigned long totalpages); extern bool out_of_memory(struct oom_control *oc); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d30ce75f23fb..48e0db54d838 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -196,17 +196,17 @@ static bool is_dump_unreclaim_slabs(void) * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ -unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) +long oom_badness(struct task_struct *p, unsigned long totalpages) { long points; long adj; if (oom_unkillable_task(p)) - return 0; + return LONG_MIN; p = find_lock_task_mm(p); if (!p) - return 0; + return LONG_MIN; /* * Do not even consider tasks which are explicitly marked oom @@ -218,7 +218,7 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) test_bit(MMF_OOM_SKIP, &p->mm->flags) || in_vfork(p)) { task_unlock(p); - return 0; + return LONG_MIN; } /* @@ -233,11 +233,7 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) adj *= totalpages / 1000; points += adj; - /* - * Never return 0 for an eligible task regardless of the root bonus and - * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). - */ - return points > 0 ? points : 1; + return points; } static const char * const oom_constraint_text[] = { @@ -310,7 +306,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc) static int oom_evaluate_task(struct task_struct *task, void *arg) { struct oom_control *oc = arg; - unsigned long points; + long points; if (oom_unkillable_task(task)) goto next; @@ -336,12 +332,12 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) * killed first if it triggers an oom, then select it. */ if (oom_task_origin(task)) { - points = ULONG_MAX; + points = LONG_MAX; goto select; } points = oom_badness(task, oc->totalpages); - if (!points || points < oc->chosen_points) + if (points == LONG_MIN || points < oc->chosen_points) goto next; select: @@ -365,6 +361,8 @@ abort: */ static void select_bad_process(struct oom_control *oc) { + oc->chosen_points = LONG_MIN; + if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { -- cgit From de3f32e1424ca5c751d4cd78cbed6d17ca261c24 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 11 Aug 2020 18:31:25 -0700 Subject: doc, mm: sync up oom_score_adj documentation There are at least two notes in the oom section. The 3% discount for root processes is gone since d46078b28889 ("mm, oom: remove 3% bonus for CAP_SYS_ADMIN processes"). Likewise children of the selected oom victim are not sacrificed since bbbe48029720 ("mm, oom: remove 'prefer children over parent' heuristic") Drop both of them. Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: David Rientjes Cc: Yafang Shao Link: http://lkml.kernel.org/r/20200709062603.18480-1-mhocko@kernel.org Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.rst | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index e024a9efffd8..eb96a440a064 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1633,9 +1633,6 @@ may allocate from based on an estimation of its current memory and swap use. For example, if a task is using all allowed memory, its badness score will be 1000. If it is using half of its allowed memory, its score will be 500. -There is an additional factor included in the badness score: the current memory -and swap usage is discounted by 3% for root processes. - The amount of "allowed" memory depends on the context in which the oom killer was called. If it is due to the memory assigned to the allocating task's cpuset being exhausted, the allowed memory represents the set of mems assigned to that @@ -1671,11 +1668,6 @@ The value of /proc//oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. -Caveat: when a parent task is selected, the oom killer will sacrifice any first -generation children with separate address spaces instead, if possible. This -avoids servers and important system daemons from being killed and loses the -minimal amount of work. - 3.2 /proc//oom_score - Display current oom-killer score ------------------------------------------------------------- -- cgit From b1aa7c9377bdff904efa0fbde1f96e1769730d8a Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 11 Aug 2020 18:31:28 -0700 Subject: doc, mm: clarify /proc//oom_score value range The exported value includes oom_score_adj so the range is no [0, 1000] as described in the previous section but rather [0, 2000]. Mention that fact explicitly. Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: David Rientjes Cc: Yafang Shao Link: http://lkml.kernel.org/r/20200709062603.18480-2-mhocko@kernel.org Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index eb96a440a064..533c79e8d2cd 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1676,6 +1676,9 @@ This file can be used to check the current score used by the oom-killer for any given . Use it together with /proc//oom_score_adj to tune which process should be killed in an out-of-memory situation. +Please note that the exported value includes oom_score_adj so it is +effectively in range [0,2000]. + 3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- -- cgit From 619b5b469bcab84ea3bee1d8d04451c781d23feb Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 11 Aug 2020 18:31:32 -0700 Subject: mm, oom: show process exiting information in __oom_kill_process() When the OOM killer finds a victim and tryies to kill it, if the victim is already exiting, the task mm will be NULL and no process will be killed. But the dump_header() has been already executed, so it will be strange to dump so much information without killing a process. We'd better show some helpful information to indicate why this happens. Suggested-by: David Rientjes Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Qian Cai Link: http://lkml.kernel.org/r/20200721010127.17238-1-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 48e0db54d838..e90f25d6385d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -861,6 +861,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) p = find_lock_task_mm(victim); if (!p) { + pr_info("%s: OOM victim %d (%s) is already exiting. Skip killing the task\n", + message, task_pid_nr(victim), victim->comm); put_task_struct(victim); return; } else if (victim != p) { -- cgit From 15568299b7d9988063afce60731df605ab236e2a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Aug 2020 18:31:35 -0700 Subject: hugetlbfs: prevent filesystem stacking of hugetlbfs syzbot found issues with having hugetlbfs on a union/overlay as reported in [1]. Due to the limitations (no write) and special functionality of hugetlbfs, it does not work well in filesystem stacking. There are no know use cases for hugetlbfs stacking. Rather than making modifications to get hugetlbfs working in such environments, simply prevent stacking. [1] https://lore.kernel.org/linux-mm/000000000000b4684e05a2968ca6@google.com/ Reported-by: syzbot+d6ec23007e951dadf3de@syzkaller.appspotmail.com Suggested-by: Amir Goldstein Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Acked-by: Miklos Szeredi Cc: Al Viro Cc: Matthew Wilcox Cc: Colin Walters Link: http://lkml.kernel.org/r/80f869aa-810d-ef6c-8888-b46cee135907@oracle.com Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 523954d00dff..b5c109703daa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1364,6 +1364,12 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; + + /* + * Due to the special and limited functionality of hugetlbfs, it does + * not work well as a stacking filesystem. + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx)); if (!sb->s_root) goto out_free; -- cgit From 34ae204f18519f0920bd50a644abd6fefc8dbfcf Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Aug 2020 18:31:38 -0700 Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem in at least read mode. This is because the explicit locking in huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring the code, the call to huge_pte_alloc in the else block at the beginning of hugetlb_fault was missed. Unfortunately, that else clause is exercised when there is no page table entry. This will likely lead to a call to huge_pmd_share. If huge_pmd_share thinks pmd sharing is possible, it will traverse the mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is modifying the tree, bad things such as addressing exceptions or worse could happen. Simply remove the else clause. It should have been removed previously. The code following the else will call huge_pte_alloc with the appropriate locking. To prevent this type of issue in the future, add routines to assert that i_mmap_rwsem is held, and call these routines in huge pmd sharing routines. Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Suggested-by: Matthew Wilcox Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Andrea Arcangeli Cc: "Kirill A.Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 ++++++++++ include/linux/hugetlb.h | 8 +++++--- mm/hugetlb.c | 15 +++++++-------- mm/rmap.c | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 011af396aa17..7c69dd7c6160 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) up_read(&mapping->i_mmap_rwsem); } +static inline void i_mmap_assert_locked(struct address_space *mapping) +{ + lockdep_assert_held(&mapping->i_mmap_rwsem); +} + +static inline void i_mmap_assert_write_locked(struct address_space *mapping) +{ + lockdep_assert_held_write(&mapping->i_mmap_rwsem); +} + /* * Might pages of this file be mapped into userspace? */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 50650d0d01b9..a520bf26e5d8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -203,8 +204,9 @@ static inline struct address_space *hugetlb_page_mapping_lock_write( return NULL; } -static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, - pte_t *ptep) +static inline int huge_pmd_unshare(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dffafb5bf2ed..8a18f1234e80 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3967,7 +3967,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); /* * We just unmapped a page of PMDs by clearing a PUD. @@ -4554,10 +4554,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); - } else { - ptep = huge_pte_alloc(mm, haddr, huge_page_size(h)); - if (!ptep) - return VM_FAULT_OOM; } /* @@ -5034,7 +5030,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { pages++; spin_unlock(ptl); shared_pmd = true; @@ -5415,12 +5411,14 @@ out: * returns: 1 successfully unmapped a shared pte page * 0 the underlying pte page is not shared, or it is the last user */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); p4d_t *p4d = p4d_offset(pgd, *addr); pud_t *pud = pud_offset(p4d, *addr); + i_mmap_assert_write_locked(vma->vm_file->f_mapping); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) return 0; @@ -5438,7 +5436,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return NULL; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } diff --git a/mm/rmap.c b/mm/rmap.c index 5fe2dedce1fc..6cce9ef06753 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); - if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { /* * huge_pmd_unshare unmapped an entire PMD * page. There is no way of knowing exactly -- cgit From 0744f2807a4ffb111a31c0769878ec8b125b5241 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 11 Aug 2020 18:31:41 -0700 Subject: mm/migrate: optimize migrate_vma_setup() for holes Patch series "mm/migrate: optimize migrate_vma_setup() for holes". A simple optimization for migrate_vma_*() when the source vma is not an anonymous vma and a new test case to exercise it. This patch (of 2): When migrating system memory to device private memory, if the source address range is a valid VMA range and there is no memory or a zero page, the source PFN array is marked as valid but with no PFN. This lets the device driver allocate private memory and clear it, then insert the new device private struct page into the CPU's page tables when migrate_vma_pages() is called. migrate_vma_pages() only inserts the new page if the VMA is an anonymous range. There is no point in telling the device driver to allocate device private memory and then not migrate the page. Instead, mark the source PFN array entries as not migrating to avoid this overhead. [rcampbell@nvidia.com: v2] Link: http://lkml.kernel.org/r/20200710194840.7602-2-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Cc: Jerome Glisse Cc: John Hubbard Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: "Bharata B Rao" Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200710194840.7602-1-rcampbell@nvidia.com Link: http://lkml.kernel.org/r/20200709165711.26584-1-rcampbell@nvidia.com Link: http://lkml.kernel.org/r/20200709165711.26584-2-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- mm/migrate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 819e55130134..b52f30113366 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2168,6 +2168,16 @@ static int migrate_vma_collect_hole(unsigned long start, struct migrate_vma *migrate = walk->private; unsigned long addr; + /* Only allow populating anonymous memory. */ + if (!vma_is_anonymous(walk->vma)) { + for (addr = start; addr < end; addr += PAGE_SIZE) { + migrate->src[migrate->npages] = 0; + migrate->dst[migrate->npages] = 0; + migrate->npages++; + } + return 0; + } + for (addr = start; addr < end; addr += PAGE_SIZE) { migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE; migrate->dst[migrate->npages] = 0; @@ -2260,8 +2270,10 @@ again: pte = *ptep; if (pte_none(pte)) { - mpfn = MIGRATE_PFN_MIGRATE; - migrate->cpages++; + if (vma_is_anonymous(vma)) { + mpfn = MIGRATE_PFN_MIGRATE; + migrate->cpages++; + } goto next; } -- cgit From b0fc0f3fca80cf8ea8631eaec6b922bca4c4c2dd Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 11 Aug 2020 18:31:45 -0700 Subject: mm/migrate: add migrate-shared test for migrate_vma_*() Add a migrate_vma_*() self test for mmap(MAP_SHARED) to verify that !vma_anonymous() ranges won't be migrated. Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Cc: Jerome Glisse Cc: John Hubbard Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: "Bharata B Rao" Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200710194840.7602-3-rcampbell@nvidia.com Link: http://lkml.kernel.org/r/20200709165711.26584-3-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/hmm-tests.c | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 91d38a29956b..93fc5cadce61 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -941,6 +941,41 @@ TEST_F(hmm, migrate_fault) hmm_buffer_free(buffer); } +/* + * Migrate anonymous shared memory to device private memory. + */ +TEST_F(hmm, migrate_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, -ENOENT); + + hmm_buffer_free(buffer); +} + /* * Try to migrate various memory types to device private memory. */ -- cgit From 4958e4d86ecb011a81f5d80ce2023ef9f10692d8 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 11 Aug 2020 18:31:48 -0700 Subject: mm: thp: remove debug_cow switch Since commit 3917c80280c93a7123f ("thp: change CoW semantics for anon-THP"), the CoW page fault of THP has been rewritten, debug_cow is not used anymore. So, just remove it. Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Cc: Kirill A. Shutemov Link: http://lkml.kernel.org/r/1592270980-116062-1-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 7 ------- mm/huge_memory.c | 21 --------------------- 2 files changed, 28 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 17c4c4975145..467302056e17 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -181,13 +181,6 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Tue, 11 Aug 2020 18:31:51 -0700 Subject: mm/vmstat: add events for THP migration without split Add following new vmstat events which will help in validating THP migration without split. Statistics reported through these new VM events will help in performance debugging. 1. THP_MIGRATION_SUCCESS 2. THP_MIGRATION_FAILURE 3. THP_MIGRATION_SPLIT In addition, these new events also update normal page migration statistics appropriately via PGMIGRATE_SUCCESS and PGMIGRATE_FAILURE. While here, this updates current trace event 'mm_migrate_pages' to accommodate now available THP statistics. [akpm@linux-foundation.org: s/hpage_nr_pages/thp_nr_pages/] [ziy@nvidia.com: v2] Link: http://lkml.kernel.org/r/C5E3C65C-8253-4638-9D3C-71A61858BB8B@nvidia.com [anshuman.khandual@arm.com: s/thp_nr_pages/hpage_nr_pages/] Link: http://lkml.kernel.org/r/1594287583-16568-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Zi Yan Signed-off-by: Andrew Morton Reviewed-by: Daniel Jordan Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Zi Yan Cc: John Hubbard Cc: Naoya Horiguchi Link: http://lkml.kernel.org/r/1594080415-27924-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- Documentation/vm/page_migration.rst | 27 +++++++++++++++++++ include/linux/vm_event_item.h | 3 +++ include/trace/events/migrate.h | 17 +++++++++--- mm/migrate.c | 52 +++++++++++++++++++++++++++++++------ mm/vmstat.c | 3 +++ 5 files changed, 91 insertions(+), 11 deletions(-) diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index 1d6cd7db4e43..68883ac485fa 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -253,5 +253,32 @@ which are function pointers of struct address_space_operations. PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag for own purpose. +Monitoring Migration +===================== + +The following events (counters) can be used to monitor page migration. + +1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a + page was migrated. If the page was a non-THP page, then this counter is + increased by one. If the page was a THP, then this counter is increased by + the number of THP subpages. For example, migration of a single 2MB THP that + has 4KB-size base pages (subpages) will cause this counter to increase by + 512. + +2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for + _SUCCESS, above: this will be increased by the number of subpages, if it was + a THP. + +3. THP_MIGRATION_SUCCESS: A THP was migrated without being split. + +4. THP_MIGRATION_FAIL: A THP could not be migrated nor it could be split. + +5. THP_MIGRATION_SPLIT: A THP was migrated, but not as such: first, the THP had + to be split. After splitting, a migration retry was used for it's sub-pages. + +THP_MIGRATION_* events also update the appropriate PGMIGRATE_SUCCESS or +PGMIGRATE_FAIL events. For example, a THP migration failure will cause both +THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase. + Christoph Lameter, May 8, 2006. Minchan Kim, Mar 28, 2016. diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 24fc7c3ae7d6..2e6ca53b9bbd 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -56,6 +56,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, + THP_MIGRATION_SUCCESS, + THP_MIGRATION_FAIL, + THP_MIGRATION_SPLIT, #endif #ifdef CONFIG_COMPACTION COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 705b33d1e395..4d434398d64d 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -46,13 +46,18 @@ MIGRATE_REASON TRACE_EVENT(mm_migrate_pages, TP_PROTO(unsigned long succeeded, unsigned long failed, - enum migrate_mode mode, int reason), + unsigned long thp_succeeded, unsigned long thp_failed, + unsigned long thp_split, enum migrate_mode mode, int reason), - TP_ARGS(succeeded, failed, mode, reason), + TP_ARGS(succeeded, failed, thp_succeeded, thp_failed, + thp_split, mode, reason), TP_STRUCT__entry( __field( unsigned long, succeeded) __field( unsigned long, failed) + __field( unsigned long, thp_succeeded) + __field( unsigned long, thp_failed) + __field( unsigned long, thp_split) __field( enum migrate_mode, mode) __field( int, reason) ), @@ -60,13 +65,19 @@ TRACE_EVENT(mm_migrate_pages, TP_fast_assign( __entry->succeeded = succeeded; __entry->failed = failed; + __entry->thp_succeeded = thp_succeeded; + __entry->thp_failed = thp_failed; + __entry->thp_split = thp_split; __entry->mode = mode; __entry->reason = reason; ), - TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s", + TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s", __entry->succeeded, __entry->failed, + __entry->thp_succeeded, + __entry->thp_failed, + __entry->thp_split, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); diff --git a/mm/migrate.c b/mm/migrate.c index b52f30113366..5ea275878383 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1418,22 +1418,35 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, enum migrate_mode mode, int reason) { int retry = 1; + int thp_retry = 1; int nr_failed = 0; int nr_succeeded = 0; + int nr_thp_succeeded = 0; + int nr_thp_failed = 0; + int nr_thp_split = 0; int pass = 0; + bool is_thp = false; struct page *page; struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; - int rc; + int rc, nr_subpages; if (!swapwrite) current->flags |= PF_SWAPWRITE; - for(pass = 0; pass < 10 && retry; pass++) { + for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { retry = 0; + thp_retry = 0; list_for_each_entry_safe(page, page2, from, lru) { retry: + /* + * THP statistics is based on the source huge page. + * Capture required information that might get lost + * during migration. + */ + is_thp = PageTransHuge(page); + nr_subpages = hpage_nr_pages(page); cond_resched(); if (PageHuge(page)) @@ -1464,15 +1477,30 @@ retry: unlock_page(page); if (!rc) { list_safe_reset_next(page, page2, lru); + nr_thp_split++; goto retry; } } + if (is_thp) { + nr_thp_failed++; + nr_failed += nr_subpages; + goto out; + } nr_failed++; goto out; case -EAGAIN: + if (is_thp) { + thp_retry++; + break; + } retry++; break; case MIGRATEPAGE_SUCCESS: + if (is_thp) { + nr_thp_succeeded++; + nr_succeeded += nr_subpages; + break; + } nr_succeeded++; break; default: @@ -1482,19 +1510,27 @@ retry: * removed from migration page list and not * retried in the next outer loop. */ + if (is_thp) { + nr_thp_failed++; + nr_failed += nr_subpages; + break; + } nr_failed++; break; } } } - nr_failed += retry; + nr_failed += retry + thp_retry; + nr_thp_failed += thp_retry; rc = nr_failed; out: - if (nr_succeeded) - count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); - if (nr_failed) - count_vm_events(PGMIGRATE_FAIL, nr_failed); - trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason); + count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); + count_vm_events(PGMIGRATE_FAIL, nr_failed); + count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); + count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); + count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); + trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded, + nr_thp_failed, nr_thp_split, mode, reason); if (!swapwrite) current->flags &= ~PF_SWAPWRITE; diff --git a/mm/vmstat.c b/mm/vmstat.c index 3bb034c99887..727a26d1ec1d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1277,6 +1277,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_MIGRATION "pgmigrate_success", "pgmigrate_fail", + "thp_migration_success", + "thp_migration_fail", + "thp_migration_split", #endif #ifdef CONFIG_COMPACTION "compact_migrate_scanned", -- cgit From 835832ba01bb444c7e45139e4b807527c119dafc Mon Sep 17 00:00:00 2001 From: Jianqun Xu Date: Tue, 11 Aug 2020 18:31:54 -0700 Subject: mm/cma.c: fix NULL pointer dereference when cma could not be activated In some case the cma area could not be activated, but the cma_alloc be used under this case, then the kernel will crash caused by NULL pointer dereference. Add bitmap valid check in cma_alloc to avoid this issue. Signed-off-by: Jianqun Xu Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Link: http://lkml.kernel.org/r/20200615010123.15596-1-jay.xu@rock-chips.com Signed-off-by: Linus Torvalds --- mm/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index 26ecff818881..3a18f8d8ea5e 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -425,7 +425,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, struct page *page = NULL; int ret = -ENOMEM; - if (!cma || !cma->count) + if (!cma || !cma->count || !cma->bitmap) return NULL; pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma, -- cgit From 18e98e56f4407b8c766b6f6d9f2850fd2a081e4d Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 11 Aug 2020 18:31:57 -0700 Subject: mm: cma: fix the name of CMA areas Patch series "mm: fix the names of general cma and hugetlb cma", v2. The current code of CMA can only work when users pass a const string as name parameter. we need to fix the way to handle names in CMA. On the other hand, to avoid name conflicts after enabling CMA_DEBUGFS, each hugetlb should get a different CMA name. This patch (of 2): If users give a name saved in stack, the current code will generate magic pointer. if users don't give a name(NULL), kasprintf() will always return NULL as we are at the early stage. that means cma_init_reserved_mem() will return -ENOMEM if users set name parameter as NULL. [natechancellor@gmail.com: return cma->name directly in cma_get_name] Link: https://github.com/ClangBuiltLinux/linux/issues/1063 Link: http://lkml.kernel.org/r/20200623015840.621964-1-natechancellor@gmail.com Signed-off-by: Barry Song Signed-off-by: Nathan Chancellor Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Roman Gushchin Link: http://lkml.kernel.org/r/20200616223131.33828-2-song.bao.hua@hisilicon.com Signed-off-by: Linus Torvalds --- mm/cma.c | 15 +++++++-------- mm/cma.h | 4 +++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index 3a18f8d8ea5e..aa2df0bd49c3 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -52,7 +52,7 @@ unsigned long cma_get_size(const struct cma *cma) const char *cma_get_name(const struct cma *cma) { - return cma->name ? cma->name : "(undefined)"; + return cma->name; } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, @@ -202,13 +202,12 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * subsystems (like slab allocator) are available. */ cma = &cma_areas[cma_area_count]; - if (name) { - cma->name = name; - } else { - cma->name = kasprintf(GFP_KERNEL, "cma%d\n", cma_area_count); - if (!cma->name) - return -ENOMEM; - } + + if (name) + snprintf(cma->name, CMA_MAX_NAME, name); + else + snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); + cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; diff --git a/mm/cma.h b/mm/cma.h index 6698fa63279b..20f6e24bc477 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -4,6 +4,8 @@ #include +#define CMA_MAX_NAME 64 + struct cma { unsigned long base_pfn; unsigned long count; @@ -15,7 +17,7 @@ struct cma { spinlock_t mem_head_lock; struct debugfs_u32_array dfs_bitmap; #endif - const char *name; + char name[CMA_MAX_NAME]; }; extern struct cma cma_areas[MAX_CMA_AREAS]; -- cgit From 29d0f41d232393482ced6a233126832df3429ad2 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 11 Aug 2020 18:32:00 -0700 Subject: mm: hugetlb: fix the name of hugetlb CMA Once we enable CMA_DEBUGFS, we will get the below errors: directory 'cma-hugetlb' with parent 'cma' already present. We should have different names for different CMA areas. Signed-off-by: Barry Song Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Roman Gushchin Link: http://lkml.kernel.org/r/20200616223131.33828-3-song.bao.hua@hisilicon.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8a18f1234e80..6b6b0d5cc642 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5707,12 +5707,14 @@ void __init hugetlb_cma_reserve(int order) reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; + char name[20]; size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); + snprintf(name, 20, "hugetlb%d", nid); res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, - 0, false, "hugetlb", + 0, false, name, &hugetlb_cma[nid], nid); if (res) { pr_warn("hugetlb_cma: reservation failed: err %d, node %d", -- cgit From 3a5139f1c5bb76d69756fb8f13fffa173e261153 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Aug 2020 18:32:03 -0700 Subject: cma: don't quit at first error when activating reserved areas The routine cma_init_reserved_areas is designed to activate all reserved cma areas. It quits when it first encounters an error. This can leave some areas in a state where they are reserved but not activated. There is no feedback to code which performed the reservation. Attempting to allocate memory from areas in such a state will result in a BUG. Modify cma_init_reserved_areas to always attempt to activate all areas. The called routine, cma_activate_area is responsible for leaving the area in a valid state. No one is making active use of returned error codes, so change the routine to void. How to reproduce: This example uses kernelcore, hugetlb and cma as an easy way to reproduce. However, this is a more general cma issue. Two node x86 VM 16GB total, 8GB per node Kernel command line parameters, kernelcore=4G hugetlb_cma=8G Related boot time messages, hugetlb_cma: reserve 8192 MiB, up to 4096 MiB per node cma: Reserved 4096 MiB at 0x0000000100000000 hugetlb_cma: reserved 4096 MiB on node 0 cma: Reserved 4096 MiB at 0x0000000300000000 hugetlb_cma: reserved 4096 MiB on node 1 cma: CMA area hugetlb could not be activated # echo 8 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI ... Call Trace: bitmap_find_next_zero_area_off+0x51/0x90 cma_alloc+0x1a5/0x310 alloc_fresh_huge_page+0x78/0x1a0 alloc_pool_huge_page+0x6f/0xf0 set_max_huge_pages+0x10c/0x250 nr_hugepages_store_common+0x92/0x120 ? __kmalloc+0x171/0x270 kernfs_fop_write+0xc1/0x1a0 vfs_write+0xc7/0x1f0 ksys_write+0x5f/0xe0 do_syscall_64+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator") Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Reviewed-by: Roman Gushchin Acked-by: Barry Song Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Kyungmin Park Cc: Joonsoo Kim Cc: Link: http://lkml.kernel.org/r/20200730163123.6451-1-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- mm/cma.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index aa2df0bd49c3..7f415d7cda9f 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -93,17 +93,15 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, mutex_unlock(&cma->lock); } -static int __init cma_activate_area(struct cma *cma) +static void __init cma_activate_area(struct cma *cma) { unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; unsigned i = cma->count >> pageblock_order; struct zone *zone; cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL); - if (!cma->bitmap) { - cma->count = 0; - return -ENOMEM; - } + if (!cma->bitmap) + goto out_error; WARN_ON_ONCE(!pfn_valid(pfn)); zone = page_zone(pfn_to_page(pfn)); @@ -133,25 +131,22 @@ static int __init cma_activate_area(struct cma *cma) spin_lock_init(&cma->mem_head_lock); #endif - return 0; + return; not_in_zone: - pr_err("CMA area %s could not be activated\n", cma->name); bitmap_free(cma->bitmap); +out_error: cma->count = 0; - return -EINVAL; + pr_err("CMA area %s could not be activated\n", cma->name); + return; } static int __init cma_init_reserved_areas(void) { int i; - for (i = 0; i < cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - - if (ret) - return ret; - } + for (i = 0; i < cma_area_count; i++) + cma_activate_area(&cma_areas[i]); return 0; } -- cgit From af161bee93332a1ff10ba029f41936d21850ae82 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Aug 2020 18:32:06 -0700 Subject: include/linux/sched/mm.h: optimize current_gfp_context() The current_gfp_context() converts a number of PF_MEMALLOC_* per-process flags into the corresponding GFP_* flags for memory allocation. In that function, current->flags is accessed 3 times. That may lead to duplicated access of the same memory location. This is not usually a problem with minimal debug config options on as the compiler can optimize away the duplicated memory accesses. With most of the debug config options on, however, that may not be the case. For example, the x86-64 object size of the __need_fs_reclaim() in a debug kernel that calls current_gfp_context() was 309 bytes. With this patch applied, the object size is reduced to 202 bytes. This is a saving of 107 bytes and will probably be slightly faster too. Use READ_ONCE() to access current->flags to prevent the compiler from possibly accessing current->flags multiple times. Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Cc: Michel Lespinasse Link: http://lkml.kernel.org/r/20200618212936.9776-1-longman@redhat.com Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 85023ddc2dc2..f889e332912f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,14 +178,16 @@ static inline bool in_vfork(struct task_struct *tsk) */ static inline gfp_t current_gfp_context(gfp_t flags) { - if (unlikely(current->flags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { + unsigned int pflags = READ_ONCE(current->flags); + + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence */ - if (current->flags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); - else if (current->flags & PF_MEMALLOC_NOFS) + else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; } return flags; -- cgit From d49653f35adff8c778e7c5fbd4dbdf929594eca8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 11 Aug 2020 18:32:09 -0700 Subject: mm: mmu_notifier: fix and extend kerneldoc Fix W=1 compile warnings (invalid kerneldoc): mm/mmu_notifier.c:187: warning: Function parameter or member 'interval_sub' not described in 'mmu_interval_read_bgin' mm/mmu_notifier.c:708: warning: Function parameter or member 'subscription' not described in 'mmu_notifier_registr' mm/mmu_notifier.c:708: warning: Excess function parameter 'mn' description in 'mmu_notifier_register' mm/mmu_notifier.c:880: warning: Function parameter or member 'subscription' not described in 'mmu_notifier_put' mm/mmu_notifier.c:880: warning: Excess function parameter 'mn' description in 'mmu_notifier_put' mm/mmu_notifier.c:982: warning: Function parameter or member 'ops' not described in 'mmu_interval_notifier_insert' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Jason Gunthorpe Link: http://lkml.kernel.org/r/20200728171109.28687-4-krzk@kernel.org Signed-off-by: Linus Torvalds --- mm/mmu_notifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 352bb9f3ecc0..4fc918163dd3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -166,7 +166,7 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) /** * mmu_interval_read_begin - Begin a read side critical section against a VA * range - * interval_sub: The interval subscription + * @interval_sub: The interval subscription * * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a * collision-retry scheme similar to seqcount for the VA range under @@ -686,7 +686,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register); /** * mmu_notifier_register - Register a notifier on a mm - * @mn: The notifier to attach + * @subscription: The notifier to attach * @mm: The mm to attach the notifier to * * Must not hold mmap_lock nor any other VM related lock when calling @@ -856,7 +856,7 @@ static void mmu_notifier_free_rcu(struct rcu_head *rcu) /** * mmu_notifier_put - Release the reference on the notifier - * @mn: The notifier to act on + * @subscription: The notifier to act on * * This function must be paired with each mmu_notifier_get(), it releases the * reference obtained by the get. If this is the last reference then process @@ -965,7 +965,8 @@ static int __mmu_interval_notifier_insert( * @interval_sub: Interval subscription to register * @start: Starting virtual address to monitor * @length: Length of the range to monitor - * @mm : mm_struct to attach to + * @mm: mm_struct to attach to + * @ops: Interval notifier operations to be called on matching events * * This function subscribes the interval notifier for notifications from the * mm. Upon return the ops related to mmu_interval_notifier will be called -- cgit From fe124c95df9e2acf202910b0510300e37afe074b Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 11 Aug 2020 18:32:12 -0700 Subject: x86/mm: use max memory block size on bare metal Some of our servers spend significant time at kernel boot initializing memory block sysfs directories and then creating symlinks between them and the corresponding nodes. The slowness happens because the machines get stuck with the smallest supported memory block size on x86 (128M), which results in 16,288 directories to cover the 2T of installed RAM. The search for each memory block is noticeable even with commit 4fb6eabf1037 ("drivers/base/memory.c: cache memory blocks in xarray to accelerate lookup"). Commit 078eb6aa50dc ("x86/mm/memory_hotplug: determine block size based on the end of boot memory") chooses the block size based on alignment with memory end. That addresses hotplug failures in qemu guests, but for bare metal systems whose memory end isn't aligned to even the smallest size, it leaves them at 128M. Make kernels that aren't running on a hypervisor use the largest supported size (2G) to minimize overhead on big machines. Kernel boot goes 7% faster on the aforementioned servers, shaving off half a second. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20200714205450.945834-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Cc: Andy Lutomirski Cc: Dave Hansen Cc: David Hildenbrand Cc: Michal Hocko Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Steven Sistare Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200609225451.3542648-1-daniel.m.jordan@oracle.com Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b246ae40c8f..a4ac13cc3fdc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void) goto done; } + /* + * Use max block size to minimize overhead on bare metal, where + * alignment for memory hotplug isn't a concern. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Find the largest allowed block size that aligns to memory end */ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { if (IS_ALIGNED(boot_mem_end, bz)) -- cgit From d622ecec5f57ee7e27fbc6db222d8cbaf3c59478 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 11 Aug 2020 18:32:16 -0700 Subject: mm/memory_hotplug: introduce default dummy memory_add_physaddr_to_nid() This is to introduce a general dummy helper. memory_add_physaddr_to_nid() is a fallback option to get the nid in case NUMA_NO_NID is detected. After this patch, arm64/sh/s390 can simply use the general dummy version. PowerPC/x86/ia64 will still use their specific version. This is the preparation to set a fallback value for dev_dax->target_node. Signed-off-by: Jia He Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Cc: Dan Williams Cc: Michal Hocko Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Vishal Verma Cc: Dave Jiang Cc: Baoquan He Cc: Chuhong Yuan Cc: Mike Rapoport Cc: Logan Gunthorpe Cc: Masahiro Yamada Cc: Jonathan Cameron Cc: Kaly Xin Link: http://lkml.kernel.org/r/20200710031619.18762-2-justin.he@arm.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/numa.c | 10 ---------- arch/ia64/mm/numa.c | 2 -- arch/sh/mm/init.c | 9 --------- arch/x86/mm/numa.c | 1 - mm/memory_hotplug.c | 10 ++++++++++ 5 files changed, 10 insertions(+), 22 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..73f8b49d485c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -461,13 +461,3 @@ void __init arm64_numa_init(void) numa_init(dummy_numa_init); } - -/* - * We hope that we will be hotplugging memory on nodes we already know about, - * such that acpi_get_node() succeeds and we never fall back to this... - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr); - return 0; -} diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5e1015eb6d0d..f34964271101 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr) return 0; return nid; } - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 613de8096335..cd1379360f08 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -425,15 +425,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 addr) -{ - /* Node 0 for now.. */ - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index b05f45e5e8e2..aa76ec2d359b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ac6961abaa10..6289a909ea36 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -350,6 +350,16 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, return err; } +#ifdef CONFIG_NUMA +int __weak memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, -- cgit From b4223a510e2ab1bf0f971d50af7c1431014b25ad Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 11 Aug 2020 18:32:20 -0700 Subject: mm/memory_hotplug: fix unpaired mem_hotplug_begin/done When check_memblock_offlined_cb() returns failed rc(e.g. the memblock is online at that time), mem_hotplug_begin/done is unpaired in such case. Therefore a warning: Call Trace: percpu_up_write+0x33/0x40 try_remove_memory+0x66/0x120 ? _cond_resched+0x19/0x30 remove_memory+0x2b/0x40 dev_dax_kmem_remove+0x36/0x72 [kmem] device_release_driver_internal+0xf0/0x1c0 device_release_driver+0x12/0x20 bus_remove_device+0xe1/0x150 device_del+0x17b/0x3e0 unregister_dev_dax+0x29/0x60 devm_action_release+0x15/0x20 release_nodes+0x19a/0x1e0 devres_release_all+0x3f/0x50 device_release_driver_internal+0x100/0x1c0 driver_detach+0x4c/0x8f bus_remove_driver+0x5c/0xd0 driver_unregister+0x31/0x50 dax_pmem_exit+0x10/0xfe0 [dax_pmem] Fixes: f1037ec0cc8a ("mm/memory_hotplug: fix remove_memory() lockdep splat") Signed-off-by: Jia He Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Dan Williams Cc: [5.6+] Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chuhong Yuan Cc: Dave Hansen Cc: Dave Jiang Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Kaly Xin Cc: Logan Gunthorpe Cc: Masahiro Yamada Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200710031619.18762-3-justin.he@arm.com Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6289a909ea36..ce3d858319bd 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1757,7 +1757,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) */ rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) - goto done; + return rc; /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); @@ -1781,9 +1781,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) try_offline_node(nid); -done: mem_hotplug_done(); - return rc; + return 0; } /** -- cgit From de1193f0be66f88e2c6d0fd965137668fc2ec4a5 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 11 Aug 2020 18:32:24 -0700 Subject: mm, memory_hotplug: update pcp lists everytime onlining a memory block When onlining a first memory block in a zone, pcp lists are not updated thus pcp struct will have the default setting of ->high = 0,->batch = 1. This means till the second memory block in a zone(if it have) is onlined the pcp lists of this zone will not contain any pages because pcp's ->count is always greater than ->high thus free_pcppages_bulk() is called to free batch size(=1) pages every time system wants to add a page to the pcp list through free_unref_page(). To put this in a word, system is not using benefits offered by the pcp lists when there is a single onlineable memory block in a zone. Correct this by always updating the pcp lists when memory block is onlined. Fixes: 1f522509c77a ("mem-hotplug: avoid multiple zones sharing same boot strapping boot_pageset") Signed-off-by: Charan Teja Reddy Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Vinayak Menon Link: http://lkml.kernel.org/r/1596372896-15336-1-git-send-email-charante@codeaurora.org Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ce3d858319bd..0a9e1972fbe7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -854,8 +854,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, node_states_set_node(nid, &arg); if (need_zonelists_rebuild) build_all_zonelists(NULL); - else - zone_pcp_update(zone); + zone_pcp_update(zone); init_per_zone_wmark_min(); -- cgit From 1067b261cc973d68c29de54ef0587c56786e6bd5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:27 -0700 Subject: mm: drop duplicated words in Drop the doubled words "used" and "by". Drop the repeated acronym "TLB" and make several other fixes around it. (capital letters, spellos) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: SeongJae Park Link: http://lkml.kernel.org/r/2bb6e13e-44df-4920-52d9-4d3539945f73@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 53e97da1e8e2..a124c21e3204 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -804,7 +804,7 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, /* * No-op macros that just return the current protection value. Defined here - * because these macros can be used used even if CONFIG_MMU is not defined. + * because these macros can be used even if CONFIG_MMU is not defined. */ #ifndef pgprot_nx @@ -1234,7 +1234,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd) * Technically a PTE can be PROTNONE even when not doing NUMA balancing but * the only case the kernel cares is for NUMA balancing and is only ever set * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked - * _PAGE_PROTNONE so by by default, implement the helper as "always no". It + * _PAGE_PROTNONE so by default, implement the helper as "always no". It * is the responsibility of the caller to distinguish between PROT_NONE * protections and NUMA hinting fault protections. */ @@ -1318,10 +1318,10 @@ static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* * ARCHes with special requirements for evicting THP backing TLB entries can * implement this. Otherwise also, it can help optimize normal TLB flush in - * THP regime. stock flush_tlb_range() typically has optimization to nuke the - * entire TLB TLB if flush span is greater than a threshold, which will - * likely be true for a single huge page. Thus a single thp flush will - * invalidate the entire TLB which is not desitable. + * THP regime. Stock flush_tlb_range() typically has optimization to nuke the + * entire TLB if flush span is greater than a threshold, which will + * likely be true for a single huge page. Thus a single THP flush will + * invalidate the entire TLB which is not desirable. * e.g. see arch/arc: flush_pmd_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) -- cgit From 11192337206d2211bce3ba4ec1575439b6045654 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:30 -0700 Subject: mm: drop duplicated words in Drop the doubled words "to" and "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: SeongJae Park Link: http://lkml.kernel.org/r/d9fae8d6-0d60-4d52-9385-3199ee98de49@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f6a82f9bccd7..f97b10117d44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -479,7 +479,7 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* - * vm_fault is filled by the the pagefault handler and passed to the vma's + * vm_fault is filled by the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * @@ -2599,7 +2599,7 @@ extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ +/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ extern int expand_downwards(struct vm_area_struct *vma, unsigned long address); #if VM_GROWSUP -- cgit From 3ecabd31341fb4307f156a2bf4467c3f8fa9101b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:33 -0700 Subject: include/linux/highmem.h: fix duplicated words in a comment Change the doubled word "is" in a comment to "it is". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/ad605959-0083-4794-8d31-6b073300dd6f@infradead.org Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d6e82e3de027..14e6202ce47f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -73,7 +73,7 @@ static inline void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. * * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap -- cgit From c82f16b52cb3b50ac1b47e6c590e3dddefc5a97c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:36 -0700 Subject: include/linux/frontswap.h: drop duplicated word in a comment Drop the doubled word "in" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/3af7ed91-ad62-8445-40a4-9e07a64b9523@infradead.org Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6d775984905b..b07d88c92bb2 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -10,7 +10,7 @@ /* * Return code to denote that requested number of * frontswap pages are unused(moved to page cache). - * Used in in shmem_unuse and try_to_unuse. + * Used in shmem_unuse and try_to_unuse. */ #define FRONTSWAP_PAGES_UNUSED 2 -- cgit From 0845f83122d93ae3bafa7ea10209de2148a3b9bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:40 -0700 Subject: include/linux/memcontrol.h: drop duplicate word and fix spello Drop the doubled word "for" in a comment. Fix spello of "incremented". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Chris Down Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/b04aa2e4-7c95-12f0-599d-43d07fb28134@infradead.org Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2c2d301eac33..385237e4cb44 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -65,8 +65,8 @@ struct mem_cgroup_id { /* * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremated by the number of pages. This counter is used for - * for trigger some periodic events. This is straightforward and better + * it will be incremented by the number of pages. This counter is used + * to trigger some periodic events. This is straightforward and better * than using jiffies etc. to handle periodic memcg event. */ enum mem_cgroup_events_target { -- cgit From c56771b3095d4f1af82847081b6781fc03a13904 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Aug 2020 18:32:43 -0700 Subject: sh/mm: drop unused MAX_PHYSADDR_BITS The macro is not used anywhere, so remove the definition. Signed-off-by: Arvind Sankar Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Dave Hansen Acked-by: Mike Rapoport Link: http://lkml.kernel.org/r/20200723231544.17274-3-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- arch/sh/include/asm/sparsemem.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h index 4eb899751e45..084706bb8cca 100644 --- a/arch/sh/include/asm/sparsemem.h +++ b/arch/sh/include/asm/sparsemem.h @@ -5,11 +5,9 @@ #ifdef __KERNEL__ /* * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^N: how much physical address space we have */ #define SECTION_SIZE_BITS 26 -#define MAX_PHYSADDR_BITS 32 #define MAX_PHYSMEM_BITS 32 #endif -- cgit From 6f8c00ff5aa88c847af9af35131d72fae480b566 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Aug 2020 18:32:46 -0700 Subject: sparc: drop unused MAX_PHYSADDR_BITS The macro is not used anywhere, so remove the definition. Signed-off-by: Arvind Sankar Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Dave Hansen Acked-by: David S. Miller Acked-by: Mike Rapoport Link: http://lkml.kernel.org/r/20200723231544.17274-4-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/sparsemem.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index 1dd1b61432db..aa9a676bc341 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -7,7 +7,6 @@ #include #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS #define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ -- cgit From a1c1dbeb2e1a995c374d4954b36269070725f3a0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:49 -0700 Subject: mm/compaction.c: delete duplicated word Drop the repeated word "a". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-2-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index 5e8f2e22b73e..b89581bf859c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1477,7 +1477,7 @@ static void isolate_freepages(struct compact_control *cc) * this pfn aligned down to the pageblock boundary, because we do * block_start_pfn -= pageblock_nr_pages in the for loop. * For ending point, take care when isolating in last pageblock of a - * a zone which ends in the middle of a pageblock. + * zone which ends in the middle of a pageblock. * The low boundary is the end of the pageblock the migration scanner * is using. */ -- cgit From ce89fddfe0b73e99b2edd8cfd21afd461258a2df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:53 -0700 Subject: mm/filemap.c: delete duplicated word Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-3-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index f2bb5ff0293d..8e75bce0346d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2885,7 +2885,7 @@ filler: * Case a, the page will be up to date when the page is unlocked. * There is no need to serialise on the page lock here as the page * is pinned so the lock gives no additional protection. Even if the - * the page is truncated, the data is still valid if PageUptodate as + * page is truncated, the data is still valid if PageUptodate as * it's a race vs truncate race. * Case b, the page will not be up to date * Case c, the page may be truncated but in itself, the data may still -- cgit From 0cb80a2fb5bcd10f8aa7747e35511b52053232ae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:56 -0700 Subject: mm/hmm.c: delete duplicated word Drop the repeated word "pages". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-4-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hmm.c b/mm/hmm.c index 0809baee49d0..bb279319bf40 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -249,7 +249,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * Never fault in device private pages pages, but just report + * Never fault in device private pages, but just report * the PFN even if not present. */ if (hmm_is_device_private_entry(range, entry)) { -- cgit From 9e7ee40097ec654d5b9c7803b8e6dc560d17bbe3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:59 -0700 Subject: mm/hugetlb.c: delete duplicated words Drop the repeated word "the" in two places. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-5-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6b6b0d5cc642..b66bf74e999e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -133,7 +133,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool) /* * Subpool accounting for allocating and reserving pages. * Return -ENOMEM if there are not enough resources to satisfy the - * the request. Otherwise, return the number of pages by which the + * request. Otherwise, return the number of pages by which the * global pools must be adjusted (upward). The returned value may * only be different than the passed value (delta) in the case where * a subpool minimum size must be maintained. @@ -2167,7 +2167,7 @@ static void return_unused_surplus_pages(struct hstate *h, * evenly across all nodes with memory. Iterate across these nodes * until we can no longer free unreserved surplus pages. This occurs * when the nodes with surplus pages have no free pages. - * free_pool_huge_page() will balance the the freed pages across the + * free_pool_huge_page() will balance the freed pages across the * on-line nodes with memory and will handle the hstate accounting. * * Note that we decrement resv_huge_pages as we free the pages. If -- cgit From ac5ddd0fcea9bfee1d48884e7fa94e706d213aac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:02 -0700 Subject: mm/memcontrol.c: delete duplicated words Drop the repeated word "down". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-6-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c6c579217855..d59fd9af6e63 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2422,7 +2422,7 @@ static void high_work_func(struct work_struct *work) * * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the * overage ratio to a delay. - * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down down the + * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the * proposed penalty in order to reduce to a reasonable number of jiffies, and * to produce a reasonable delay curve. * -- cgit From a1a0aea592c1ef05283bc7af8d66ac3de87b2e07 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:05 -0700 Subject: mm/memory.c: delete duplicated words Drop the repeated word "to" in two places. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-7-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index de311fc7639e..325bb575e7ec 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1800,7 +1800,7 @@ out_unlock: * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * - * This is exactly like vmf_insert_pfn(), except that it allows drivers to + * This is exactly like vmf_insert_pfn(), except that it allows drivers * to override pgprot on a per-page basis. * * This only makes sense for IO mappings, and it makes no sense for @@ -1936,7 +1936,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * - * This is exactly like vmf_insert_mixed(), except that it allows drivers to + * This is exactly like vmf_insert_mixed(), except that it allows drivers * to override pgprot on a per-page basis. * * Typically this function should be used by drivers to set caching- and -- cgit From eaf444deed482f908395defa4d9b67ce55b72208 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:08 -0700 Subject: mm/migrate.c: delete duplicated word Drop the repeated word "and". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-8-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 5ea275878383..52896b4921a7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2667,7 +2667,7 @@ restore: /** * migrate_vma_setup() - prepare to migrate a range of memory - * @args: contains the vma, start, and and pfns arrays for the migration + * @args: contains the vma, start, and pfns arrays for the migration * * Returns: negative errno on failures, 0 when 0 or more pages were migrated * without an error. -- cgit From c08b342c214c4473b329f828cdc516d2f50a192f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:11 -0700 Subject: mm/nommu.c: delete duplicated words Drop the repeated word "that" in two places. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-9-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/nommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 340ae7774c13..75a327149af1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1762,8 +1762,8 @@ EXPORT_SYMBOL_GPL(access_process_vm); * @newsize: The proposed filesize of the inode * * Check the shared mappings on an inode on behalf of a shrinking truncate to - * make sure that that any outstanding VMAs aren't broken and then shrink the - * vm_regions that extend that beyond so that do_mmap() doesn't + * make sure that any outstanding VMAs aren't broken and then shrink the + * vm_regions that extend beyond so that do_mmap() doesn't * automatically grant mappings that are too large. */ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, -- cgit From 047b9967d59940f9f64ed13d02e98e9ca7cb5ffb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:14 -0700 Subject: mm/page_alloc.c: delete or fix duplicated words Drop the repeated word "them" and "that". Change "the the" to "to the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-10-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 167732f4d124..d80c9bac489c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4282,7 +4282,7 @@ retry: /* * If an allocation failed after direct reclaim, it could be because * pages are pinned on the per-cpu lists or in high alloc reserves. - * Shrink them them and try again + * Shrink them and try again */ if (!page && !drained) { unreserve_highatomic_pageblock(ac, false); @@ -6192,7 +6192,7 @@ static int zone_batchsize(struct zone *zone) * locking. * * Any new users of pcp->batch and pcp->high should ensure they can cope with - * those fields changing asynchronously (acording the the above rule). + * those fields changing asynchronously (acording to the above rule). * * mutex_is_locked(&pcp_batch_high_lock) required when calling this function * outside of boot time (or some other assurance that no concurrent updaters @@ -8203,7 +8203,7 @@ void *__init alloc_large_system_hash(const char *tablename, * race condition. So you can't expect this function should be exact. * * Returns a page without holding a reference. If the caller wants to - * dereference that page (e.g., dumping), it has to make sure that that it + * dereference that page (e.g., dumping), it has to make sure that it * cannot get removed (e.g., via memory unplug) concurrently. * */ -- cgit From af44c12fe7c9704c4d185446a8ffdea18792017d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:17 -0700 Subject: mm/shmem.c: delete duplicated word Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-11-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 49bde088a7ea..271548ca20f3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1686,7 +1686,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, * Swap in the page pointed to by *pagep. * Caller has to make sure that *pagep contains a valid swapped page. * Returns 0 and the page in pagep if success. On failure, returns the - * the error code and NULL in *pagep. + * error code and NULL in *pagep. */ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, -- cgit From 081a06fa29906643c62ea9477bc374c23417e6f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:19 -0700 Subject: mm/slab_common.c: delete duplicated word Drop the repeated word "and". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-12-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index a513f3237155..f9ccd5dc13f3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -419,7 +419,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) /* * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed - * through RCU and and the associated kmem_cache are dereferenced + * through RCU and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only * after the pending RCU operations are finished. As rcu_barrier() * is a pretty slow operation, we batch all pending destructions -- cgit From 5ce1be0e40fe64cbf540ba54dd10824a8cef99e1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:23 -0700 Subject: mm/usercopy.c: delete duplicated word Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-13-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 660717a1ea5c..b3de3c4eefba 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -43,7 +43,7 @@ static noinline int check_stack_object(const void *obj, unsigned long len) /* * Reject: object partially overlaps the stack (passing the - * the check above means at least one end is within the stack, + * check above means at least one end is within the stack, * so if this check fails, the other end is outside the stack). */ if (obj < stack || stackend < obj + len) -- cgit From 1eba09c15decaf4e390aba0cd409e8a4e821c2a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:26 -0700 Subject: mm/vmscan.c: delete or fix duplicated words Drop the repeated word "marked". Change "time time" to "same time". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-14-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 846b7e9b8d2e..738115ed75e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2798,7 +2798,7 @@ again: set_bit(PGDAT_DIRTY, &pgdat->flags); /* - * If kswapd scans pages marked marked for immediate + * If kswapd scans pages marked for immediate * reclaim and under writeback (nr_immediate), it * implies that pages are cycling through the LRU * faster than they are written so also forcibly stall. @@ -3373,7 +3373,7 @@ static bool pgdat_watermark_boosted(pg_data_t *pgdat, int highest_zoneidx) /* * Check for watermark boosts top-down as the higher zones * are more likely to be boosted. Both watermarks and boosts - * should not be checked at the time time as reclaim would + * should not be checked at the same time as reclaim would * start prematurely when there is no boosting and a lower * zone is balanced. */ -- cgit From b6aa2c83428d49f1331d89a9e3d77850ee0696ba Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:28 -0700 Subject: mm/zpool.c: delete duplicated word and fix grammar Drop the repeated word "if". Fix subject/verb agreement. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-15-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/zpool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/zpool.c b/mm/zpool.c index 863669212070..3744a2d1a624 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -239,15 +239,15 @@ const char *zpool_get_type(struct zpool *zpool) } /** - * zpool_malloc_support_movable() - Check if the zpool support - * allocate movable memory + * zpool_malloc_support_movable() - Check if the zpool supports + * allocating movable memory * @zpool: The zpool to check * - * This returns if the zpool support allocate movable memory. + * This returns if the zpool supports allocating movable memory. * * Implementations must guarantee this to be thread-safe. * - * Returns: true if if the zpool support allocate movable memory, false if not + * Returns: true if the zpool supports allocating movable memory, false if not */ bool zpool_malloc_support_movable(struct zpool *zpool) { -- cgit From b956b5ac28cd709567f10c0ef67e65898ded2e45 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:31 -0700 Subject: mm/zsmalloc.c: fix duplicated words Change "as as" to "as a". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Zi Yan Link: http://lkml.kernel.org/r/20200801173822.14973-16-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 952a01e45c6a..c36fdff9a371 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -79,7 +79,7 @@ /* * Object location (, ) is encoded as - * as single (unsigned long) handle value. + * a single (unsigned long) handle value. * * Note that object index starts from 0. * -- cgit From bfe00c5bbd9ee37b99c281429556c335271d027b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:34 -0700 Subject: syscalls: use uaccess_kernel in addr_limit_user_check Patch series "clean up address limit helpers", v2. In preparation for eventually phasing out direct use of set_fs(), this series removes the segment_eq() arch helper that is only used to implement or duplicate the uaccess_kernel() API, and then adds descriptive helpers to force the kernel address limit. This patch (of 6): Use the uaccess_kernel helper instead of duplicating it. [hch@lst.de: arm: don't call addr_limit_user_check for nommu] Link: http://lkml.kernel.org/r/20200721045834.GA9613@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Guenter Roeck Acked-by: Linus Torvalds Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200714105505.935079-1-hch@lst.de Link: http://lkml.kernel.org/r/20200710135706.537715-1-hch@lst.de Link: http://lkml.kernel.org/r/20200710135706.537715-2-hch@lst.de Signed-off-by: Linus Torvalds --- arch/arm/kernel/signal.c | 2 ++ include/linux/syscalls.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb..c9dc912b83f0 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -713,7 +713,9 @@ struct page *get_signal_page(void) /* Defer to generic check */ asmlinkage void addr_limit_check_failed(void) { +#ifdef CONFIG_MMU addr_limit_user_check(); +#endif } #ifdef CONFIG_DEBUG_RSEQ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a2429d336593..dc2b827c81e5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -263,7 +263,7 @@ static inline void addr_limit_user_check(void) return; #endif - if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), + if (CHECK_DATA_CORRUPTION(uaccess_kernel(), "Invalid address limit on user-mode return")) force_sig(SIGKILL); -- cgit From 9af0f90aea46081566bf2014b6dfeb87b792005c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:38 -0700 Subject: nds32: use uaccess_kernel in show_regs Use the uaccess_kernel helper instead of duplicating it. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Greentime Hu Cc: Nick Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200710135706.537715-3-hch@lst.de Signed-off-by: Linus Torvalds --- arch/nds32/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index e85bbbadc0e7..e01ad5d17224 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs) regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]); pr_info(" IRQs o%s Segment %s\n", interrupts_enabled(regs) ? "n" : "ff", - segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user"); + uaccess_kernel() ? "kernel" : "user"); } EXPORT_SYMBOL(show_regs); -- cgit From efbfc62e1d9ce835fd3ea5f25db1844e1496ced6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:41 -0700 Subject: riscv: include in To ensure TASK_SIZE is defined for USER_DS. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Palmer Dabbelt Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200710135706.537715-4-hch@lst.de Signed-off-by: Linus Torvalds --- arch/riscv/include/asm/uaccess.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 8ce9d607b53d..22de922d6ecb 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -8,6 +8,8 @@ #ifndef _ASM_RISCV_UACCESS_H #define _ASM_RISCV_UACCESS_H +#include /* for TASK_SIZE */ + /* * User space memory access functions */ -- cgit From 428e2976a5bf7e7f5554286d7a5a33b8147b106a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:44 -0700 Subject: uaccess: remove segment_eq segment_eq is only used to implement uaccess_kernel. Just open code uaccess_kernel in the arch uaccess headers and remove one layer of indirection. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Cc: Nick Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Link: http://lkml.kernel.org/r/20200710135706.537715-5-hch@lst.de Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/uaccess.h | 2 +- arch/arc/include/asm/segment.h | 3 +-- arch/arm/include/asm/uaccess.h | 4 ++-- arch/arm64/include/asm/uaccess.h | 2 +- arch/csky/include/asm/segment.h | 2 +- arch/h8300/include/asm/segment.h | 2 +- arch/ia64/include/asm/uaccess.h | 2 +- arch/m68k/include/asm/segment.h | 2 +- arch/microblaze/include/asm/uaccess.h | 2 +- arch/mips/include/asm/uaccess.h | 2 +- arch/nds32/include/asm/uaccess.h | 2 +- arch/nios2/include/asm/uaccess.h | 2 +- arch/openrisc/include/asm/uaccess.h | 2 +- arch/parisc/include/asm/uaccess.h | 2 +- arch/powerpc/include/asm/uaccess.h | 3 +-- arch/riscv/include/asm/uaccess.h | 4 +--- arch/s390/include/asm/uaccess.h | 2 +- arch/sh/include/asm/segment.h | 3 +-- arch/sparc/include/asm/uaccess_32.h | 2 +- arch/sparc/include/asm/uaccess_64.h | 2 +- arch/x86/include/asm/uaccess.h | 2 +- arch/xtensa/include/asm/uaccess.h | 2 +- include/asm-generic/uaccess.h | 4 ++-- include/linux/uaccess.h | 2 -- 24 files changed, 25 insertions(+), 32 deletions(-) diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 1fe2b56cb861..1b6f25efa247 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -20,7 +20,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * Is a address valid? This does a straightforward calculation rather diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h index 6a2a5be5026d..871f8ab11bfd 100644 --- a/arch/arc/include/asm/segment.h +++ b/arch/arc/include/asm/segment.h @@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t; #define KERNEL_DS MAKE_MM_SEG(0) #define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #endif /* __ASSEMBLY__ */ #endif /* __ASMARC_SEGMENT_H */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b5fdd30252f8..a13d90206472 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs) modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * We use 33-bit arithmetic here. Success returns zero, failure returns @@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long); */ #define USER_DS KERNEL_DS -#define segment_eq(a, b) (1) +#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) #define get_fs() (KERNEL_DS) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8d7c466f809b..991dd5f031e4 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs) CONFIG_ARM64_UAO)); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * Test whether a block of memory is a valid user space address. diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h index db2640d5f575..79ede9b1a646 100644 --- a/arch/csky/include/asm/segment.h +++ b/arch/csky/include/asm/segment.h @@ -13,6 +13,6 @@ typedef struct { #define USER_DS ((mm_segment_t) { 0x80000000UL }) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASM_CSKY_SEGMENT_H */ diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h index a407978f9f9f..37950725d9b9 100644 --- a/arch/h8300/include/asm/segment.h +++ b/arch/h8300/include/asm/segment.h @@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void) return USER_DS; } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 8aa473a4b0f4..179243c3dfc7 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -50,7 +50,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * When accessing user memory, we need to make sure the entire area really is in diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h index c6686559e9b7..2b5e68a71ef7 100644 --- a/arch/m68k/include/asm/segment.h +++ b/arch/m68k/include/asm/segment.h @@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val) #define set_fs(x) (current_thread_info()->addr_limit = (x)) #endif -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 6723c56ec378..304b04ffea2f 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -41,7 +41,7 @@ # define get_fs() (current_thread_info()->addr_limit) # define set_fs(val) (current_thread_info()->addr_limit = (val)) -# define segment_eq(a, b) ((a).seg == (b).seg) +# define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #ifndef CONFIG_MMU diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 62b298c50905..61fc01f177a6 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -72,7 +72,7 @@ extern u64 __ua_limit; #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * eva_kernel_access() - determine whether kernel memory access on an EVA system diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 3a9219f53ee0..010ba5f1d7dd 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f9..a741abbed6fb 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -30,7 +30,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(seg) (current_thread_info()->addr_limit = (seg)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __access_ok(addr, len) \ (((signed long)(((long)get_fs().seg) & \ diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..48b691530d3e 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -43,7 +43,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* Ensure that the range from addr to addr+size is all within the process' * address space diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebbb9ffe038c..ed2cd4fb479b 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -14,7 +14,7 @@ #define KERNEL_DS ((mm_segment_t){0}) #define USER_DS ((mm_segment_t){1}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 64c04ab09112..00699903f1ef 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) #ifdef __powerpc64__ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 22de922d6ecb..f56c66b3f5fe 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -64,11 +64,9 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) - /** * access_ok: - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 324438889fe1..f09444d6aeab 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define USER_DS_SACF (3) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) +#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) void set_fs(mm_segment_t fs); diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h index 33d1d28057cb..02e54a3335d6 100644 --- a/arch/sh/include/asm/segment.h +++ b/arch/sh/include/asm/segment.h @@ -24,8 +24,7 @@ typedef struct { #define USER_DS KERNEL_DS #endif -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index d6d8413eca83..0a2d3ebc4bb8 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -28,7 +28,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) ((current->thread.current_ds) = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test * can be fairly lightweight. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index bf9d330073b2..698cf69f74e9 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -32,7 +32,7 @@ #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define set_fs(val) \ do { \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2f3e8f2a958f..ecefaffd15d4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (current->thread.addr_limit.seg) /* diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index e57f0d0a88d8..b9758119feca 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -35,7 +35,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) (current->thread.current_ds = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __kernel_ok (uaccess_kernel()) #define __user_ok(addr, size) \ diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index e935318804f8..ba68ee4dabfa 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -86,8 +86,8 @@ static inline void set_fs(mm_segment_t fs) } #endif -#ifndef segment_eq -#define segment_eq(a, b) ((a).seg == (b).seg) +#ifndef uaccess_kernel +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif #define access_ok(addr, size) __access_ok((unsigned long)(addr),(size)) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 0a76ddc07d59..5c62d0c6f15b 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -6,8 +6,6 @@ #include #include -#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) - #include /* -- cgit From 3d13f313ce4c34c524ccc37986fe77172f601ff3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:47 -0700 Subject: uaccess: add force_uaccess_{begin,end} helpers Add helpers to wrap the get_fs/set_fs magic for undoing any damange done by set_fs(KERNEL_DS). There is no real functional benefit, but this documents the intent of these calls better, and will allow stubbing the functions out easily for kernels builds that do not allow address space overrides in the future. [hch@lst.de: drop two incorrect hunks, fix a commit log typo] Link: http://lkml.kernel.org/r/20200714105505.935079-6-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Mark Rutland Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Cc: Nick Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Link: http://lkml.kernel.org/r/20200710135706.537715-6-hch@lst.de Signed-off-by: Linus Torvalds --- arch/arm64/kernel/sdei.c | 2 +- arch/m68k/include/asm/tlbflush.h | 6 +++--- arch/mips/kernel/unaligned.c | 27 +++++++++++++-------------- arch/nds32/mm/alignment.c | 7 +++---- arch/sh/kernel/traps_32.c | 12 +++++------- drivers/firmware/arm_sdei.c | 5 ++--- include/linux/uaccess.h | 18 ++++++++++++++++++ kernel/events/callchain.c | 5 ++--- kernel/events/core.c | 5 ++--- kernel/kthread.c | 5 ++--- kernel/stacktrace.c | 5 ++--- mm/maccess.c | 22 ++++++++++------------ 12 files changed, 63 insertions(+), 56 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index dab88260b137..7689f2031c0c 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, /* * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s set_fs(USER_DS) call. + * by sdei_event_handler()s force_uaccess_begin() call. */ __uaccess_enable_hw_pan(); diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h index 191e75a6bb24..5337bc2c262f 100644 --- a/arch/m68k/include/asm/tlbflush.h +++ b/arch/m68k/include/asm/tlbflush.h @@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (vma->vm_mm == current->active_mm) { - mm_segment_t old_fs = get_fs(); - set_fs(USER_DS); + mm_segment_t old_fs = force_uaccess_begin(); + __flush_tlb_one(addr); - set_fs(old_fs); + force_uaccess_end(old_fs); } } diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 0adce604fa44..126a5f3f4e4c 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs, * memory, so we need to "switch" the address limit to * user space, so that address check can work properly. */ - seg = get_fs(); - set_fs(USER_DS); + seg = force_uaccess_begin(); switch (insn.spec3_format.func) { case lhe_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lwe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lhue_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWUE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case she_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; case swe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; default: - set_fs(seg); + force_uaccess_end(seg); goto sigill; } - set_fs(seg); + force_uaccess_end(seg); } #endif break; diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index c8b9061a2ee3..1eb7ded6992b 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) { unsigned long inst; int ret = -EFAULT; - mm_segment_t seg = get_fs(); + mm_segment_t seg; inst = get_inst(regs->ipc); @@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - set_fs(USER_DS); - + seg = force_uaccess_begin(); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0xffff, regs); else ret = do_32(inst, regs); - set_fs(seg); + force_uaccess_end(seg); return ret; } diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 058c6181bb30..b62ad0ba2395 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs, error_code = lookup_exception_vector(); #endif - oldfs = get_fs(); - if (user_mode(regs)) { int si_code = BUS_ADRERR; unsigned int user_action; @@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs, local_irq_enable(); inc_unaligned_user_access(); - set_fs(USER_DS); + oldfs = force_uaccess_begin(); if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), sizeof(instruction))) { - set_fs(oldfs); + force_uaccess_end(oldfs); goto uspace_segv; } - set_fs(oldfs); + force_uaccess_end(oldfs); /* shout about userspace fixups */ unaligned_fixups_notify(current, instruction, regs); @@ -520,11 +518,11 @@ fixup: goto uspace_segv; } - set_fs(USER_DS); + oldfs = force_uaccess_begin(); tmp = handle_unaligned_access(instruction, regs, &user_mem_access, 0, address); - set_fs(oldfs); + force_uaccess_end(oldfs); if (tmp == 0) return; /* sorted */ diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index e7e36aab2386..b4b9ce97f415 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1136,15 +1136,14 @@ int sdei_event_handler(struct pt_regs *regs, * access kernel memory. * Do the same here because this doesn't come via the same entry code. */ - orig_addr_limit = get_fs(); - set_fs(USER_DS); + orig_addr_limit = force_uaccess_begin(); err = arg->callback(event_num, regs, arg->callback_arg); if (err) pr_err_ratelimited("event %u on CPU %u failed with error: %d\n", event_num, smp_processor_id(), err); - set_fs(orig_addr_limit); + force_uaccess_end(orig_addr_limit); return err; } diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5c62d0c6f15b..94b285411659 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -8,6 +8,24 @@ #include +/* + * Force the uaccess routines to be wired up for actual userspace access, + * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone + * using force_uaccess_end below. + */ +static inline mm_segment_t force_uaccess_begin(void) +{ + mm_segment_t fs = get_fs(); + + set_fs(USER_DS); + return fs; +} + +static inline void force_uaccess_end(mm_segment_t oldfs) +{ + set_fs(oldfs); +} + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c6ce894e4ce9..58cbe357fb2b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,10 +217,9 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); perf_callchain_user(&ctx, regs); - set_fs(fs); + force_uaccess_end(fs); } } diff --git a/kernel/events/core.c b/kernel/events/core.c index d1f0a7e5b182..6961333ebad5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6453,10 +6453,9 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, /* Data. */ sp = perf_user_stack_pointer(regs); - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); rem = __output_copy_user(handle, (void *) sp, dump_size); - set_fs(fs); + force_uaccess_end(fs); dyn_size = dump_size - rem; perf_output_skip(handle, rem); diff --git a/kernel/kthread.c b/kernel/kthread.c index b2807e7be772..3edaa380dc7b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1258,8 +1258,7 @@ void kthread_use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); - to_kthread(tsk)->oldfs = get_fs(); - set_fs(USER_DS); + to_kthread(tsk)->oldfs = force_uaccess_begin(); } EXPORT_SYMBOL_GPL(kthread_use_mm); @@ -1274,7 +1273,7 @@ void kthread_unuse_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); - set_fs(to_kthread(tsk)->oldfs); + force_uaccess_end(to_kthread(tsk)->oldfs); task_lock(tsk); sync_mm_rss(mm); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 2af66e449aa6..946f44a9e86a 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -233,10 +233,9 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) if (current->flags & PF_KTHREAD) return 0; - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); arch_stack_walk_user(consume_entry, &c, task_pt_regs(current)); - set_fs(fs); + force_uaccess_end(fs); return c.len; } diff --git a/mm/maccess.c b/mm/maccess.c index f98ff91e32c6..3bd70405f2d8 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -205,15 +205,14 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs = force_uaccess_begin(); - set_fs(USER_DS); if (access_ok(src, size)) { pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); } - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret) return -EFAULT; @@ -233,15 +232,14 @@ EXPORT_SYMBOL_GPL(copy_from_user_nofault); long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs = force_uaccess_begin(); - set_fs(USER_DS); if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret) return -EFAULT; @@ -270,17 +268,17 @@ EXPORT_SYMBOL_GPL(copy_to_user_nofault); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs; long ret; if (unlikely(count <= 0)) return 0; - set_fs(USER_DS); + old_fs = force_uaccess_begin(); pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret >= count) { ret = count; @@ -310,14 +308,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs; int ret; - set_fs(USER_DS); + old_fs = force_uaccess_begin(); pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); - set_fs(old_fs); + force_uaccess_end(old_fs); return ret; } -- cgit From fe81417596fa8b6577fedb7e206ff3e4c7015c13 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:50 -0700 Subject: exec: use force_uaccess_begin during exec and exit Both exec and exit want to ensure that the uaccess routines actually do access user pointers. Use the newly added force_uaccess_begin helper instead of an open coded set_fs for that to prepare for kernel builds where set_fs() does not exist. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200710135706.537715-7-hch@lst.de Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ++++++- kernel/exit.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 3698252719a3..29ef78ae9f50 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1402,7 +1402,12 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out_unlock; - set_fs(USER_DS); + /* + * Ensure that the uaccess routines can actually operate on userspace + * pointers: + */ + force_uaccess_begin(); + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); diff --git a/kernel/exit.c b/kernel/exit.c index e731c414e024..c2d2961576f2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -732,7 +732,7 @@ void __noreturn do_exit(long code) * mm_release()->clear_child_tid() from writing to a user-controlled * kernel address. */ - set_fs(USER_DS); + force_uaccess_begin(); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", -- cgit From bd72866b8da499e60633ff28f8a4f6e09ca78efe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 11 Aug 2020 18:33:54 -0700 Subject: alpha: fix annotation of io{read,write}{16,32}be() These accessors must be used to read/write a big-endian bus. The value returned or written is native-endian. However, these accessors are defined using be{16,32}_to_cpu() or cpu_to_be{16,32}() to make the endian conversion but these expect a __be{16,32} when none is present. Keeping them would need a force cast that would solve nothing at all. So, do the conversion using swab{16,32}, like done in asm-generic for similar situations. Reported-by: kernel test robot Signed-off-by: Luc Van Oostenryck Signed-off-by: Andrew Morton Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Stephen Boyd Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/20200622114232.80039-1-luc.vanoostenryck@gmail.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/io.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index a4d0c19f1e79..640e1a2f57b4 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) } #endif -#define ioread16be(p) be16_to_cpu(ioread16(p)) -#define ioread32be(p) be32_to_cpu(ioread32(p)) -#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p)) -#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p)) +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) #define inb_p inb #define inw_p inw -- cgit From c5f748e2f2ad970078de11bd6b12bcd81147c636 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:57 -0700 Subject: include/linux/compiler-clang.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Nathan Chancellor Link: http://lkml.kernel.org/r/6a18c301-3505-742f-4dd7-0f38d0e537b9@infradead.org Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 8a072d00e688..cee0c728d39a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -40,7 +40,7 @@ #endif /* - * Not all versions of clang implement the the type-generic versions + * Not all versions of clang implement the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements * __has_builtin allowing us to avoid awkward version * checks. Unfortunately, we don't know which version of gcc clang -- cgit From cd1a406fa46f81b1a859ef874b8413a6fa6ac7e8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:00 -0700 Subject: include/linux/exportfs.h: drop duplicated word in a comment Drop the doubled word "a" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Alexander Viro Link: http://lkml.kernel.org/r/c61b707a-8fd8-5b1b-aab0-679122881543@infradead.org Signed-off-by: Linus Torvalds --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d896b8657085..3ceb72b67a7a 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -178,7 +178,7 @@ struct fid { * get_name: * @get_name should find a name for the given @child in the given @parent * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a a %NAME_MAX+1 sized + * understanding that it is already pointing to a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code * or error. @get_name will be called without @parent->i_mutex held. * -- cgit From 121ae8da9cd49f7139bf80f26352337458e63fe1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:04 -0700 Subject: include/linux/async_tx.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Dan Williams Link: http://lkml.kernel.org/r/e85802f7-8f48-8b4c-29b3-ea237a2c7ae9@infradead.org Signed-off-by: Linus Torvalds --- include/linux/async_tx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 75e582b8d2d9..4c328fef403c 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -36,7 +36,7 @@ struct dma_chan_ref { /** * async_tx_flags - modifiers for the async_* calls * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the - * the destination address is not a source. The asynchronous case handles this + * destination address is not a source. The asynchronous case handles this * implicitly, the synchronous case needs to zero the destination block. * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is * also one of the source addresses. In the synchronous case the destination -- cgit From f48ff83e9c1a8fc2e8bfedb4855933eb1ed159b2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:07 -0700 Subject: include/linux/xz.h: drop duplicated word Drop the doubled word "than" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Cc: Lasse Collin Link: http://lkml.kernel.org/r/05ebba7a-c1e4-01ae-fc7b-15c081b33f3e@infradead.org Signed-off-by: Linus Torvalds --- include/linux/xz.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/xz.h b/include/linux/xz.h index 64cffa6ddfce..24ad7d875977 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -28,7 +28,7 @@ * enum xz_mode - Operation mode * * @XZ_SINGLE: Single-call mode. This uses less RAM than - * than multi-call modes, because the LZMA2 + * multi-call modes, because the LZMA2 * dictionary doesn't need to be allocated as * part of the decoder state. All required data * structures are allocated at initialization, -- cgit From 8043fc147a97ec2eefc582487f344f2cbe86d12e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:34:10 -0700 Subject: kernel: add a kernel_wait helper Add a helper that waits for a pid and stores the status in the passed in kernel pointer. Use it to fix the usage of kernel_wait4 in call_usermodehelper_exec_sync that only happens to work due to the implicit set_fs(KERNEL_DS) for kernel threads. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: "Eric W. Biederman" Cc: Luis Chamberlain Link: http://lkml.kernel.org/r/20200721130449.5008-1-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/sched/task.h | 1 + kernel/exit.c | 16 ++++++++++++++++ kernel/umh.c | 29 ++++------------------------- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ae3060f0b0c9..a98965007eef 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,6 +88,7 @@ struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); +int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); diff --git a/kernel/exit.c b/kernel/exit.c index c2d2961576f2..733e80f334e7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1626,6 +1626,22 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options, return ret; } +int kernel_wait(pid_t pid, int *stat) +{ + struct wait_opts wo = { + .wo_type = PIDTYPE_PID, + .wo_pid = find_get_pid(pid), + .wo_flags = WEXITED, + }; + int ret; + + ret = do_wait(&wo); + if (ret > 0 && wo.wo_stat) + *stat = wo.wo_stat; + put_pid(wo.wo_pid); + return ret; +} + SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, int, options, struct rusage __user *, ru) { diff --git a/kernel/umh.c b/kernel/umh.c index a25433f9cd9a..fcf3ee803630 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -119,37 +119,16 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) { pid_t pid; - /* If SIGCLD is ignored kernel_wait4 won't populate the status. */ + /* If SIGCLD is ignored do_wait won't populate the status. */ kernel_sigaction(SIGCHLD, SIG_DFL); pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); - if (pid < 0) { + if (pid < 0) sub_info->retval = pid; - } else { - int ret = -ECHILD; - /* - * Normally it is bogus to call wait4() from in-kernel because - * wait4() wants to write the exit code to a userspace address. - * But call_usermodehelper_exec_sync() always runs as kernel - * thread (workqueue) and put_user() to a kernel address works - * OK for kernel threads, due to their having an mm_segment_t - * which spans the entire address space. - * - * Thus the __user pointer cast is valid here. - */ - kernel_wait4(pid, (int __user *)&ret, 0, NULL); - - /* - * If ret is 0, either call_usermodehelper_exec_async failed and - * the real error code is already in sub_info->retval or - * sub_info->retval is 0 anyway, so don't mess with it then. - */ - if (ret) - sub_info->retval = ret; - } + else + kernel_wait(pid, &sub_info->retval); /* Restore default kernel sig handler */ kernel_sigaction(SIGCHLD, SIG_IGN); - umh_complete(sub_info); } -- cgit From 09c60546f04f732d194a171b3a4ccc9ae1e704ba Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 11 Aug 2020 18:34:13 -0700 Subject: ./Makefile: add debug option to enable function aligned on 32 bytes Recently 0day reported many strange performance changes (regression or improvement), in which there was no obvious relation between the culprit commit and the benchmark at the first look, and it causes people to doubt the test itself is wrong. Upon further check, many of these cases are caused by the change to the alignment of kernel text or data, as whole text/data of kernel are linked together, change in one domain may affect alignments of other domains. gcc has an option '-falign-functions=n' to force text aligned, and with that option enabled, some of those performance changes will be gone, like [1][2][3]. Add this option so that developers and 0day can easily find performance bump caused by text alignment change, as tracking these strange bump is quite time consuming. Though it can't help in other cases like data alignment changes like [4]. Following is some size data for v5.7 kernel built with a RHEL config used in 0day: text data bss dec filename 19738771 13292906 5554236 38585913 vmlinux.noalign 19758591 13297002 5529660 38585253 vmlinux.align32 Raw vmlinux size in bytes: v5.7 v5.7+align32 253950832 254018000 +0.02% Some benchmark data, most of them have no big change: * hackbench: [ -1.8%, +0.5%] * fsmark: [ -3.2%, +3.4%] # ext4/xfs/btrfs * kbuild: [ -2.0%, +0.9%] * will-it-scale: [ -0.5%, +1.8%] # mmap1/pagefault3 * netperf: - TCP_CRR [+16.6%, +97.4%] - TCP_RR [-18.5%, -1.8%] - TCP_STREAM [ -1.1%, +1.9%] [1] https://lore.kernel.org/lkml/20200114085637.GA29297@shao2-debian/ [2] https://lore.kernel.org/lkml/20200330011254.GA14393@feng-iot/ [3] https://lore.kernel.org/lkml/1d98d1f0-fe84-6df7-f5bd-f4cb2cdb7f45@intel.com/ [4] https://lore.kernel.org/lkml/20200205123216.GO12867@shao2-debian/ Signed-off-by: Feng Tang Signed-off-by: Andrew Morton Cc: Masahiro Yamada Cc: Michal Marek Cc: Andi Kleen Cc: Huang Ying Cc: Andy Shevchenko Link: http://lkml.kernel.org/r/1595475001-90945-1-git-send-email-feng.tang@intel.com Signed-off-by: Linus Torvalds --- Makefile | 4 ++++ lib/Kconfig.debug | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/Makefile b/Makefile index 168dd19cad7c..254e80a96b23 100644 --- a/Makefile +++ b/Makefile @@ -893,6 +893,10 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif +ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B +KBUILD_CFLAGS += -falign-functions=32 +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a164785c3b48..ba12a58b2947 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -365,6 +365,17 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. +config DEBUG_FORCE_FUNCTION_ALIGN_32B + bool "Force all function address 32B aligned" if EXPERT + help + There are cases that a commit from one domain changes the function + address alignment of other domains, and cause magic performance + bump (regression or improvement). Enable this option will help to + verify if the bump is caused by function alignment changes, while + it will slightly increase the kernel size and affect icache usage. + + It is mainly for debug and performance tuning use. + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config -- cgit From 376653435dacf84a8aca87e66aff94079a817cf2 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Aug 2020 18:34:16 -0700 Subject: kernel.h: remove duplicate include of asm/div64.h This seems to have been added inadvertently in commit 72deb455b5ec ("block: remove CONFIG_LBDAF") Fixes: 72deb455b5ec ("block: remove CONFIG_LBDAF") Signed-off-by: Arvind Sankar Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Link: http://lkml.kernel.org/r/20200727034852.2813453-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7339a00c895e..e19c13616666 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,7 +17,6 @@ #include #include #include -#include #define STACK_MAGIC 0xdeadbeef -- cgit From 7f317d34906c1033f0752fc137dda04e43979bb8 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 11 Aug 2020 18:34:19 -0700 Subject: include/: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Link: http://lkml.kernel.org/r/20200726110117.16346-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds --- include/clocksource/timer-ti-dm.h | 2 +- include/linux/btree.h | 2 +- include/linux/delay.h | 2 +- include/linux/dma/k3-psil.h | 2 +- include/linux/dma/k3-udma-glue.h | 2 +- include/linux/dma/ti-cppi5.h | 2 +- include/linux/irqchip/irq-omap-intc.h | 2 +- include/linux/jhash.h | 2 +- include/linux/leds-ti-lmu-common.h | 2 +- include/linux/platform_data/davinci-cpufreq.h | 2 +- include/linux/platform_data/davinci_asp.h | 2 +- include/linux/platform_data/elm.h | 2 +- include/linux/platform_data/gpio-davinci.h | 2 +- include/linux/platform_data/gpmc-omap.h | 2 +- include/linux/platform_data/mtd-davinci-aemif.h | 2 +- include/linux/platform_data/omap-twl4030.h | 2 +- include/linux/platform_data/uio_pruss.h | 2 +- include/linux/platform_data/usb-omap.h | 2 +- include/linux/soc/ti/k3-ringacc.h | 2 +- include/linux/soc/ti/knav_qmss.h | 2 +- include/linux/soc/ti/ti-msgmgr.h | 2 +- include/linux/wkup_m3_ipc.h | 2 +- include/linux/xxhash.h | 2 +- include/linux/xz.h | 2 +- include/linux/zlib.h | 2 +- include/soc/arc/aux.h | 2 +- include/uapi/linux/elf.h | 2 +- include/uapi/linux/map_to_7segment.h | 2 +- include/uapi/linux/types.h | 2 +- include/uapi/linux/usb/ch9.h | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h index 531ca87fcd08..4c61dade8835 100644 --- a/include/clocksource/timer-ti-dm.h +++ b/include/clocksource/timer-ti-dm.h @@ -1,7 +1,7 @@ /* * OMAP Dual-Mode Timers * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2010 Texas Instruments Incorporated - https://www.ti.com/ * Tarun Kanti DebBarma * Thara Gopinath * diff --git a/include/linux/btree.h b/include/linux/btree.h index 68f858c831b1..243ee544397a 100644 --- a/include/linux/btree.h +++ b/include/linux/btree.h @@ -10,7 +10,7 @@ * * A B+Tree is a data structure for looking up arbitrary (currently allowing * unsigned long, u32, u64 and 2 * u64) keys into pointers. The data structure - * is described at http://en.wikipedia.org/wiki/B-tree, we currently do not + * is described at https://en.wikipedia.org/wiki/B-tree, we currently do not * use binary search to find the key on lookups. * * Each B+Tree consists of a head, that contains bookkeeping information and diff --git a/include/linux/delay.h b/include/linux/delay.h index 5e016a4029d9..1d0e2ce6b6d9 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -16,7 +16,7 @@ * 3. CPU clock rate changes. * * Please see this thread: - * http://lists.openwall.net/linux-kernel/2011/01/09/56 + * https://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include diff --git a/include/linux/dma/k3-psil.h b/include/linux/dma/k3-psil.h index 61d5cc0ad601..1962f75fa2d3 100644 --- a/include/linux/dma/k3-psil.h +++ b/include/linux/dma/k3-psil.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_PSIL_H_ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index caadbab1632a..5eb34ad973a7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_UDMA_GLUE_H_ diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 579356ae447e..5896441ee604 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -2,7 +2,7 @@ /* * CPPI5 descriptors interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __TI_CPPI5_H__ diff --git a/include/linux/irqchip/irq-omap-intc.h b/include/linux/irqchip/irq-omap-intc.h index 216e5adf80ce..dca379c0d7eb 100644 --- a/include/linux/irqchip/irq-omap-intc.h +++ b/include/linux/irqchip/irq-omap-intc.h @@ -2,7 +2,7 @@ /** * irq-omap-intc.h - INTC Idle Functions * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * * Author: Felipe Balbi */ diff --git a/include/linux/jhash.h b/include/linux/jhash.h index ba2f6a9776b6..19ddd43aee68 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -5,7 +5,7 @@ * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * - * http://burtleburtle.net/bob/hash/ + * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * diff --git a/include/linux/leds-ti-lmu-common.h b/include/linux/leds-ti-lmu-common.h index 5eb111f38803..420b61e5a213 100644 --- a/include/linux/leds-ti-lmu-common.h +++ b/include/linux/leds-ti-lmu-common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ // TI LMU Common Core -// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ +// Copyright (C) 2018 Texas Instruments Incorporated - https://www.ti.com/ #ifndef _TI_LMU_COMMON_H_ #define _TI_LMU_COMMON_H_ diff --git a/include/linux/platform_data/davinci-cpufreq.h b/include/linux/platform_data/davinci-cpufreq.h index 3fbf9f2793b5..bc208c64e3d7 100644 --- a/include/linux/platform_data/davinci-cpufreq.h +++ b/include/linux/platform_data/davinci-cpufreq.h @@ -2,7 +2,7 @@ /* * TI DaVinci CPUFreq platform support. * - * Copyright (C) 2009 Texas Instruments, Inc. http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments, Inc. https://www.ti.com/ */ #ifndef _MACH_DAVINCI_CPUFREQ_H diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index 7fe80f1c7e08..5d1fb0d78a22 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -1,7 +1,7 @@ /* * TI DaVinci Audio Serial Port support * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 0f491d8abfdd..3cc78f0447b1 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -2,7 +2,7 @@ /* * BCH Error Location Module * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef __ELM_H diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index a93841bfb9f7..e182a46e609f 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -1,7 +1,7 @@ /* * DaVinci GPIO Platform Related Defines * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index ef663e570552..c9cc4e32435d 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -2,7 +2,7 @@ /* * OMAP GPMC Platform data * - * Copyright (C) 2014 Texas Instruments, Inc. - http://www.ti.com + * Copyright (C) 2014 Texas Instruments, Inc. - https://www.ti.com * Roger Quadros */ diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h index a403dd51dacc..a49826214a39 100644 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ b/include/linux/platform_data/mtd-davinci-aemif.h @@ -1,7 +1,7 @@ /* * TI DaVinci AEMIF support * - * Copyright 2010 (C) Texas Instruments, Inc. http://www.ti.com/ + * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 8419c8caf54e..0dd851ea1c72 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -3,7 +3,7 @@ * omap-twl4030.h - ASoC machine driver for TI SoC based boards with twl4030 * codec, header. * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * All rights reserved. * * Author: Peter Ujfalusi diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h index 3d47d219827f..31f2e22661bc 100644 --- a/include/linux/platform_data/uio_pruss.h +++ b/include/linux/platform_data/uio_pruss.h @@ -3,7 +3,7 @@ * * Platform data for uio_pruss driver * - * Copyright (C) 2010-11 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2010-11 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index fa579b4c666b..5e70d667031c 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -1,7 +1,7 @@ /* * usb-omap.h - Platform data for the various OMAP USB IPs * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * * This software is distributed under the terms of the GNU General Public * License ("GPL") version 2, as published by the Free Software Foundation. diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 7ac115432fa1..5a472eca5ee4 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -2,7 +2,7 @@ /* * K3 Ring Accelerator (RA) subsystem interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __SOC_TI_K3_RINGACC_API_H_ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9745df6ed9d3..c75ef99c99ca 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -1,7 +1,7 @@ /* * Keystone Navigator Queue Management Sub-System header * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * Author: Sandeep Nair * Cyril Chemparathy * Santosh Shilimkar diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h index eac8e0c6fe11..1f6e76d423cf 100644 --- a/include/linux/soc/ti/ti-msgmgr.h +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -1,7 +1,7 @@ /* * Texas Instruments' Message Manager * - * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h index e497e621dbb7..3f496967b538 100644 --- a/include/linux/wkup_m3_ipc.h +++ b/include/linux/wkup_m3_ipc.h @@ -1,7 +1,7 @@ /* * TI Wakeup M3 for AMx3 SoCs Power Management Routines * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Dave Gerlach * * This program is free software; you can redistribute it and/or diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index 52b073fea17f..df42511438d0 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/include/linux/xz.h b/include/linux/xz.h index 24ad7d875977..9884c8440188 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -2,7 +2,7 @@ * XZ decompressor * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/include/linux/zlib.h b/include/linux/zlib.h index c757d848a758..78ede944c082 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -23,7 +23,7 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + Comments) 1950 to 1952 in the files https://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). */ diff --git a/include/soc/arc/aux.h b/include/soc/arc/aux.h index e223c4ffa153..9c2eff6140b6 100644 --- a/include/soc/arc/aux.h +++ b/include/soc/arc/aux.h @@ -22,7 +22,7 @@ static inline int read_aux_reg(u32 r) /* * function helps elide unused variable warning - * see: http://lists.infradead.org/pipermail/linux-snps-arc/2016-November/001748.html + * see: https://lists.infradead.org/pipermail/linux-snps-arc/2016-November/001748.html */ static inline void write_aux_reg(u32 r, u32 v) { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c6dd0215482e..22220945a5fd 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -53,7 +53,7 @@ typedef __s64 Elf64_Sxword; * * - Oracle: Linker and Libraries. * Part No: 817–1984–19, August 2011. - * http://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf * * - System V ABI AMD64 Architecture Processor Supplement * Draft Version 0.99.4, diff --git a/include/uapi/linux/map_to_7segment.h b/include/uapi/linux/map_to_7segment.h index f9ed18134b83..13a06e5e966e 100644 --- a/include/uapi/linux/map_to_7segment.h +++ b/include/uapi/linux/map_to_7segment.h @@ -24,7 +24,7 @@ * of (ASCII) characters to a 7-segments notation. * * The 7 segment's wikipedia notation below is used as standard. - * See: http://en.wikipedia.org/wiki/Seven_segment_display + * See: https://en.wikipedia.org/wiki/Seven_segment_display * * Notation: +-a-+ * f b diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 2fce8b6876e9..f6d2f83cbe29 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -7,7 +7,7 @@ #ifndef __ASSEMBLY__ #ifndef __KERNEL__ #ifndef __EXPORTED_HEADERS__ -#warning "Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders" +#warning "Attempt to use kernel headers from user space, see https://kernelnewbies.org/KernelHeaders" #endif /* __EXPORTED_HEADERS__ */ #endif diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 48766fdf6580..0f865ae4ba89 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -1229,7 +1229,7 @@ struct usb_set_sel_req { * As per USB compliance update, a device that is actively drawing * more than 100mA from USB must report itself as bus-powered in * the GetStatus(DEVICE) call. - * http://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 + * https://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 */ #define USB_SELF_POWER_VBUS_MAX_DRAW 100 -- cgit From 9e58c5e2fcd8d8d8820ea277e0f577f563eed1f1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 11 Aug 2020 18:34:23 -0700 Subject: include/linux/poison.h: remove obsolete comment When the definition was changed, the comment became stale. Just remove it since there isn't anything useful to say here. Fixes: b8a0255db958 ("include/linux/poison.h: use POISON_POINTER_DELTA for poison pointers") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Vasily Kulikov Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200730174108.GJ23808@casper.infradead.org Signed-off-by: Linus Torvalds --- include/linux/poison.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/poison.h b/include/linux/poison.h index df34330b4e34..dc8ae5d8db03 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -24,10 +24,6 @@ #define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ -/* - * Magic number "tsta" to indicate a static timer initializer - * for the object debugging code. - */ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ -- cgit From 25fd529c34d063d1bef23742f2e8f8341c639dc3 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 11 Aug 2020 18:34:26 -0700 Subject: sparse: group the defines by functionality By popular demand, reorder the defines for sparse annotations and group them by functionality. Signed-off-by: Luc Van Oostenryck Signed-off-by: Andrew Morton Acked-by: Miguel Ojeda Cc: Geert Uytterhoeven Link: lore.kernel.org/r/CAMuHMdWQsirja-h3wBcZezk+H2Q_HShhAks8Hc8ps5fTAp=ObQ@mail.gmail.com Link: http://lkml.kernel.org/r/20200621143652.53798-1-luc.vanoostenryck@gmail.com Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 44 ++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2e231ba8fe3f..4b33cb385f96 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,48 +5,54 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ +/* address spaces */ # define __kernel __attribute__((address_space(0))) # define __user __attribute__((noderef, address_space(__user))) -# define __safe __attribute__((safe)) -# define __force __attribute__((force)) -# define __nocast __attribute__((nocast)) # define __iomem __attribute__((noderef, address_space(__iomem))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) +extern void __chk_user_ptr(const volatile void __user *); +extern void __chk_io_ptr(const volatile void __iomem *); +/* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(__percpu))) -# define __rcu __attribute__((noderef, address_space(__rcu))) +/* other */ +# define __force __attribute__((force)) +# define __nocast __attribute__((nocast)) +# define __safe __attribute__((safe)) # define __private __attribute__((noderef)) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) #else /* __CHECKER__ */ +/* address spaces */ +# define __kernel # ifdef STRUCTLEAK_PLUGIN -# define __user __attribute__((user)) +# define __user __attribute__((user)) # else # define __user # endif -# define __kernel -# define __safe -# define __force -# define __nocast # define __iomem -# define __chk_user_ptr(x) (void)0 -# define __chk_io_ptr(x) (void)0 -# define __builtin_warning(x, y...) (1) +# define __percpu +# define __rcu +# define __chk_user_ptr(x) (void)0 +# define __chk_io_ptr(x) (void)0 +/* context/locking */ # define __must_hold(x) # define __acquires(x) # define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 +# define __acquire(x) (void)0 +# define __release(x) (void)0 # define __cond_lock(x,c) (c) -# define __percpu -# define __rcu +/* other */ +# define __force +# define __nocast +# define __safe # define __private # define ACCESS_PRIVATE(p, member) ((p)->member) +# define __builtin_warning(x, y...) (1) #endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ -- cgit From 5959f829a93c18ccf15715768317b58f5d9bf2d4 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 11 Aug 2020 18:34:29 -0700 Subject: lib/bitmap.c: fix bitmap_cut() for partial overlapping case Patch series "lib: Fix bitmap_cut() for overlaps, add test" This patch (of 2): Yury Norov reports that bitmap_cut() will not produce the right outcome if src and dst partially overlap, with src pointing at some location after dst, because the memmove() affects src before we store the bits that we need to keep, that is, the bits preceding the cut -- as long as we the beginning of the cut is not aligned to a long. Fix this by storing those bits before the memmove(). Note that this is just a theoretical concern so far, as the only user of this function, pipapo_drop() from the nftables set back-end implemented in net/netfilter/nft_set_pipapo.c, always supplies entirely overlapping src and dst. Fixes: 2092767168f0 ("bitmap: Introduce bitmap_cut(): cut bits and shift remaining") Reported-by: Yury Norov Signed-off-by: Stefano Brivio Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Pablo Neira Ayuso Link: http://lkml.kernel.org/r/cover.1592155364.git.sbrivio@redhat.com Link: http://lkml.kernel.org/r/003e38d4428cd6091ef00b5b03354f1bd7d9091e.1592155364.git.sbrivio@redhat.com Signed-off-by: Linus Torvalds --- lib/bitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 0364452b1617..c13d859bc7ab 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -212,13 +212,13 @@ void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned long keep = 0, carry; int i; - memmove(dst, src, len * sizeof(*dst)); - if (first % BITS_PER_LONG) { keep = src[first / BITS_PER_LONG] & (~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG)); } + memmove(dst, src, len * sizeof(*dst)); + while (cut--) { for (i = first / BITS_PER_LONG; i < len; i++) { if (i < len - 1) -- cgit From bcb32a1d82614ffb8a56f14c648c3970bcd2cf89 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 11 Aug 2020 18:34:32 -0700 Subject: lib/test_bitmap.c: add test for bitmap_cut() Inspired by an original patch from Yury Norov: introduce a test for bitmap_cut() that also makes sure functionality is as described for partially overlapping src and dst. Signed-off-by: Stefano Brivio Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Pablo Neira Ayuso Cc: Rasmus Villemoes Cc: Yury Norov Link: http://lkml.kernel.org/r/5fc45e6bbd4fa837cd9577f8a0c1d639df90a4ce.1592155364.git.sbrivio@redhat.com Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6b13150667f5..df903c53952b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -610,6 +610,63 @@ static void __init test_for_each_set_clump8(void) expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump); } +struct test_bitmap_cut { + unsigned int first; + unsigned int cut; + unsigned int nbits; + unsigned long in[4]; + unsigned long expected[4]; +}; + +static struct test_bitmap_cut test_cut[] = { + { 0, 0, 8, { 0x0000000aUL, }, { 0x0000000aUL, }, }, + { 0, 0, 32, { 0xdadadeadUL, }, { 0xdadadeadUL, }, }, + { 0, 3, 8, { 0x000000aaUL, }, { 0x00000015UL, }, }, + { 3, 3, 8, { 0x000000aaUL, }, { 0x00000012UL, }, }, + { 0, 1, 32, { 0xa5a5a5a5UL, }, { 0x52d2d2d2UL, }, }, + { 0, 8, 32, { 0xdeadc0deUL, }, { 0x00deadc0UL, }, }, + { 1, 1, 32, { 0x5a5a5a5aUL, }, { 0x2d2d2d2cUL, }, }, + { 0, 15, 32, { 0xa5a5a5a5UL, }, { 0x00014b4bUL, }, }, + { 0, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 15, 15, 32, { 0xa5a5a5a5UL, }, { 0x000125a5UL, }, }, + { 15, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 16, 15, 32, { 0xa5a5a5a5UL, }, { 0x0001a5a5UL, }, }, + + { BITS_PER_LONG, BITS_PER_LONG, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + }, + { 1, BITS_PER_LONG - 1, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0x00000001UL, 0x00000001UL, }, + }, + + { 0, BITS_PER_LONG * 2, BITS_PER_LONG * 2 + 1, + { 0xa5a5a5a5UL, 0x00000001UL, 0x00000001UL, 0x00000001UL }, + { 0x00000001UL, }, + }, + { 16, BITS_PER_LONG * 2 + 1, BITS_PER_LONG * 2 + 1 + 16, + { 0x0000ffffUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL }, + { 0x2d2dffffUL, }, + }, +}; + +static void __init test_bitmap_cut(void) +{ + unsigned long b[5], *in = &b[1], *out = &b[0]; /* Partial overlap */ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cut); i++) { + struct test_bitmap_cut *t = &test_cut[i]; + + memcpy(in, t->in, sizeof(t->in)); + + bitmap_cut(out, in, t->first, t->cut, t->nbits); + + expect_eq_bitmap(t->expected, out, t->nbits); + } +} + static void __init selftest(void) { test_zero_clear(); @@ -623,6 +680,7 @@ static void __init selftest(void) test_bitmap_parselist_user(); test_mem_optimisations(); test_for_each_set_clump8(); + test_bitmap_cut(); } KSTM_MODULE_LOADERS(test_bitmap); -- cgit From 0a650e472d2039a5f768acd82f2a7068bf338dfd Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 11 Aug 2020 18:34:35 -0700 Subject: lib/generic-radix-tree.c: remove unneeded __rcu struct __genradix is defined as having its member 'root' annotated as __rcu. But in the corresponding API RCU is not used. Sparse reports this type mismatch as: lib/generic-radix-tree.c:56:35: warning: incorrect type in initializer (different address spaces) lib/generic-radix-tree.c:56:35: expected struct genradix_root *r lib/generic-radix-tree.c:56:35: got struct genradix_root [noderef] *__val with 6 other ones. So, correct root's type by removing this unneeded __rcu. Signed-off-by: Luc Van Oostenryck Signed-off-by: Andrew Morton Cc: Kent Overstreet Link: http://lkml.kernel.org/r/20200621161745.55396-1-luc.vanoostenryck@gmail.com Signed-off-by: Linus Torvalds --- include/linux/generic-radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 02393c0c98f9..bfd00320c7f3 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -44,7 +44,7 @@ struct genradix_root; struct __genradix { - struct genradix_root __rcu *root; + struct genradix_root *root; }; /* -- cgit From 403f177304354990c36d5a7d125bce2b39bcbe2c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Aug 2020 18:34:38 -0700 Subject: lib/test_bitops: do the full test during module init Currently, the bitops test consists of two parts: one part is executed during module load, the second part during module unload. This is cumbersome for the user, as he has to perform two steps to execute all tests, and is different from most (all?) other tests. Merge the two parts, so both are executed during module load. Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Wei Yang Cc: Jesse Brandeburg Link: http://lkml.kernel.org/r/20200706112900.7097-1-geert@linux-m68k.org Signed-off-by: Linus Torvalds --- lib/test_bitops.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/test_bitops.c b/lib/test_bitops.c index ced25e3a779b..471141ddd691 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -52,9 +52,9 @@ static unsigned long order_comb_long[][2] = { static int __init test_bitops_startup(void) { - int i; + int i, bit_set; - pr_warn("Loaded test module\n"); + pr_info("Starting bitops test\n"); set_bit(BITOPS_4, g_bitmap); set_bit(BITOPS_7, g_bitmap); set_bit(BITOPS_11, g_bitmap); @@ -81,12 +81,8 @@ static int __init test_bitops_startup(void) order_comb_long[i][0]); } #endif - return 0; -} -static void __exit test_bitops_unstartup(void) -{ - int bit_set; + barrier(); clear_bit(BITOPS_4, g_bitmap); clear_bit(BITOPS_7, g_bitmap); @@ -98,7 +94,13 @@ static void __exit test_bitops_unstartup(void) if (bit_set != BITOPS_LAST) pr_err("ERROR: FOUND SET BIT %d\n", bit_set); - pr_warn("Unloaded test module\n"); + pr_info("Completed bitops test\n"); + + return 0; +} + +static void __exit test_bitops_unstartup(void) +{ } module_init(test_bitops_startup); -- cgit From f36331770406b8e693a3d8d71ab3ccbbeabc7142 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 11 Aug 2020 18:34:41 -0700 Subject: lib/test_lockup.c: make symbol 'test_works' static Fix sparse build warning: lib/test_lockup.c:403:1: warning: symbol '__pcpu_scope_test_works' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200707112252.9047-1-weiyongjun1@huawei.com Signed-off-by: Linus Torvalds --- lib/test_lockup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index ff26f36d729f..0f81252837b9 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -400,7 +400,7 @@ static void test_lockup(bool master) test_unlock(master, true); } -DEFINE_PER_CPU(struct work_struct, test_works); +static DEFINE_PER_CPU(struct work_struct, test_works); static void test_work_fn(struct work_struct *work) { -- cgit From 63646bc9f95f7faf5345e54a59fd1ecdd4ba5316 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:34:44 -0700 Subject: lib/Kconfig.debug: make TEST_LOCKUP depend on module Since test_lockup is a test module to generate lockups, it is better to limit TEST_LOCKUP to module (=m) or disabled (=n) because we can not use the module parameters when CONFIG_TEST_LOCKUP=y. Signed-off-by: Tiezhu Yang Signed-off-by: Andrew Morton Reviewed-by: Guenter Roeck Cc: Konstantin Khlebnikov Cc: Kees Cook Link: http://lkml.kernel.org/r/1595555407-29875-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba12a58b2947..32300eb79d46 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1078,6 +1078,7 @@ config WQ_WATCHDOG config TEST_LOCKUP tristate "Test module to generate lockups" + depends on m help This builds the "test_lockup" module that helps to make sure that watchdogs and lockup detectors are working properly. -- cgit From 3adf3bae0d612357da516d39e1584f1547eb6e86 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:34:47 -0700 Subject: lib/test_lockup.c: fix return value of test_lockup_init() Since filp_open() returns an error pointer, we should use IS_ERR() to check the return value and then return PTR_ERR() if failed to get the actual return value instead of always -EINVAL. E.g. without this patch: [root@localhost loongson]# ls no_such_file ls: cannot access no_such_file: No such file or directory [root@localhost loongson]# modprobe test_lockup file_path=no_such_file lock_sb_umount time_secs=60 state=S modprobe: ERROR: could not insert 'test_lockup': Invalid argument [root@localhost loongson]# dmesg | tail -1 [ 126.100596] test_lockup: cannot find file_path With this patch: [root@localhost loongson]# ls no_such_file ls: cannot access no_such_file: No such file or directory [root@localhost loongson]# modprobe test_lockup file_path=no_such_file lock_sb_umount time_secs=60 state=S modprobe: ERROR: could not insert 'test_lockup': Unknown symbol in module, or unknown parameter (see dmesg) [root@localhost loongson]# dmesg | tail -1 [ 95.134362] test_lockup: failed to open no_such_file: -2 Fixes: aecd42df6d39 ("lib/test_lockup.c: add parameters for locking generic vfs locks") Signed-off-by: Tiezhu Yang Signed-off-by: Andrew Morton Reviewed-by: Guenter Roeck Cc: Konstantin Khlebnikov Cc: Kees Cook Link: http://lkml.kernel.org/r/1595555407-29875-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Linus Torvalds --- lib/test_lockup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index 0f81252837b9..f1a020bcc763 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -512,8 +512,8 @@ static int __init test_lockup_init(void) if (test_file_path[0]) { test_file = filp_open(test_file_path, O_RDONLY, 0); if (IS_ERR(test_file)) { - pr_err("cannot find file_path\n"); - return -EINVAL; + pr_err("failed to open %s: %ld\n", test_file_path, PTR_ERR(test_file)); + return PTR_ERR(test_file); } test_inode = file_inode(test_file); } else if (test_lock_inode || -- cgit From d89775fc929c5a1d91ed518a71b456da0865e5ff Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 11 Aug 2020 18:34:50 -0700 Subject: lib/: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov Signed-off-by: Andrew Morton Acked-by: Coly Li [crc64.c] Link: http://lkml.kernel.org/r/20200726112154.16510-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/crc64.c | 2 +- lib/decompress_bunzip2.c | 2 +- lib/decompress_unlzma.c | 6 +++--- lib/math/rational.c | 2 +- lib/rbtree.c | 2 +- lib/ts_bm.c | 2 +- lib/xxhash.c | 2 +- lib/xz/xz_crc32.c | 2 +- lib/xz/xz_dec_bcj.c | 2 +- lib/xz/xz_dec_lzma2.c | 2 +- lib/xz/xz_lzma2.h | 2 +- lib/xz/xz_stream.h | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 32300eb79d46..5c5fbcb34af4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2215,7 +2215,7 @@ config LIST_KUNIT_TEST and associated macros. KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs + in TAP format (https://testanything.org/). Only useful for kernel devs running the KUnit test harness, and not intended for inclusion into a production build. diff --git a/lib/crc64.c b/lib/crc64.c index f8928ce28280..47cfa054827f 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -4,7 +4,7 @@ * * This is a basic crc64 implementation following ECMA-182 specification, * which can be found from, - * http://www.ecma-international.org/publications/standards/Ecma-182.htm + * https://www.ecma-international.org/publications/standards/Ecma-182.htm * * Dr. Ross N. Williams has a great document to introduce the idea of CRC * algorithm, here the CRC64 code is also inspired by the table-driven diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 7c4932eed748..f9628f3924ce 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -34,7 +34,7 @@ Phone (337) 232-1234 or 1-800-738-2226 Fax (337) 232-1297 - http://www.hospiceacadiana.com/ + https://www.hospiceacadiana.com/ Manuel */ diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index ed7a1fd819f2..1cf409ef8d04 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -8,7 +8,7 @@ *implementation for lzma. *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (C) 1999-2005 Igor Pavlov * *Copyrights of the parts, see headers below. @@ -56,7 +56,7 @@ static long long INIT read_int(unsigned char *ptr, int size) /* Small range coder implementation for lzma. *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (c) 1999-2005 Igor Pavlov */ @@ -213,7 +213,7 @@ rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) * Small lzma deflate implementation. * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) * Copyright (C) 1999-2005 Igor Pavlov */ diff --git a/lib/math/rational.c b/lib/math/rational.c index 31fb27db2deb..df75c8809693 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -27,7 +27,7 @@ * with the fractional part size described in given_denominator. * * for theoretical background, see: - * http://en.wikipedia.org/wiki/Continued_fraction + * https://en.wikipedia.org/wiki/Continued_fraction */ void rational_best_approximation( diff --git a/lib/rbtree.c b/lib/rbtree.c index 8545872e61db..c4ac5c2421f2 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -13,7 +13,7 @@ #include /* - * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree * * 1) A node is either red or black * 2) The root is black diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 277cb4417ac2..4cf250031f0f 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -11,7 +11,7 @@ * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. * Communications of the Association for Computing Machinery, * 20(10), 1977, pp. 762-772. - * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * https://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf * * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf diff --git a/lib/xxhash.c b/lib/xxhash.c index aa61e2a3802f..d5bb9ff10607 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 912aae5fa09e..88a2c35e1b59 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -2,7 +2,7 @@ * CRC32 using the polynomial from IEEE-802.3 * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index a768e6d28bbb..72ddac6ef2ec 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -2,7 +2,7 @@ * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 156f26fdc4c9..9f336bc07ed6 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -2,7 +2,7 @@ * LZMA2 decoder * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h index 071d67bee9f5..92d852d4f87a 100644 --- a/lib/xz/xz_lzma2.h +++ b/lib/xz/xz_lzma2.h @@ -2,7 +2,7 @@ * LZMA2 definitions * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h index 66cb5a7055ec..430bb3a0d195 100644 --- a/lib/xz/xz_stream.h +++ b/lib/xz/xz_stream.h @@ -19,7 +19,7 @@ /* * See the .xz file format specification at - * http://tukaani.org/xz/xz-file-format.txt + * https://tukaani.org/xz/xz-file-format.txt * to understand the container format. */ -- cgit From b642e44e8ab335868b549fe5753b783ca47bf3a3 Mon Sep 17 00:00:00 2001 From: Kars Mulder Date: Tue, 11 Aug 2020 18:34:53 -0700 Subject: kstrto*: correct documentation references to simple_strto*() The documentation of the kstrto*() functions reference the simple_strtoull function by "used as a replacement for [the obsolete] simple_strtoull". All these functions describes themselves as replacements for the function simple_strtoull, even though a function like kstrtol() would be more aptly described as a replacement of simple_strtol(). Fix these references by making the documentation of kstrto*() reference the closest simple_strto*() equivalent available. The functions kstrto[u]int() do not have direct simple_strto[u]int() equivalences, so these are made to refer to simple_strto[u]l() instead. Furthermore, add parentheses after function names, as is standard in kernel documentation. Fixes: 4c925d6031f71 ("kstrto*: add documentation") Signed-off-by: Kars Mulder Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Eldad Zack Cc: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Link: http://lkml.kernel.org/r/1ee1-5f234c00-f3-165a6440@234394593 Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- lib/kstrtox.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e19c13616666..47b1dad63ffc 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -346,7 +346,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Used as a replacement for the simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -374,7 +374,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Used as a replacement for the simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 1006bf70bf74..252ac414ba9a 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -115,7 +115,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must + * Used as a replacement for the obsolete simple_strtoull(). Return code must * be checked. */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) @@ -139,7 +139,7 @@ EXPORT_SYMBOL(kstrtoull); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must + * Used as a replacement for the obsolete simple_strtoll(). Return code must * be checked. */ int kstrtoll(const char *s, unsigned int base, long long *res) @@ -211,7 +211,7 @@ EXPORT_SYMBOL(_kstrtol); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must + * Used as a replacement for the obsolete simple_strtoul(). Return code must * be checked. */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) @@ -242,7 +242,7 @@ EXPORT_SYMBOL(kstrtouint); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must + * Used as a replacement for the obsolete simple_strtol(). Return code must * be checked. */ int kstrtoint(const char *s, unsigned int base, int *res) -- cgit From ef0f2685336bbc334e8b6997ce9b155e5f7edd31 Mon Sep 17 00:00:00 2001 From: Kars Mulder Date: Tue, 11 Aug 2020 18:34:56 -0700 Subject: kstrto*: do not describe simple_strto*() as obsolete/replaced The documentation of the kstrto*() functions describes kstrto*() as "replacements" of the "obsolete" simple_strto*() functions. Both of these terms are inaccurate: they're not replacements because they have different behaviour, and the simple_strto*() are not obsolete because there are cases where they have benefits over kstrto*(). Remove usage of the terms "replacement" and "obsolete" in reference to simple_strto*(), and instead use the term "preferred over". Fixes: 4c925d6031f71 ("kstrto*: add documentation") Fixes: 885e68e8b7b13 ("kernel.h: update comment about simple_strto() functions") Signed-off-by: Kars Mulder Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Eldad Zack Cc: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Link: http://lkml.kernel.org/r/29b9-5f234c80-13-4e3aa200@244003027 Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- lib/kstrtox.c | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 47b1dad63ffc..92517ae53e7c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -346,7 +346,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoul(). Return code must be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -374,7 +374,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtol(). Return code must be checked. + * Preferred over simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 252ac414ba9a..a14ccf905055 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -115,8 +115,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull(). Return code must - * be checked. + * Preferred over simple_strtoull(). Return code must be checked. */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { @@ -139,8 +138,7 @@ EXPORT_SYMBOL(kstrtoull); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoll(). Return code must - * be checked. + * Preferred over simple_strtoll(). Return code must be checked. */ int kstrtoll(const char *s, unsigned int base, long long *res) { @@ -211,8 +209,7 @@ EXPORT_SYMBOL(_kstrtol); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoul(). Return code must - * be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) { @@ -242,8 +239,7 @@ EXPORT_SYMBOL(kstrtouint); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtol(). Return code must - * be checked. + * Preferred over simple_strtol(). Return code must be checked. */ int kstrtoint(const char *s, unsigned int base, int *res) { -- cgit From 6d511020e13d5d6f5f0af853e32ddef75c693ccc Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 11 Aug 2020 18:35:03 -0700 Subject: lib/test_bits.c: add tests of GENMASK Add tests of GENMASK and GENMASK_ULL. A few test cases that should fail compilation are provided under #ifdef TEST_GENMASK_FAILURES [rd.dunlap@gmail.com: add MODULE_LICENSE()] Link: http://lkml.kernel.org/r/dfc74524-0789-2827-4eff-476ddab65699@gmail.com [weiyongjun1@huawei.com: make some functions static] Link: http://lkml.kernel.org/r/20200702150336.4756-1-weiyongjun1@huawei.com Suggested-by: Andy Shevchenko Signed-off-by: Rikard Falkeborn Signed-off-by: Randy Dunlap Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Acked-by: William Breathitt Gray Cc: Emil Velikov Cc: Syed Nayyar Waris Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Geert Uytterhoeven Cc: Kees Cook Cc: Linus Walleij Cc: Masahiro Yamada Link: http://lkml.kernel.org/r/20200621054210.14804-2-rikard.falkeborn@gmail.com Link: http://lkml.kernel.org/r/20200608221823.35799-2-rikard.falkeborn@gmail.com Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 11 ++++++++ lib/Makefile | 1 + lib/test_bits.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 lib/test_bits.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5c5fbcb34af4..9ca1b11af1f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2236,6 +2236,17 @@ config LINEAR_RANGES_TEST If unsure, say N. +config BITS_TEST + tristate "KUnit test for bits.h" + depends on KUNIT + help + This builds the bits unit test. + Tests the logic of macros defined in bits.h. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 9d1fd82ea145..e290fc5707ea 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -342,3 +342,4 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_BITS_TEST) += test_bits.o diff --git a/lib/test_bits.c b/lib/test_bits.c new file mode 100644 index 000000000000..c9368a2314e7 --- /dev/null +++ b/lib/test_bits.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for functions and macros in bits.h + */ + +#include +#include + + +static void genmask_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ul, GENMASK(0, 0)); + KUNIT_EXPECT_EQ(test, 3ul, GENMASK(1, 0)); + KUNIT_EXPECT_EQ(test, 6ul, GENMASK(2, 1)); + KUNIT_EXPECT_EQ(test, 0xFFFFFFFFul, GENMASK(31, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK(0, 1); + GENMASK(0, 10); + GENMASK(9, 10); +#endif + + +} + +static void genmask_ull_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ull, GENMASK_ULL(0, 0)); + KUNIT_EXPECT_EQ(test, 3ull, GENMASK_ULL(1, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_ULL(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_ULL(63, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK_ULL(0, 1); + GENMASK_ULL(0, 10); + GENMASK_ULL(9, 10); +#endif +} + +static void genmask_input_check_test(struct kunit *test) +{ + unsigned int x, y; + int z, w; + + /* Unknown input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, x)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, y)); + + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, z)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, w)); + + /* Valid input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21)); +} + + +static struct kunit_case bits_test_cases[] = { + KUNIT_CASE(genmask_test), + KUNIT_CASE(genmask_ull_test), + KUNIT_CASE(genmask_input_check_test), + {} +}; + +static struct kunit_suite bits_test_suite = { + .name = "bits-test", + .test_cases = bits_test_cases, +}; +kunit_test_suite(bits_test_suite); + +MODULE_LICENSE("GPL"); -- cgit From 50161266973bcc662e969e63d68fc7bff71de21b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:07 -0700 Subject: checkpatch: add test for possible misuse of IS_ENABLED() without CONFIG_ IS_ENABLED is almost always used with CONFIG_ defines. Add a test to verify that the #define being tested starts with CONFIG_. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Link: http://lkml.kernel.org/r/e7fda760b91b769ba82844ba282d432c0d26d709.camel@perches.com Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 599b8c4933a7..ba9ab2b42d73 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6465,6 +6465,12 @@ sub process { } } +# check for IS_ENABLED() without CONFIG_ ($rawline for comments too) + if ($rawline =~ /\bIS_ENABLED\s*\(\s*(\w+)\s*\)/ && $1 !~ /^CONFIG_/) { + WARN("IS_ENABLED_CONFIG", + "IS_ENABLED($1) is normally used as IS_ENABLED(CONFIG_$1)\n" . $herecurr); + } + # check for #if defined CONFIG_ || defined CONFIG__MODULE if ($line =~ /^\+\s*#\s*if\s+defined(?:\s*\(?\s*|\s+)(CONFIG_[A-Z_]+)\s*\)?\s*\|\|\s*defined(?:\s*\(?\s*|\s+)\1_MODULE\s*\)?\s*$/) { my $config = $1; -- cgit From 65b64b3bec3fa74937fdc18e5ccf10a48ee3a7d3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:10 -0700 Subject: checkpatch: add --fix option for ASSIGN_IN_IF Add a --fix option for 2 types of single-line assignment in if statements if ((foo = bar(...)) < BAZ) { expands to: foo = bar(..); if (foo < BAZ) { and if ((foo = bar(...)) { expands to: foo = bar(...); if (foo) { if statements with assignments spanning multiple lines are not converted with the --fix option. if statements with additional logic are also not converted. e.g.: if ((foo = bar(...)) & BAZ == BAZ) { Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Cc: Julia Lawall Link: http://lkml.kernel.org/r/9bc7c782516f37948f202deba511bc95ed279bbd.camel@perches.com Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index ba9ab2b42d73..5cb791eb4c6f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5020,8 +5020,30 @@ sub process { my ($s, $c) = ($stat, $cond); if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { - ERROR("ASSIGN_IN_IF", - "do not use assignment in if condition\n" . $herecurr); + if (ERROR("ASSIGN_IN_IF", + "do not use assignment in if condition\n" . $herecurr) && + $fix && $perl_version_ok) { + if ($rawline =~ /^\+(\s+)if\s*\(\s*(\!)?\s*\(\s*(($Lval)\s*=\s*$LvalOrFunc)\s*\)\s*(?:($Compare)\s*($FuncArg))?\s*\)\s*(\{)?\s*$/) { + my $space = $1; + my $not = $2; + my $statement = $3; + my $assigned = $4; + my $test = $8; + my $against = $9; + my $brace = $15; + fix_delete_line($fixlinenr, $rawline); + fix_insert_line($fixlinenr, "$space$statement;"); + my $newline = "${space}if ("; + $newline .= '!' if defined($not); + $newline .= '(' if (defined $not && defined($test) && defined($against)); + $newline .= "$assigned"; + $newline .= " $test $against" if (defined($test) && defined($against)); + $newline .= ')' if (defined $not && defined($test) && defined($against)); + $newline .= ')'; + $newline .= " {" if (defined($brace)); + fix_insert_line($fixlinenr + 1, $newline); + } + } } # Find out what is on the end of the line after the -- cgit From ced69da1db0b57bbb10054bd74201babc094f408 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 11 Aug 2020 18:35:13 -0700 Subject: checkpatch: fix CONST_STRUCT when const_structs.checkpatch is missing Checkpatch reports warnings when some specific structs are not declared as const in the code. The list of structs to consider was initially defined in the checkpatch.pl script itself, but it was later moved to an external file (scripts/const_structs.checkpatch), in commit bf1fa1dae68e ("checkpatch: externalize the structs that should be const"). This introduced two minor issues: - When file scripts/const_structs.checkpatch is not present (for example, if checkpatch is run outside of the kernel directory with the "--no-tree" option), a warning is printed to stderr to tell the user that "No structs that should be const will be found". This is fair, but the warning is printed unconditionally, even if the option "--ignore CONST_STRUCT" is passed. In the latter case, we explicitly ask checkpatch to skip this check, so no warning should be printed. - When scripts/const_structs.checkpatch is missing, or even when trying to silence the warning by adding an empty file, $const_structs is set to "", and the regex used for finding structs that should be const, "$line =~ /struct\s+($const_structs)(?!\s*\{)/)", matches all structs found in the code, thus reporting a number of false positives. Let's fix the first item by skipping scripts/const_structs.checkpatch processing if "CONST_STRUCT" checks are ignored, and the second one by skipping the test if $const_structs is not defined. Since we modify the read_words() function a little bit, update the checks for $typedefsfile/$typeOtherTypedefs as well. Signed-off-by: Quentin Monnet Signed-off-by: Andrew Morton Acked-by: Joe Perches Link: http://lkml.kernel.org/r/20200623221822.3727-1-quentin@isovalent.com Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5cb791eb4c6f..1abddf8fe56e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -59,7 +59,7 @@ my $spelling_file = "$D/spelling.txt"; my $codespell = 0; my $codespellfile = "/usr/share/codespell/dictionary.txt"; my $conststructsfile = "$D/const_structs.checkpatch"; -my $typedefsfile = ""; +my $typedefsfile; my $color = "auto"; my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANCE # git output parsing needs US English output, so first set backtick child process LANGUAGE @@ -756,7 +756,7 @@ sub read_words { next; } - $$wordsRef .= '|' if ($$wordsRef ne ""); + $$wordsRef .= '|' if (defined $$wordsRef); $$wordsRef .= $line; } close($file); @@ -766,16 +766,18 @@ sub read_words { return 0; } -my $const_structs = ""; -read_words(\$const_structs, $conststructsfile) - or warn "No structs that should be const will be found - file '$conststructsfile': $!\n"; +my $const_structs; +if (show_type("CONST_STRUCT")) { + read_words(\$const_structs, $conststructsfile) + or warn "No structs that should be const will be found - file '$conststructsfile': $!\n"; +} -my $typeOtherTypedefs = ""; -if (length($typedefsfile)) { +if (defined($typedefsfile)) { + my $typeOtherTypedefs; read_words(\$typeOtherTypedefs, $typedefsfile) or warn "No additional types will be considered - file '$typedefsfile': $!\n"; + $typeTypedefs .= '|' . $typeOtherTypedefs if (defined $typeOtherTypedefs); } -$typeTypedefs .= '|' . $typeOtherTypedefs if ($typeOtherTypedefs ne ""); sub build_types { my $mods = "(?x: \n" . join("|\n ", (@modifierList, @modifierListFile)) . "\n)"; @@ -6643,7 +6645,8 @@ sub process { # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' - if ($line !~ /\bconst\b/ && + if (defined($const_structs) && + $line !~ /\bconst\b/ && $line =~ /\bstruct\s+($const_structs)\b(?!\s*\{)/) { WARN("CONST_STRUCT", "struct $1 should normally be const\n" . $herecurr); -- cgit From 1a3dcf2e6b35faa1176b9cd8200094fbce16ba19 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:16 -0700 Subject: checkpatch: add test for repeated words Try to avoid adding repeated words either on the same line or consecutive comment lines in a block e.g.: duplicated word in comment block /* * this is a comment block where the last word of the previous * previous line is also the first word of the next line */ and simple duplication /* test this this again */ Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/cda9b566ad67976e1acd62b053de50ee44a57250.camel@perches.com Inspired-by: Randy Dunlap Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1abddf8fe56e..114af54c3194 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -588,6 +588,8 @@ our @mode_permission_funcs = ( ["__ATTR", 2], ); +my $word_pattern = '\b[A-Z]?[a-z]{2,}\b'; + #Create a search pattern for all these functions to speed up a loop below our $mode_perms_search = ""; foreach my $entry (@mode_permission_funcs) { @@ -3312,6 +3314,42 @@ sub process { } } +# check for repeated words separated by a single space + if ($rawline =~ /^\+/) { + while ($rawline =~ /\b($word_pattern) (?=($word_pattern))/g) { + + my $first = $1; + my $second = $2; + + if ($first =~ /(?:struct|union|enum)/) { + pos($rawline) += length($first) + length($second) + 1; + next; + } + + next if ($first ne $second); + next if ($first eq 'long'); + + if (WARN("REPEATED_WORD", + "Possible repeated word: '$first'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b$first $second\b/$first/; + } + } + + # if it's a repeated word on consecutive lines in a comment block + if ($prevline =~ /$;+\s*$/ && + $prevrawline =~ /($word_pattern)\s*$/) { + my $last_word = $1; + if ($rawline =~ /^\+\s*\*\s*$last_word /) { + if (WARN("REPEATED_WORD", + "Possible repeated word: '$last_word'\n" . $hereprev) && + $fix) { + $fixed[$fixlinenr] =~ s/(\+\s*\*\s*)$last_word /$1/; + } + } + } + } + # check for space before tabs. if ($rawline =~ /^\+/ && $rawline =~ / \t/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; -- cgit From ef3c005c0eb07a60949191bc6ee407d5f43cc502 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:19 -0700 Subject: checkpatch: remove missing switch/case break test This test doesn't work well and newer compilers are much better at emitting this warning. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Cc: Cambda Zhu Link: http://lkml.kernel.org/r/7e25090c79f6a69d502ab8219863300790192fe2.camel@perches.com Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 114af54c3194..2cbeae6d9aee 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6543,31 +6543,6 @@ sub process { } } -# check for case / default statements not preceded by break/fallthrough/switch - if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { - my $has_break = 0; - my $has_statement = 0; - my $count = 0; - my $prevline = $linenr; - while ($prevline > 1 && ($file || $count < 3) && !$has_break) { - $prevline--; - my $rline = $rawlines[$prevline - 1]; - my $fline = $lines[$prevline - 1]; - last if ($fline =~ /^\@\@/); - next if ($fline =~ /^\-/); - next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/); - $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i); - next if ($fline =~ /^.[\s$;]*$/); - $has_statement = 1; - $count++; - $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|exit\s*\(\b|return\b|goto\b|continue\b)/); - } - if (!$has_break && $has_statement) { - WARN("MISSING_BREAK", - "Possible switch case/default not preceded by break or fallthrough comment\n" . $herecurr); - } - } - # check for /* fallthrough */ like comment, prefer fallthrough; my @fallthroughs = ( 'fallthrough', -- cgit From 2fb3244f0a58ceb3d866ac63f644dfa31cae430f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:35:21 -0700 Subject: autofs: fix doubled word Change doubled word "is" to "it is". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Ian Kent Link: http://lkml.kernel.org/r/5a82befd-40f8-8dc0-3498-cbc0436cad9b@infradead.org Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_dev-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 374742651c30..62e625356dc8 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -82,7 +82,7 @@ struct args_ismountpoint { /* * All the ioctls use this structure. * When sending a path size must account for the total length - * of the chunk of memory otherwise is is the size of the + * of the chunk of memory otherwise it is the size of the * structure. */ -- cgit From da27e0a0e5f655f0d58d4e153c3182bb2b290f64 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:24 -0700 Subject: fs/minix: check return value of sb_getblk() Patch series "fs/minix: fix syzbot bugs and set s_maxbytes". This series fixes all syzbot bugs in the minix filesystem: KASAN: null-ptr-deref Write in get_block KASAN: use-after-free Write in get_block KASAN: use-after-free Read in get_block WARNING in inc_nlink KMSAN: uninit-value in get_block WARNING in drop_nlink It also fixes the minix filesystem to set s_maxbytes correctly, so that userspace sees the correct behavior when exceeding the max file size. This patch (of 6): sb_getblk() can fail, so check its return value. This fixes a NULL pointer dereference. Originally from Qiujun Huang. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+4a88b2b9dc280f47baf4@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Qiujun Huang Cc: Alexander Viro Cc: Link: http://lkml.kernel.org/r/20200628060846.682158-1-ebiggers@kernel.org Link: http://lkml.kernel.org/r/20200628060846.682158-2-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/itree_common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c index 043c3fdbc8e7..446148792f41 100644 --- a/fs/minix/itree_common.c +++ b/fs/minix/itree_common.c @@ -75,6 +75,7 @@ static int alloc_branch(struct inode *inode, int n = 0; int i; int parent = minix_new_block(inode); + int err = -ENOSPC; branch[0].key = cpu_to_block(parent); if (parent) for (n = 1; n < num; n++) { @@ -85,6 +86,11 @@ static int alloc_branch(struct inode *inode, break; branch[n].key = cpu_to_block(nr); bh = sb_getblk(inode->i_sb, parent); + if (!bh) { + minix_free_block(inode, nr); + err = -ENOMEM; + break; + } lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); branch[n].bh = bh; @@ -103,7 +109,7 @@ static int alloc_branch(struct inode *inode, bforget(branch[i].bh); for (i = 0; i < n; i++) minix_free_block(inode, block_to_cpu(branch[i].key)); - return -ENOSPC; + return err; } static inline int splice_branch(struct inode *inode, -- cgit From facb03dddec04e4aac1bb2139accdceb04deb1f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:27 -0700 Subject: fs/minix: don't allow getting deleted inodes If an inode has no links, we need to mark it bad rather than allowing it to be accessed. This avoids WARNINGs in inc_nlink() and drop_nlink() when doing directory operations on a fuzzed filesystem. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+a9ac3de1b5de5fb10efc@syzkaller.appspotmail.com Reported-by: syzbot+df958cf5688a96ad3287@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Cc: Link: http://lkml.kernel.org/r/20200628060846.682158-3-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7cb5fd38eb14..2bca95abe8f4 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -468,6 +468,13 @@ static struct inode *V1_minix_iget(struct inode *inode) iget_failed(inode); return ERR_PTR(-EIO); } + if (raw_inode->i_nlinks == 0) { + printk("MINIX-fs: deleted inode referenced: %lu\n", + inode->i_ino); + brelse(bh); + iget_failed(inode); + return ERR_PTR(-ESTALE); + } inode->i_mode = raw_inode->i_mode; i_uid_write(inode, raw_inode->i_uid); i_gid_write(inode, raw_inode->i_gid); @@ -501,6 +508,13 @@ static struct inode *V2_minix_iget(struct inode *inode) iget_failed(inode); return ERR_PTR(-EIO); } + if (raw_inode->i_nlinks == 0) { + printk("MINIX-fs: deleted inode referenced: %lu\n", + inode->i_ino); + brelse(bh); + iget_failed(inode); + return ERR_PTR(-ESTALE); + } inode->i_mode = raw_inode->i_mode; i_uid_write(inode, raw_inode->i_uid); i_gid_write(inode, raw_inode->i_gid); -- cgit From 270ef41094e9fa95273f288d7d785313ceab2ff3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:30 -0700 Subject: fs/minix: reject too-large maximum file size If the minix filesystem tries to map a very large logical block number to its on-disk location, block_to_path() can return offsets that are too large, causing out-of-bounds memory accesses when accessing indirect index blocks. This should be prevented by the check against the maximum file size, but this doesn't work because the maximum file size is read directly from the on-disk superblock and isn't validated itself. Fix this by validating the maximum file size at mount time. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+c7d9ec7a1a7272dd71b3@syzkaller.appspotmail.com Reported-by: syzbot+3b7b03a0c28948054fb5@syzkaller.appspotmail.com Reported-by: syzbot+6e056ee473568865f3e6@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Cc: Link: http://lkml.kernel.org/r/20200628060846.682158-4-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2bca95abe8f4..0dd929346f3f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -150,6 +150,23 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) return 0; } +static bool minix_check_superblock(struct minix_sb_info *sbi) +{ + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) + return false; + + /* + * s_max_size must not exceed the block mapping limitation. This check + * is only needed for V1 filesystems, since V2/V3 support an extra level + * of indirect blocks which places the limit well above U32_MAX. + */ + if (sbi->s_version == MINIX_V1 && + sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE) + return false; + + return true; +} + static int minix_fill_super(struct super_block *s, void *data, int silent) { struct buffer_head *bh; @@ -228,11 +245,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) } else goto out_no_fs; + if (!minix_check_superblock(sbi)) + goto out_illegal_sb; + /* * Allocate the buffer map to keep the superblock small. */ - if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) - goto out_illegal_sb; i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); map = kzalloc(i, GFP_KERNEL); if (!map) -- cgit From 32ac86efff91a3e4ef8c3d1cadd4559e23c8e73a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:33 -0700 Subject: fs/minix: set s_maxbytes correctly The minix filesystem leaves super_block::s_maxbytes at MAX_NON_LFS rather than setting it to the actual filesystem-specific limit. This is broken because it means userspace doesn't see the standard behavior like getting EFBIG and SIGXFSZ when exceeding the maximum file size. Fix this by setting s_maxbytes correctly. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 12 +++++++----- fs/minix/itree_v1.c | 2 +- fs/minix/itree_v2.c | 3 +-- fs/minix/minix.h | 1 - 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 0dd929346f3f..7b09a9158e40 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -150,8 +150,10 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) return 0; } -static bool minix_check_superblock(struct minix_sb_info *sbi) +static bool minix_check_superblock(struct super_block *sb) { + struct minix_sb_info *sbi = minix_sb(sb); + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) return false; @@ -161,7 +163,7 @@ static bool minix_check_superblock(struct minix_sb_info *sbi) * of indirect blocks which places the limit well above U32_MAX. */ if (sbi->s_version == MINIX_V1 && - sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE) + sb->s_maxbytes > (7 + 512 + 512*512) * BLOCK_SIZE) return false; return true; @@ -202,7 +204,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = ms->s_zmap_blocks; sbi->s_firstdatazone = ms->s_firstdatazone; sbi->s_log_zone_size = ms->s_log_zone_size; - sbi->s_max_size = ms->s_max_size; + s->s_maxbytes = ms->s_max_size; s->s_magic = ms->s_magic; if (s->s_magic == MINIX_SUPER_MAGIC) { sbi->s_version = MINIX_V1; @@ -233,7 +235,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = m3s->s_zmap_blocks; sbi->s_firstdatazone = m3s->s_firstdatazone; sbi->s_log_zone_size = m3s->s_log_zone_size; - sbi->s_max_size = m3s->s_max_size; + s->s_maxbytes = m3s->s_max_size; sbi->s_ninodes = m3s->s_ninodes; sbi->s_nzones = m3s->s_zones; sbi->s_dirsize = 64; @@ -245,7 +247,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) } else goto out_no_fs; - if (!minix_check_superblock(sbi)) + if (!minix_check_superblock(s)) goto out_illegal_sb; /* diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 046cc96ee7ad..c0d418209ead 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,7 +29,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { + } else if (block >= inode->i_sb->s_maxbytes/BLOCK_SIZE) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index f7fc7ecccccc..ee8af2f9e282 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,8 +32,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, sb->s_bdev); - } else if ((u64)block * (u64)sb->s_blocksize >= - minix_sb(sb)->s_max_size) { + } else if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", diff --git a/fs/minix/minix.h b/fs/minix/minix.h index df081e8afcc3..168d45d3de73 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -32,7 +32,6 @@ struct minix_sb_info { unsigned long s_zmap_blocks; unsigned long s_firstdatazone; unsigned long s_log_zone_size; - unsigned long s_max_size; int s_dirsize; int s_namelen; struct buffer_head ** s_imap; -- cgit From 0a12c4a8069607247cb8edc3b035a664e636fd9a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:36 -0700 Subject: fs/minix: fix block limit check for V1 filesystems The minix filesystem reads its maximum file size from its on-disk superblock. This value isn't necessarily a multiple of the block size. When it's not, the V1 block mapping code doesn't allow mapping the last possible block. Commit 6ed6a722f9ab ("minixfs: fix block limit check") fixed this in the V2 mapping code. Fix it in the V1 mapping code too. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-6-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/itree_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index c0d418209ead..405573a79aab 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,7 +29,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if (block >= inode->i_sb->s_maxbytes/BLOCK_SIZE) { + } else if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", -- cgit From f666f9fb9a36f1c833b9d18923572f0e4d304754 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:39 -0700 Subject: fs/minix: remove expected error message in block_to_path() When truncating a file to a size within the last allowed logical block, block_to_path() is called with the *next* block. This exceeds the limit, causing the "block %ld too big" error message to be printed. This case isn't actually an error; there are just no more blocks past that point. So, remove this error message. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-7-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/minix/itree_v1.c | 12 ++++++------ fs/minix/itree_v2.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 405573a79aab..1fed906042aa 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,12 +29,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, inode->i_sb->s_bdev); - } else if (block < 7) { + return 0; + } + if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) + return 0; + + if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 512) { offsets[n++] = 7; diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index ee8af2f9e282..9d00f31a2d9d 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,12 +32,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, sb->s_bdev); - } else if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, sb->s_bdev); - } else if (block < DIRCOUNT) { + return 0; + } + if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) + return 0; + + if (block < DIRCOUNT) { offsets[n++] = block; } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) { offsets[n++] = DIRCOUNT; -- cgit From 1b0e31861d98920c3823c7519ae762499811df00 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:43 -0700 Subject: nilfs2: only call unlock_new_inode() if I_NEW Patch series "nilfs2 updates". This patch (of 3): unlock_new_inode() is only meant to be called after a new inode has already been inserted into the hash table. But nilfs_new_inode() can call it even before it has inserted the inode, triggering the WARNING in unlock_new_inode(). Fix this by only calling unlock_new_inode() if the inode has the I_NEW flag set, indicating that it's in the table. Signed-off-by: Eric Biggers Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/1595860111-3920-1-git-send-email-konishi.ryusuke@gmail.com Link: http://lkml.kernel.org/r/1595860111-3920-2-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Linus Torvalds --- fs/nilfs2/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 28009ec54420..3318dd1350b2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -388,7 +388,8 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) failed_after_creation: clear_nlink(inode); - unlock_new_inode(inode); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); iput(inode); /* * raw_inode will be deleted through * nilfs_evict_inode(). -- cgit From 2987a4cfc8332b4b516f3e03d29ec0316221e45f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:46 -0700 Subject: nilfs2: convert __nilfs_msg to integrate the level and format Reduce object size a bit by removing the KERN_ as a separate argument and adding it to the format string. Reduce overall object size by about ~.5% (x86-64 defconfig w/ nilfs2) old: $ size -t fs/nilfs2/built-in.a | tail -1 191738 8676 44 200458 30f0a (TOTALS) new: $ size -t fs/nilfs2/built-in.a | tail -1 190971 8676 44 199691 30c0b (TOTALS) Signed-off-by: Joe Perches Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/1595860111-3920-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Linus Torvalds --- fs/nilfs2/nilfs.h | 9 ++++----- fs/nilfs2/super.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 42395ba52da6..979a41016743 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -289,9 +289,8 @@ static inline int nilfs_mark_inode_dirty_sync(struct inode *inode) /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); -extern __printf(3, 4) -void __nilfs_msg(struct super_block *sb, const char *level, - const char *fmt, ...); +__printf(2, 3) +void __nilfs_msg(struct super_block *sb, const char *fmt, ...); extern __printf(3, 4) void __nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...); @@ -299,7 +298,7 @@ void __nilfs_error(struct super_block *sb, const char *function, #ifdef CONFIG_PRINTK #define nilfs_msg(sb, level, fmt, ...) \ - __nilfs_msg(sb, level, fmt, ##__VA_ARGS__) + __nilfs_msg(sb, level fmt, ##__VA_ARGS__) #define nilfs_error(sb, fmt, ...) \ __nilfs_error(sb, __func__, fmt, ##__VA_ARGS__) @@ -307,7 +306,7 @@ void __nilfs_error(struct super_block *sb, const char *function, #define nilfs_msg(sb, level, fmt, ...) \ do { \ - no_printk(fmt, ##__VA_ARGS__); \ + no_printk(level fmt, ##__VA_ARGS__); \ (void)(sb); \ } while (0) #define nilfs_error(sb, fmt, ...) \ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 5729ee86da9a..fef4821a1f0f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -62,19 +62,25 @@ struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -void __nilfs_msg(struct super_block *sb, const char *level, const char *fmt, - ...) +void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; + if (sb) - printk("%sNILFS (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cNILFS (%s): %pV\n", + KERN_SOH_ASCII, level, sb->s_id, &vaf); else - printk("%sNILFS: %pV\n", level, &vaf); + printk("%c%cNILFS: %pV\n", + KERN_SOH_ASCII, level, &vaf); + va_end(args); } -- cgit From a1d0747a393a079631130d61faa2a61027d1c789 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Aug 2020 18:35:49 -0700 Subject: nilfs2: use a more common logging style Add macros for nilfs_(sb, fmt, ...) and convert the uses of 'nilfs_msg(sb, KERN_, ...)' to 'nilfs_(sb, ...)' so nilfs2 uses a logging style more like the typical kernel logging style. Miscellanea: o Realign arguments for these uses Signed-off-by: Joe Perches Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/1595860111-3920-4-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 38 +++++++++++------------ fs/nilfs2/btree.c | 42 ++++++++++++------------- fs/nilfs2/cpfile.c | 10 +++--- fs/nilfs2/dat.c | 14 ++++----- fs/nilfs2/direct.c | 14 +++++---- fs/nilfs2/gcinode.c | 2 +- fs/nilfs2/ifile.c | 4 +-- fs/nilfs2/inode.c | 29 +++++++++--------- fs/nilfs2/ioctl.c | 37 +++++++++++----------- fs/nilfs2/mdt.c | 2 +- fs/nilfs2/namei.c | 6 ++-- fs/nilfs2/nilfs.h | 9 ++++++ fs/nilfs2/page.c | 11 +++---- fs/nilfs2/recovery.c | 32 +++++++++---------- fs/nilfs2/segbuf.c | 2 +- fs/nilfs2/segment.c | 38 +++++++++++------------ fs/nilfs2/sufile.c | 29 ++++++++---------- fs/nilfs2/super.c | 57 ++++++++++++++++------------------ fs/nilfs2/sysfs.c | 29 ++++++++---------- fs/nilfs2/the_nilfs.c | 85 ++++++++++++++++++++++++--------------------------- 20 files changed, 239 insertions(+), 251 deletions(-) diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 235b959fc2b3..adf3bb0a8048 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -613,10 +613,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)req->pr_entry_nr); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)req->pr_entry_nr); else nilfs_palloc_group_desc_add_entries(desc, lock, 1); @@ -654,10 +654,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)req->pr_entry_nr); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)req->pr_entry_nr); else nilfs_palloc_group_desc_add_entries(desc, lock, 1); @@ -763,10 +763,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) do { if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)entry_nrs[j]); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)entry_nrs[j]); } else { n++; } @@ -808,10 +808,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) ret = nilfs_palloc_delete_entry_block(inode, last_nrs[k]); if (ret && ret != -ENOENT) - nilfs_msg(inode->i_sb, KERN_WARNING, - "error %d deleting block that object (entry=%llu, ino=%lu) belongs to", - ret, (unsigned long long)last_nrs[k], - inode->i_ino); + nilfs_warn(inode->i_sb, + "error %d deleting block that object (entry=%llu, ino=%lu) belongs to", + ret, (unsigned long long)last_nrs[k], + inode->i_ino); } desc_kaddr = kmap_atomic(desc_bh->b_page); @@ -826,9 +826,9 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) if (nfree == nilfs_palloc_entries_per_group(inode)) { ret = nilfs_palloc_delete_bitmap_block(inode, group); if (ret && ret != -ENOENT) - nilfs_msg(inode->i_sb, KERN_WARNING, - "error %d deleting bitmap block of group=%lu, ino=%lu", - ret, group, inode->i_ino); + nilfs_warn(inode->i_sb, + "error %d deleting bitmap block of group=%lu, ino=%lu", + ret, group, inode->i_ino); } } return 0; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 23e043eca237..f42ab57201e7 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -351,10 +351,10 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, (flags & NILFS_BTREE_NODE_ROOT) || nchildren < 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", - inode->i_ino, (unsigned long long)blocknr, level, - flags, nchildren); + nilfs_crit(inode->i_sb, + "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, (unsigned long long)blocknr, level, + flags, nchildren); ret = 1; } return ret; @@ -381,9 +381,9 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", - inode->i_ino, level, flags, nchildren); + nilfs_crit(inode->i_sb, + "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, level, flags, nchildren); ret = 1; } return ret; @@ -450,10 +450,10 @@ static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); - nilfs_msg(btree->b_inode->i_sb, KERN_CRIT, - "btree level mismatch (ino=%lu): %d != %d", - btree->b_inode->i_ino, - nilfs_btree_node_get_level(node), level); + nilfs_crit(btree->b_inode->i_sb, + "btree level mismatch (ino=%lu): %d != %d", + btree->b_inode->i_ino, + nilfs_btree_node_get_level(node), level); return 1; } return 0; @@ -508,7 +508,7 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, out_no_wait: if (!buffer_uptodate(bh)) { - nilfs_msg(btree->b_inode->i_sb, KERN_ERR, + nilfs_err(btree->b_inode->i_sb, "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); @@ -2074,10 +2074,10 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) - nilfs_msg(btree->b_inode->i_sb, KERN_CRIT, - "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", - btree->b_inode->i_ino, - (unsigned long long)key, level); + nilfs_crit(btree->b_inode->i_sb, + "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", + btree->b_inode->i_ino, + (unsigned long long)key, level); goto out; } @@ -2114,11 +2114,11 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); - nilfs_msg(btree->b_inode->i_sb, KERN_WARNING, - "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", - level, (unsigned long long)key, - btree->b_inode->i_ino, - (unsigned long long)bh->b_blocknr); + nilfs_warn(btree->b_inode->i_sb, + "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", + level, (unsigned long long)key, + btree->b_inode->i_ino, + (unsigned long long)bh->b_blocknr); return; } diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 8d41311b5db4..86d4d850d130 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -322,7 +322,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { - nilfs_msg(cpfile->i_sb, KERN_ERR, + nilfs_err(cpfile->i_sb, "cannot delete checkpoints: invalid range [%llu, %llu)", (unsigned long long)start, (unsigned long long)end); return -EINVAL; @@ -376,7 +376,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno); if (ret == 0) continue; - nilfs_msg(cpfile->i_sb, KERN_ERR, + nilfs_err(cpfile->i_sb, "error %d deleting checkpoint block", ret); break; @@ -981,12 +981,10 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, int err; if (cpsize > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, - "too large checkpoint size: %zu bytes", cpsize); + nilfs_err(sb, "too large checkpoint size: %zu bytes", cpsize); return -EINVAL; } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { - nilfs_msg(sb, KERN_ERR, - "too small checkpoint size: %zu bytes", cpsize); + nilfs_err(sb, "too small checkpoint size: %zu bytes", cpsize); return -EINVAL; } diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 6f4066636be9..8bccdf1158fc 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -340,11 +340,11 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { - nilfs_msg(dat->i_sb, KERN_CRIT, - "%s: invalid vblocknr = %llu, [%llu, %llu)", - __func__, (unsigned long long)vblocknr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end)); + nilfs_crit(dat->i_sb, + "%s: invalid vblocknr = %llu, [%llu, %llu)", + __func__, (unsigned long long)vblocknr, + (unsigned long long)le64_to_cpu(entry->de_start), + (unsigned long long)le64_to_cpu(entry->de_end)); kunmap_atomic(kaddr); brelse(entry_bh); return -EINVAL; @@ -471,11 +471,11 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, int err; if (entry_size > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, "too large DAT entry size: %zu bytes", + nilfs_err(sb, "too large DAT entry size: %zu bytes", entry_size); return -EINVAL; } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { - nilfs_msg(sb, KERN_ERR, "too small DAT entry size: %zu bytes", + nilfs_err(sb, "too small DAT entry size: %zu bytes", entry_size); return -EINVAL; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 533e24ea3a88..f353101955e3 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -328,16 +328,18 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { - nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT, - "%s (ino=%lu): invalid key: %llu", __func__, - bmap->b_inode->i_ino, (unsigned long long)key); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid key: %llu", + __func__, + bmap->b_inode->i_ino, (unsigned long long)key); return -EINVAL; } ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { - nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT, - "%s (ino=%lu): invalid pointer: %llu", __func__, - bmap->b_inode->i_ino, (unsigned long long)ptr); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid pointer: %llu", + __func__, + bmap->b_inode->i_ino, (unsigned long long)ptr); return -EINVAL; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index aa3c328ee189..448320496856 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -142,7 +142,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct inode *inode = bh->b_page->mapping->host; - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)", buffer_nilfs_node(bh) ? "node" : "data", inode->i_ino, (unsigned long long)bh->b_blocknr); diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 4140d232cadc..02727ed3a7c6 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -142,8 +142,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); if (unlikely(err)) - nilfs_msg(sb, KERN_WARNING, "error %d reading inode: ino=%lu", - err, (unsigned long)ino); + nilfs_warn(sb, "error %d reading inode: ino=%lu", + err, (unsigned long)ino); return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 3318dd1350b2..745d371d6fea 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -104,10 +104,10 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * However, the page having this block must * be locked in this case. */ - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", - __func__, inode->i_ino, - (unsigned long long)blkoff); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", + __func__, inode->i_ino, + (unsigned long long)blkoff); err = 0; } nilfs_transaction_abort(inode->i_sb); @@ -707,9 +707,8 @@ repeat: goto repeat; failed: - nilfs_msg(ii->vfs_inode.i_sb, KERN_WARNING, - "error %d truncating bmap (ino=%lu)", ret, - ii->vfs_inode.i_ino); + nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", + ret, ii->vfs_inode.i_ino); } void nilfs_truncate(struct inode *inode) @@ -920,9 +919,9 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) * This will happen when somebody is freeing * this inode. */ - nilfs_msg(inode->i_sb, KERN_WARNING, - "cannot set file dirty (ino=%lu): the file is being freed", - inode->i_ino); + nilfs_warn(inode->i_sb, + "cannot set file dirty (ino=%lu): the file is being freed", + inode->i_ino); spin_unlock(&nilfs->ns_inode_lock); return -EINVAL; /* * NILFS_I_DIRTY may remain for @@ -943,9 +942,9 @@ int __nilfs_mark_inode_dirty(struct inode *inode, int flags) err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "cannot mark inode dirty (ino=%lu): error %d loading inode block", - inode->i_ino, err); + nilfs_warn(inode->i_sb, + "cannot mark inode dirty (ino=%lu): error %d loading inode block", + inode->i_ino, err); return err; } nilfs_update_inode(inode, ibh, flags); @@ -971,8 +970,8 @@ void nilfs_dirty_inode(struct inode *inode, int flags) struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "tried to mark bad_inode dirty. ignored."); + nilfs_warn(inode->i_sb, + "tried to mark bad_inode dirty. ignored."); dump_stack(); return; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 4ba73dbf3e8d..07d26f61f22a 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -569,25 +569,25 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, if (unlikely(ret < 0)) { if (ret == -ENOENT) - nilfs_msg(inode->i_sb, KERN_CRIT, - "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); return ret; } if (unlikely(!list_empty(&bh->b_assoc_buffers))) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); brelse(bh); return -EEXIST; } @@ -837,8 +837,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, return 0; failed: - nilfs_msg(nilfs->ns_sb, KERN_ERR, "error %d preparing GC: %s", ret, - msg); + nilfs_err(nilfs->ns_sb, "error %d preparing GC: %s", ret, msg); return ret; } @@ -947,7 +946,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) { - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "error %d preparing GC: cannot read source blocks", ret); } else { diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 700870a92bc4..c0361ce45f62 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -199,7 +199,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, out_no_wait: err = -EIO; if (!buffer_uptodate(first_bh)) { - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "I/O error reading meta-data file (ino=%lu, block-offset=%lu)", inode->i_ino, block); goto failed_bh; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9fe6d4ab74f0..a6ec7961d4f5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -272,9 +272,9 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) goto out; if (!inode->i_nlink) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "deleting nonexistent file (ino=%lu), %d", - inode->i_ino, inode->i_nlink); + nilfs_warn(inode->i_sb, + "deleting nonexistent file (ino=%lu), %d", + inode->i_ino, inode->i_nlink); set_nlink(inode, 1); } err = nilfs_delete_entry(de, page); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 979a41016743..f8450ee3fd06 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -317,6 +317,15 @@ void __nilfs_error(struct super_block *sb, const char *function, #endif /* CONFIG_PRINTK */ +#define nilfs_crit(sb, fmt, ...) \ + nilfs_msg(sb, KERN_CRIT, fmt, ##__VA_ARGS__) +#define nilfs_err(sb, fmt, ...) \ + nilfs_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) +#define nilfs_warn(sb, fmt, ...) \ + nilfs_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__) +#define nilfs_info(sb, fmt, ...) \ + nilfs_msg(sb, KERN_INFO, fmt, ##__VA_ARGS__) + extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index d7fc8d369d89..b175f1330408 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -391,9 +391,8 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) BUG_ON(!PageLocked(page)); if (!silent) - nilfs_msg(sb, KERN_WARNING, - "discard dirty page: offset=%lld, ino=%lu", - page_offset(page), inode->i_ino); + nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", + page_offset(page), inode->i_ino); ClearPageUptodate(page); ClearPageMappedToDisk(page); @@ -409,9 +408,9 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) do { lock_buffer(bh); if (!silent) - nilfs_msg(sb, KERN_WARNING, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); + nilfs_warn(sb, + "discard dirty block: blocknr=%llu, size=%zu", + (u64)bh->b_blocknr, bh->b_size); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 140b663e91c7..0b453ef8fae5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -51,7 +51,7 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) switch (err) { case NILFS_SEG_FAIL_IO: - nilfs_msg(sb, KERN_ERR, "I/O error reading segment"); + nilfs_err(sb, "I/O error reading segment"); return -EIO; case NILFS_SEG_FAIL_MAGIC: msg = "Magic number mismatch"; @@ -72,10 +72,10 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) msg = "No super root in the last segment"; break; default: - nilfs_msg(sb, KERN_ERR, "unrecognized segment error %d", err); + nilfs_err(sb, "unrecognized segment error %d", err); return -EINVAL; } - nilfs_msg(sb, KERN_WARNING, "invalid segment: %s", msg); + nilfs_warn(sb, "invalid segment: %s", msg); return -EINVAL; } @@ -543,10 +543,10 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, put_page(page); failed_inode: - nilfs_msg(sb, KERN_WARNING, - "error %d recovering data block (ino=%lu, block-offset=%llu)", - err, (unsigned long)rb->ino, - (unsigned long long)rb->blkoff); + nilfs_warn(sb, + "error %d recovering data block (ino=%lu, block-offset=%llu)", + err, (unsigned long)rb->ino, + (unsigned long long)rb->blkoff); if (!err2) err2 = err; next: @@ -669,8 +669,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } if (nsalvaged_blocks) { - nilfs_msg(sb, KERN_INFO, "salvaged %lu blocks", - nsalvaged_blocks); + nilfs_info(sb, "salvaged %lu blocks", nsalvaged_blocks); ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: @@ -681,7 +680,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, confused: err = -EINVAL; failed: - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d roll-forwarding partial segment at blocknr = %llu", err, (unsigned long long)pseg_start); goto out; @@ -703,8 +702,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, set_buffer_dirty(bh); err = sync_dirty_buffer(bh); if (unlikely(err)) - nilfs_msg(nilfs->ns_sb, KERN_WARNING, - "buffer sync write failed during post-cleaning of recovery."); + nilfs_warn(nilfs->ns_sb, + "buffer sync write failed during post-cleaning of recovery."); brelse(bh); } @@ -739,8 +738,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d loading the latest checkpoint", err); + nilfs_err(sb, "error %d loading the latest checkpoint", err); return err; } @@ -751,8 +749,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d preparing segment for recovery", + nilfs_err(sb, "error %d preparing segment for recovery", err); goto failed; } @@ -766,8 +763,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, nilfs_detach_log_writer(sb); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d writing segment for recovery", + nilfs_err(sb, "error %d writing segment for recovery", err); goto failed; } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 20c479b5e41b..1a8729eded8b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -505,7 +505,7 @@ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) } while (--segbuf->sb_nbio > 0); if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { - nilfs_msg(segbuf->sb_super, KERN_ERR, + nilfs_err(segbuf->sb_super, "I/O error writing log (start-blocknr=%llu, block-count=%lu) in segment %llu", (unsigned long long)segbuf->sb_pseg_start, segbuf->sb_sum.nblocks, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 91b58c897f92..a651e821c2de 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -158,7 +158,7 @@ static int nilfs_prepare_segment_lock(struct super_block *sb, * it is saved and will be restored on * nilfs_transaction_commit(). */ - nilfs_msg(sb, KERN_WARNING, "journal info from a different FS"); + nilfs_warn(sb, "journal info from a different FS"); save = current->journal_info; } if (!ti) { @@ -1940,9 +1940,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, err = nilfs_ifile_get_inode_block( ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "log writer: error %d getting inode block (ino=%lu)", - err, ii->vfs_inode.i_ino); + nilfs_warn(sci->sc_super, + "log writer: error %d getting inode block (ino=%lu)", + err, ii->vfs_inode.i_ino); return err; } spin_lock(&nilfs->ns_inode_lock); @@ -2449,7 +2449,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, if (likely(!err)) break; - nilfs_msg(sb, KERN_WARNING, "error %d cleaning segments", err); + nilfs_warn(sb, "error %d cleaning segments", err); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } @@ -2457,9 +2457,9 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, sci->sc_nfreesegs); if (ret) { - nilfs_msg(sb, KERN_WARNING, - "error %d on discard request, turning discards off for the device", - ret); + nilfs_warn(sb, + "error %d on discard request, turning discards off for the device", + ret); nilfs_clear_opt(nilfs, DISCARD); } } @@ -2540,9 +2540,9 @@ static int nilfs_segctor_thread(void *arg) /* start sync. */ sci->sc_task = current; wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ - nilfs_msg(sci->sc_super, KERN_INFO, - "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", - sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); + nilfs_info(sci->sc_super, + "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", + sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); spin_lock(&sci->sc_state_lock); loop: @@ -2616,8 +2616,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) if (IS_ERR(t)) { int err = PTR_ERR(t); - nilfs_msg(sci->sc_super, KERN_ERR, - "error %d creating segctord thread", err); + nilfs_err(sci->sc_super, "error %d creating segctord thread", + err); return err; } wait_event(sci->sc_wait_task, sci->sc_task != NULL); @@ -2727,14 +2727,14 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_segctor_write_out(sci); if (!list_empty(&sci->sc_dirty_files)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "disposed unprocessed dirty file(s) when stopping log writer"); + nilfs_warn(sci->sc_super, + "disposed unprocessed dirty file(s) when stopping log writer"); nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } if (!list_empty(&sci->sc_iput_queue)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "disposed unprocessed inode(s) in iput queue when stopping log writer"); + nilfs_warn(sci->sc_super, + "disposed unprocessed inode(s) in iput queue when stopping log writer"); nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); } @@ -2812,8 +2812,8 @@ void nilfs_detach_log_writer(struct super_block *sb) spin_lock(&nilfs->ns_inode_lock); if (!list_empty(&nilfs->ns_dirty_files)) { list_splice_init(&nilfs->ns_dirty_files, &garbage_list); - nilfs_msg(sb, KERN_WARNING, - "disposed unprocessed dirty file(s) when detaching log writer"); + nilfs_warn(sb, + "disposed unprocessed dirty file(s) when detaching log writer"); } spin_unlock(&nilfs->ns_inode_lock); up_write(&nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index bf3f8f05c89b..42ff67c0c14f 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -171,9 +171,9 @@ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, down_write(&NILFS_MDT(sufile)->mi_sem); for (seg = segnumv; seg < segnumv + nsegs; seg++) { if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: invalid segment number: %llu", - __func__, (unsigned long long)*seg); + nilfs_warn(sufile->i_sb, + "%s: invalid segment number: %llu", + __func__, (unsigned long long)*seg); nerr++; } } @@ -230,9 +230,8 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, int ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: invalid segment number: %llu", - __func__, (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", + __func__, (unsigned long long)segnum); return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); @@ -410,9 +409,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: segment %llu must be clean", __func__, - (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", + __func__, (unsigned long long)segnum); kunmap_atomic(kaddr); return; } @@ -468,9 +466,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: segment %llu is already clean", - __func__, (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", + __func__, (unsigned long long)segnum); kunmap_atomic(kaddr); return; } @@ -1168,12 +1165,12 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, int err; if (susize > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, - "too large segment usage size: %zu bytes", susize); + nilfs_err(sb, "too large segment usage size: %zu bytes", + susize); return -EINVAL; } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { - nilfs_msg(sb, KERN_ERR, - "too small segment usage size: %zu bytes", susize); + nilfs_err(sb, "too small segment usage size: %zu bytes", + susize); return -EINVAL; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index fef4821a1f0f..2eee5fb1a882 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -112,7 +112,7 @@ static void nilfs_set_error(struct super_block *sb) * * This implements the body of nilfs_error() macro. Normally, * nilfs_error() should be used. As for sustainable errors such as a - * single-shot I/O error, nilfs_msg() should be used instead. + * single-shot I/O error, nilfs_err() should be used instead. * * Callers should not add a trailing newline since this will do it. */ @@ -184,8 +184,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag) } if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, "unable to write superblock: err=%d", - err); + nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], @@ -255,7 +254,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { - nilfs_msg(sb, KERN_CRIT, "superblock broke"); + nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && @@ -365,9 +364,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { - nilfs_msg(sb, KERN_WARNING, - "unable to move secondary superblock to block %llu", - (unsigned long long)newblocknr); + nilfs_warn(sb, + "unable to move secondary superblock to block %llu", + (unsigned long long)newblocknr); ret = -EIO; goto out; } @@ -530,7 +529,7 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) { if (err == -ENOENT || err == -EINVAL) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", (unsigned long long)cno); err = -EINVAL; @@ -628,8 +627,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { - nilfs_msg(sb, KERN_WARNING, - "failed to count free inodes: err=%d", err); + nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns @@ -761,7 +759,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) break; case Opt_snapshot: if (is_remount) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "\"%s\" option is invalid for remount", p); return 0; @@ -777,8 +775,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) nilfs_clear_opt(nilfs, DISCARD); break; default: - nilfs_msg(sb, KERN_ERR, - "unrecognized mount option \"%s\"", p); + nilfs_err(sb, "unrecognized mount option \"%s\"", p); return 0; } } @@ -814,10 +811,10 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount) mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { - nilfs_msg(sb, KERN_WARNING, "mounting fs with errors"); + nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { - nilfs_msg(sb, KERN_WARNING, "maximal mount count reached"); + nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) @@ -880,7 +877,7 @@ int nilfs_check_feature_compatibility(struct super_block *sb, features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; @@ -888,7 +885,7 @@ int nilfs_check_feature_compatibility(struct super_block *sb, features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; @@ -907,12 +904,12 @@ static int nilfs_get_root_dentry(struct super_block *sb, inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - nilfs_msg(sb, KERN_ERR, "error %d getting root inode", ret); + nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); - nilfs_msg(sb, KERN_ERR, "corrupt root inode"); + nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } @@ -940,7 +937,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, return ret; failed_dentry: - nilfs_msg(sb, KERN_ERR, "error %d getting root dentry", ret); + nilfs_err(sb, "error %d getting root dentry", ret); goto out; } @@ -960,7 +957,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "The specified checkpoint is not a snapshot (checkpoint number=%llu)", (unsigned long long)cno); ret = -EINVAL; @@ -969,7 +966,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "error %d while loading snapshot (checkpoint number=%llu)", ret, (unsigned long long)cno); goto out; @@ -1066,7 +1063,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d while loading last checkpoint (checkpoint number=%llu)", err, (unsigned long long)cno); goto failed_unload; @@ -1128,8 +1125,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { - nilfs_msg(sb, KERN_WARNING, - "couldn't remount because the filesystem is in an incomplete recovery state"); + nilfs_warn(sb, + "couldn't remount because the filesystem is in an incomplete recovery state"); goto restore_opts; } @@ -1161,9 +1158,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) ~NILFS_FEATURE_COMPAT_RO_SUPP; up_read(&nilfs->ns_sem); if (features) { - nilfs_msg(sb, KERN_WARNING, - "couldn't remount RDWR because of unsupported optional features (%llx)", - (unsigned long long)features); + nilfs_warn(sb, + "couldn't remount RDWR because of unsupported optional features (%llx)", + (unsigned long long)features); err = -EROFS; goto restore_opts; } @@ -1222,7 +1219,7 @@ static int nilfs_parse_snapshot_option(const char *option, return 0; parse_error: - nilfs_msg(NULL, KERN_ERR, "invalid option \"%s\": %s", option, msg); + nilfs_err(NULL, "invalid option \"%s\": %s", option, msg); return 1; } @@ -1325,7 +1322,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } else if (!sd.cno) { if (nilfs_tree_is_busy(s->s_root)) { if ((flags ^ s->s_flags) & SB_RDONLY) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index e60be7bb55b0..303d71430bdd 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -263,8 +263,8 @@ nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr, err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get checkpoint stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); return err; } @@ -286,8 +286,8 @@ nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr, err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get checkpoint stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); return err; } @@ -405,8 +405,8 @@ nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr, err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get segment stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get segment stat: err=%d", + err); return err; } @@ -779,15 +779,15 @@ nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr, err = kstrtouint(skip_spaces(buf), 0, &val); if (err) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to convert string: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to convert string: err=%d", + err); return err; } if (val < NILFS_SB_FREQ) { val = NILFS_SB_FREQ; - nilfs_msg(nilfs->ns_sb, KERN_WARNING, - "superblock update frequency cannot be lesser than 10 seconds"); + nilfs_warn(nilfs->ns_sb, + "superblock update frequency cannot be lesser than 10 seconds"); } down_write(&nilfs->ns_sem); @@ -990,8 +990,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL); if (unlikely(!nilfs->ns_dev_subgroups)) { err = -ENOMEM; - nilfs_msg(sb, KERN_ERR, - "unable to allocate memory for device group"); + nilfs_err(sb, "unable to allocate memory for device group"); goto failed_create_device_group; } @@ -1101,15 +1100,13 @@ int __init nilfs_sysfs_init(void) nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj); if (!nilfs_kset) { err = -ENOMEM; - nilfs_msg(NULL, KERN_ERR, - "unable to create sysfs entry: err=%d", err); + nilfs_err(NULL, "unable to create sysfs entry: err=%d", err); goto failed_sysfs_init; } err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group); if (unlikely(err)) { - nilfs_msg(NULL, KERN_ERR, - "unable to create feature group: err=%d", err); + nilfs_err(NULL, "unable to create feature group: err=%d", err); goto cleanup_sysfs_init; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 484785cdf96e..221a1cc597f0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -183,7 +183,7 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); nilfs->ns_cno = nilfs->ns_last_cno + 1; if (nilfs->ns_segnum >= nilfs->ns_nsegments) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "pointed segment number is out of range: segnum=%llu, nsegments=%lu", (unsigned long long)nilfs->ns_segnum, nilfs->ns_nsegments); @@ -210,12 +210,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int err; if (!valid_fs) { - nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs"); + nilfs_warn(sb, "mounting unchecked fs"); if (s_flags & SB_RDONLY) { - nilfs_msg(sb, KERN_INFO, - "recovery required for readonly filesystem"); - nilfs_msg(sb, KERN_INFO, - "write access will be enabled during recovery"); + nilfs_info(sb, + "recovery required for readonly filesystem"); + nilfs_info(sb, + "write access will be enabled during recovery"); } } @@ -230,12 +230,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto scan_error; if (!nilfs_valid_sb(sbp[1])) { - nilfs_msg(sb, KERN_WARNING, - "unable to fall back to spare super block"); + nilfs_warn(sb, + "unable to fall back to spare super block"); goto scan_error; } - nilfs_msg(sb, KERN_INFO, - "trying rollback from an earlier position"); + nilfs_info(sb, "trying rollback from an earlier position"); /* * restore super block with its spare and reconfigure @@ -248,9 +247,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) /* verify consistency between two super blocks */ blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); if (blocksize != nilfs->ns_blocksize) { - nilfs_msg(sb, KERN_WARNING, - "blocksize differs between two super blocks (%d != %d)", - blocksize, nilfs->ns_blocksize); + nilfs_warn(sb, + "blocksize differs between two super blocks (%d != %d)", + blocksize, nilfs->ns_blocksize); goto scan_error; } @@ -269,8 +268,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, "error %d while loading super root", - err); + nilfs_err(sb, "error %d while loading super root", err); goto failed; } @@ -281,28 +279,28 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) __u64 features; if (nilfs_test_opt(nilfs, NORECOVERY)) { - nilfs_msg(sb, KERN_INFO, - "norecovery option specified, skipping roll-forward recovery"); + nilfs_info(sb, + "norecovery option specified, skipping roll-forward recovery"); goto skip_recovery; } features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't proceed with recovery because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; goto failed_unload; } if (really_read_only) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "write access unavailable, cannot proceed"); err = -EROFS; goto failed_unload; } sb->s_flags &= ~SB_RDONLY; } else if (nilfs_test_opt(nilfs, NORECOVERY)) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "recovery cancelled because norecovery option was specified for a read/write mount"); err = -EINVAL; goto failed_unload; @@ -318,12 +316,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) up_write(&nilfs->ns_sem); if (err) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d updating super block. recovery unfinished.", err); goto failed_unload; } - nilfs_msg(sb, KERN_INFO, "recovery complete"); + nilfs_info(sb, "recovery complete"); skip_recovery: nilfs_clear_recovery_info(&ri); @@ -331,7 +329,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) return 0; scan_error: - nilfs_msg(sb, KERN_ERR, "error %d while searching super root", err); + nilfs_err(sb, "error %d while searching super root", err); goto failed; failed_unload: @@ -378,7 +376,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).", le32_to_cpu(sbp->s_rev_level), le16_to_cpu(sbp->s_minor_rev_level), @@ -391,13 +389,11 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); if (nilfs->ns_inode_size > nilfs->ns_blocksize) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too large inode size: %d bytes", + nilfs_err(nilfs->ns_sb, "too large inode size: %d bytes", nilfs->ns_inode_size); return -EINVAL; } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too small inode size: %d bytes", + nilfs_err(nilfs->ns_sb, "too small inode size: %d bytes", nilfs->ns_inode_size); return -EINVAL; } @@ -406,8 +402,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too short segment: %lu blocks", + nilfs_err(nilfs->ns_sb, "too short segment: %lu blocks", nilfs->ns_blocks_per_segment); return -EINVAL; } @@ -417,7 +412,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, le32_to_cpu(sbp->s_r_segments_percentage); if (nilfs->ns_r_segments_percentage < 1 || nilfs->ns_r_segments_percentage > 99) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "invalid reserved segments percentage: %lu", nilfs->ns_r_segments_percentage); return -EINVAL; @@ -503,16 +498,16 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (!sbp[0]) { if (!sbp[1]) { - nilfs_msg(sb, KERN_ERR, "unable to read superblock"); + nilfs_err(sb, "unable to read superblock"); return -EIO; } - nilfs_msg(sb, KERN_WARNING, - "unable to read primary superblock (blocksize = %d)", - blocksize); + nilfs_warn(sb, + "unable to read primary superblock (blocksize = %d)", + blocksize); } else if (!sbp[1]) { - nilfs_msg(sb, KERN_WARNING, - "unable to read secondary superblock (blocksize = %d)", - blocksize); + nilfs_warn(sb, + "unable to read secondary superblock (blocksize = %d)", + blocksize); } /* @@ -534,14 +529,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, } if (!valid[swp]) { nilfs_release_super_block(nilfs); - nilfs_msg(sb, KERN_ERR, "couldn't find nilfs on the device"); + nilfs_err(sb, "couldn't find nilfs on the device"); return -EINVAL; } if (!valid[!swp]) - nilfs_msg(sb, KERN_WARNING, - "broken superblock, retrying with spare superblock (blocksize = %d)", - blocksize); + nilfs_warn(sb, + "broken superblock, retrying with spare superblock (blocksize = %d)", + blocksize); if (swp) nilfs_swap_super_block(nilfs); @@ -575,7 +570,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { - nilfs_msg(sb, KERN_ERR, "unable to set blocksize"); + nilfs_err(sb, "unable to set blocksize"); err = -EINVAL; goto out; } @@ -594,7 +589,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (blocksize < NILFS_MIN_BLOCK_SIZE || blocksize > NILFS_MAX_BLOCK_SIZE) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount because of unsupported filesystem blocksize %d", blocksize); err = -EINVAL; @@ -604,7 +599,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) int hw_blocksize = bdev_logical_block_size(sb->s_bdev); if (blocksize < hw_blocksize) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "blocksize %d too small for device (sector-size = %d)", blocksize, hw_blocksize); err = -EINVAL; -- cgit From 88b2e9b06381551b707d980627ad0591191f7a2d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Aug 2020 18:35:53 -0700 Subject: fs/ufs: avoid potential u32 multiplication overflow The 64 bit ino is being compared to the product of two u32 values, however, the multiplication is being performed using a 32 bit multiply so there is a potential of an overflow. To be fully safe, cast uspi->s_ncg to a u64 to ensure a 64 bit multiplication occurs to avoid any chance of overflow. Fixes: f3e2a520f5fb ("ufs: NFS support") Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Cc: Evgeniy Dushistov Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/20200715170355.1081713-1-colin.king@canonical.com Addresses-Coverity: ("Unintentional integer overflow") Signed-off-by: Linus Torvalds --- fs/ufs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 1da0be667409..e3b69fb280e8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -101,7 +101,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct inode *inode; - if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg) + if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg) return ERR_PTR(-ESTALE); inode = ufs_iget(sb, ino); -- cgit From e348e65a081d761caf72c2753c4e3d67bfb1ef80 Mon Sep 17 00:00:00 2001 From: Yubo Feng Date: Tue, 11 Aug 2020 18:35:56 -0700 Subject: fatfs: switch write_lock to read_lock in fat_ioctl_get_attributes There is no need to hold write_lock in fat_ioctl_get_attributes. write_lock may make an impact on concurrency of fat_ioctl_get_attributes. Signed-off-by: Yubo Feng Signed-off-by: Andrew Morton Acked-by: OGAWA Hirofumi Link: http://lkml.kernel.org/r/1593308053-12702-1-git-send-email-fengyubo3@huawei.com Signed-off-by: Linus Torvalds --- fs/fat/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fat/file.c b/fs/fat/file.c index 42134c58c87e..f9ee27cf4d7c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -25,9 +25,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; - inode_lock(inode); + inode_lock_shared(inode); attr = fat_make_attrs(inode); - inode_unlock(inode); + inode_unlock_shared(inode); return put_user(attr, user_attr); } -- cgit From 4ecfed61de766f4655025a71d79c9a0f4793a3f4 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 11 Aug 2020 18:35:59 -0700 Subject: VFAT/FAT/MSDOS FILESYSTEM: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `xmlns`: For each link, `http://[^# ]*(?:\w|/)`: If neither `gnu\.org/license`, nor `mozilla\.org/MPL`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Andrew Morton Acked-by: OGAWA Hirofumi Link: http://lkml.kernel.org/r/20200708200409.22293-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds --- fs/fat/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index ca31993dcb47..66532a71e8fd 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -41,7 +41,7 @@ config MSDOS_FS they are compressed; to access compressed MSDOS partitions under Linux, you can either use the DOS emulator DOSEMU, described in the DOSEMU-HOWTO, available from - , or try dmsdosfs in + , or try dmsdosfs in . If you intend to use dosemu with a non-compressed MSDOS partition, say Y here) and MSDOS floppies. This means that file access becomes -- cgit From a090a5a7d73f79a9ae2dcc6e60d89bfc6864a65a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 11 Aug 2020 18:36:01 -0700 Subject: fat: fix fat_ra_init() for data clusters == 0 If data clusters == 0, fat_ra_init() calls the ->ent_blocknr() for the cluster beyond ->max_clusters. This checks the limit before initialization to suppress the warning. Reported-by: syzbot+756199124937b31a9b7e@syzkaller.appspotmail.com Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/87mu462sv4.fsf@mail.parknet.co.jp Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index bbfe18c07417..f7e3304b7802 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -657,6 +657,9 @@ static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra, unsigned long ra_pages = sb->s_bdi->ra_pages; unsigned int reada_blocks; + if (fatent->entry >= ent_limit) + return; + if (ra_pages > sb->s_bdi->io_pages) ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages); reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1); -- cgit From a089e3fd5a82aea20f3d9ec4caa5f4c65cc2cfcc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 11 Aug 2020 18:36:04 -0700 Subject: fs/signalfd.c: fix inconsistent return codes for signalfd4 The kernel signalfd4() syscall returns different error codes when called either in compat or native mode. This behaviour makes correct emulation in qemu and testing programs like LTP more complicated. Fix the code to always return -in both modes- EFAULT for unaccessible user memory, and EINVAL when called with an invalid signal mask. Signed-off-by: Helge Deller Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Laurent Vivier Link: http://lkml.kernel.org/r/20200530100707.GA10159@ls3530.fritz.box Signed-off-by: Linus Torvalds --- fs/signalfd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 44b6845b071c..5b78719be445 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -314,9 +314,10 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, { sigset_t mask; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&mask, user_mask, sizeof(mask))) + if (sizemask != sizeof(sigset_t)) return -EINVAL; + if (copy_from_user(&mask, user_mask, sizeof(mask))) + return -EFAULT; return do_signalfd4(ufd, &mask, flags); } @@ -325,9 +326,10 @@ SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, { sigset_t mask; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&mask, user_mask, sizeof(mask))) + if (sizemask != sizeof(sigset_t)) return -EINVAL; + if (copy_from_user(&mask, user_mask, sizeof(mask))) + return -EFAULT; return do_signalfd4(ufd, &mask, 0); } -- cgit From aaa3e7fb81d8a8598b993c8012f4b8aa18d92b20 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:08 -0700 Subject: selftests: kmod: use variable NAME in kmod_test_0001() Patch series "kmod/umh: a few fixes". Tiezhu Yang had sent out a patch set with a slew of kmod selftest fixes, and one patch which modified kmod to return 254 when a module was not found. This opened up pandora's box about why that was being used for and low and behold its because when UMH_WAIT_PROC is used we call a kernel_wait4() call but have never unwrapped the error code. The commit log for that fix details the rationale for the approach taken. I'd appreciate some review on that, in particular nfs folks as it seems a case was never really hit before. This patch (of 5): Use the variable NAME instead of "\000" directly in kmod_test_0001(). Signed-off-by: Tiezhu Yang Signed-off-by: Luis Chamberlain Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Greg Kroah-Hartman Cc: Al Viro Cc: Philipp Reisner Cc: Lars Ellenberg Cc: Jens Axboe Cc: J. Bruce Fields Cc: Chuck Lever Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: David S. Miller Cc: Jakub Kicinski Cc: David Howells Cc: Jarkko Sakkinen Cc: James Morris Cc: "Serge E. Hallyn" Cc: Christian Brauner Cc: Sergei Trofimovich Cc: Alexei Starovoitov Cc: Kees Cook Cc: Josh Triplett Cc: Sergey Kvachonok Cc: Tony Vroon Cc: Shuah Khan Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200610154923.27510-1-mcgrof@kernel.org Link: http://lkml.kernel.org/r/20200610154923.27510-2-mcgrof@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index ea2147248ebe..afd42387e8b2 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -343,7 +343,7 @@ kmod_test_0001_driver() kmod_defaults_driver config_num_threads 1 - printf '\000' >"$DIR"/config_test_driver + printf $NAME >"$DIR"/config_test_driver config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND } @@ -354,7 +354,7 @@ kmod_test_0001_fs() kmod_defaults_fs config_num_threads 1 - printf '\000' >"$DIR"/config_test_fs + printf $NAME >"$DIR"/config_test_fs config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} -EINVAL } -- cgit From 6f9e148c218641ad876845abdea4b0597468c55e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:12 -0700 Subject: kmod: remove redundant "be an" in the comment There exists redundant "be an" in the comment, remove it. Signed-off-by: Tiezhu Yang Signed-off-by: Luis Chamberlain Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Al Viro Cc: Christian Brauner Cc: Chuck Lever Cc: David Howells Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jakub Kicinski Cc: James Morris Cc: Jarkko Sakkinen Cc: J. Bruce Fields Cc: Jens Axboe Cc: Josh Triplett Cc: Kees Cook Cc: Lars Ellenberg Cc: Nikolay Aleksandrov Cc: Philipp Reisner Cc: Roopa Prabhu Cc: "Serge E. Hallyn" Cc: Sergei Trofimovich Cc: Sergey Kvachonok Cc: Shuah Khan Cc: Tony Vroon Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200610154923.27510-3-mcgrof@kernel.org Signed-off-by: Linus Torvalds --- kernel/kmod.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 37c3c4b97b8e..3cd075ce2a1e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,9 +36,8 @@ * * If you need less than 50 threads would mean we're dealing with systems * smaller than 3200 pages. This assumes you are capable of having ~13M memory, - * and this would only be an be an upper limit, after which the OOM killer - * would take effect. Systems like these are very unlikely if modules are - * enabled. + * and this would only be an upper limit, after which the OOM killer would take + * effect. Systems like these are very unlikely if modules are enabled. */ #define MAX_KMOD_CONCURRENT 50 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); -- cgit From 0776d1231bec0c7ab43baf440a3f5ef5f49dd795 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:16 -0700 Subject: test_kmod: avoid potential double free in trigger_config_run_type() Reset the member "test_fs" of the test configuration after a call of the function "kfree_const" to a null pointer so that a double memory release will not be performed. Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Tiezhu Yang Signed-off-by: Luis Chamberlain Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Al Viro Cc: Christian Brauner Cc: Chuck Lever Cc: David Howells Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jakub Kicinski Cc: James Morris Cc: Jarkko Sakkinen Cc: J. Bruce Fields Cc: Jens Axboe Cc: Josh Triplett Cc: Kees Cook Cc: Lars Ellenberg Cc: Nikolay Aleksandrov Cc: Philipp Reisner Cc: Roopa Prabhu Cc: "Serge E. Hallyn" Cc: Sergei Trofimovich Cc: Sergey Kvachonok Cc: Shuah Khan Cc: Tony Vroon Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200610154923.27510-4-mcgrof@kernel.org Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index e651c37d56db..eab52770070d 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -745,7 +745,7 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, break; case TEST_KMOD_FS_TYPE: kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; copied = config_copy_test_fs(config, test_str, strlen(test_str)); break; -- cgit From f38c85f1ba6902e4e2e2bf1b84edf065a904cdeb Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Tue, 11 Aug 2020 18:36:20 -0700 Subject: coredump: add %f for executable filename The document reads "%e" should be "executable filename" while actually it could be changed by things like pr_ctl PR_SET_NAME. People who uses "%e" in core_pattern get surprised when they find out they get thread name instead of executable filename. This is either a bug of document or a bug of code. Since the behavior of "%e" is there for long time, it could bring another surprise for users if we "fix" the code. So we just "fix" the document. And more, for users who really need the "executable filename" in core_pattern, we introduce a new "%f" for the real executable filename. We already have "%E" for executable path in kernel, so just reuse most of its code for the new added "%f" format. Signed-off-by: Lepton Wu Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200701031432.2978761-1-ytht.net@gmail.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/kernel.rst | 3 ++- fs/coredump.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 2ae9669eb22c..d4b32cc32bb7 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -164,7 +164,8 @@ core_pattern %s signal number %t UNIX time of dump %h hostname - %e executable filename (may be shortened) + %e executable filename (may be shortened, could be changed by prctl etc) + %f executable filename %E executable path %c maximum size of core file by resource limit RLIMIT_CORE % both are dropped diff --git a/fs/coredump.c b/fs/coredump.c index 7237f07ff6be..76e7c10edfc0 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -153,10 +153,10 @@ int cn_esc_printf(struct core_name *cn, const char *fmt, ...) return ret; } -static int cn_print_exe_file(struct core_name *cn) +static int cn_print_exe_file(struct core_name *cn, bool name_only) { struct file *exe_file; - char *pathbuf, *path; + char *pathbuf, *path, *ptr; int ret; exe_file = get_mm_exe_file(current->mm); @@ -175,6 +175,11 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } + if (name_only) { + ptr = strrchr(path, '/'); + if (ptr) + path = ptr + 1; + } ret = cn_esc_printf(cn, "%s", path); free_buf: @@ -301,12 +306,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, utsname()->nodename); up_read(&uts_sem); break; - /* executable */ + /* executable, could be changed by prctl PR_SET_NAME etc */ case 'e': err = cn_esc_printf(cn, "%s", current->comm); break; + /* file name of executable */ + case 'f': + err = cn_print_exe_file(cn, true); + break; case 'E': - err = cn_print_exe_file(cn); + err = cn_print_exe_file(cn, false); break; /* core limit size */ case 'c': -- cgit From db19c91c3b75cf8ece3ffd92a4e84a306a7547b0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 11 Aug 2020 18:36:23 -0700 Subject: exec: change uselib(2) IS_SREG() failure to EACCES Patch series "Relocate execve() sanity checks", v2. While looking at the code paths for the proposed O_MAYEXEC flag, I saw some things that looked like they should be fixed up. exec: Change uselib(2) IS_SREG() failure to EACCES This just regularizes the return code on uselib(2). exec: Move S_ISREG() check earlier This moves the S_ISREG() check even earlier than it was already. exec: Move path_noexec() check earlier This adds the path_noexec() check to the same place as the S_ISREG() check. This patch (of 3): Change uselib(2)' S_ISREG() error return to EACCES instead of EINVAL so the behavior matches execve(2), and the seemingly documented value. The "not a regular file" failure mode of execve(2) is explicitly documented[1], but it is not mentioned in uselib(2)[2] which does, however, say that open(2) and mmap(2) errors may apply. The documentation for open(2) does not include a "not a regular file" error[3], but mmap(2) does[4], and it is EACCES. [1] http://man7.org/linux/man-pages/man2/execve.2.html#ERRORS [2] http://man7.org/linux/man-pages/man2/uselib.2.html#ERRORS [3] http://man7.org/linux/man-pages/man2/open.2.html#ERRORS [4] http://man7.org/linux/man-pages/man2/mmap.2.html#ERRORS Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Acked-by: Christian Brauner Cc: Aleksa Sarai Cc: Alexander Viro Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Tetsuo Handa Link: http://lkml.kernel.org/r/20200605160013.3954297-1-keescook@chromium.org Link: http://lkml.kernel.org/r/20200605160013.3954297-2-keescook@chromium.org Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 29ef78ae9f50..c56aad307069 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -141,11 +141,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (IS_ERR(file)) goto out; - error = -EINVAL; + error = -EACCES; if (!S_ISREG(file_inode(file)->i_mode)) goto exit; - error = -EACCES; if (path_noexec(&file->f_path)) goto exit; -- cgit From 633fb6ac39801514613fbe050db6abdc3fe744d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 11 Aug 2020 18:36:26 -0700 Subject: exec: move S_ISREG() check earlier The execve(2)/uselib(2) syscalls have always rejected non-regular files. Recently, it was noticed that a deadlock was introduced when trying to execute pipes, as the S_ISREG() test was happening too late. This was fixed in commit 73601ea5b7b1 ("fs/open.c: allow opening only regular files during execve()"), but it was added after inode_permission() had already run, which meant LSMs could see bogus attempts to execute non-regular files. Move the test into the other inode type checks (which already look for other pathological conditions[1]). Since there is no need to use FMODE_EXEC while we still have access to "acc_mode", also switch the test to MAY_EXEC. Also include a comment with the redundant S_ISREG() checks at the end of execve(2)/uselib(2) to note that they are present to avoid any mistakes. My notes on the call path, and related arguments, checks, etc: do_open_execat() struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC, ... do_filp_open(dfd, filename, open_flags) path_openat(nameidata, open_flags, flags) file = alloc_empty_file(open_flags, current_cred()); do_open(nameidata, file, open_flags) may_open(path, acc_mode, open_flag) /* new location of MAY_EXEC vs S_ISREG() test */ inode_permission(inode, MAY_OPEN | acc_mode) security_inode_permission(inode, acc_mode) vfs_open(path, file) do_dentry_open(file, path->dentry->d_inode, open) /* old location of FMODE_EXEC vs S_ISREG() test */ security_file_open(f) open() [1] https://lore.kernel.org/lkml/202006041910.9EF0C602@keescook/ Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Aleksa Sarai Cc: Alexander Viro Cc: Christian Brauner Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Tetsuo Handa Link: http://lkml.kernel.org/r/20200605160013.3954297-3-keescook@chromium.org Signed-off-by: Linus Torvalds --- fs/exec.c | 14 ++++++++++++-- fs/namei.c | 6 ++++-- fs/open.c | 6 ------ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index c56aad307069..cc7ec7b9000b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -141,8 +141,13 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (IS_ERR(file)) goto out; + /* + * may_open() has already checked for this, so it should be + * impossible to trip now. But we need to be extra cautious + * and check again at the very end too. + */ error = -EACCES; - if (!S_ISREG(file_inode(file)->i_mode)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) goto exit; if (path_noexec(&file->f_path)) @@ -908,8 +913,13 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) goto out; + /* + * may_open() has already checked for this, so it should be + * impossible to trip now. But we need to be extra cautious + * and check again at the very end too. + */ err = -EACCES; - if (!S_ISREG(file_inode(file)->i_mode)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) goto exit; if (path_noexec(&file->f_path)) diff --git a/fs/namei.c b/fs/namei.c index fde8fe086c09..9cc3b90abdff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2849,16 +2849,18 @@ static int may_open(const struct path *path, int acc_mode, int flag) case S_IFLNK: return -ELOOP; case S_IFDIR: - if (acc_mode & MAY_WRITE) + if (acc_mode & (MAY_WRITE | MAY_EXEC)) return -EISDIR; break; case S_IFBLK: case S_IFCHR: if (!may_open_dev(path)) return -EACCES; - /*FALLTHRU*/ + fallthrough; case S_IFIFO: case S_IFSOCK: + if (acc_mode & MAY_EXEC) + return -EACCES; flag &= ~O_TRUNC; break; } diff --git a/fs/open.c b/fs/open.c index c80e9f497e9b..9af548fb841b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -779,12 +779,6 @@ static int do_dentry_open(struct file *f, return 0; } - /* Any file opened for execve()/uselib() has to be a regular file. */ - if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { - error = -EACCES; - goto cleanup_file; - } - if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) -- cgit From 0fd338b2d2cdf827091ae819ae90ad760b94ad0c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 11 Aug 2020 18:36:30 -0700 Subject: exec: move path_noexec() check earlier The path_noexec() check, like the regular file check, was happening too late, letting LSMs see impossible execve()s. Check it earlier as well in may_open() and collect the redundant fs/exec.c path_noexec() test under the same robustness comment as the S_ISREG() check. My notes on the call path, and related arguments, checks, etc: do_open_execat() struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC, ... do_filp_open(dfd, filename, open_flags) path_openat(nameidata, open_flags, flags) file = alloc_empty_file(open_flags, current_cred()); do_open(nameidata, file, open_flags) may_open(path, acc_mode, open_flag) /* new location of MAY_EXEC vs path_noexec() test */ inode_permission(inode, MAY_OPEN | acc_mode) security_inode_permission(inode, acc_mode) vfs_open(path, file) do_dentry_open(file, path->dentry->d_inode, open) security_file_open(f) open() /* old location of path_noexec() test */ Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Aleksa Sarai Cc: Christian Brauner Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Tetsuo Handa Link: http://lkml.kernel.org/r/20200605160013.3954297-4-keescook@chromium.org Signed-off-by: Linus Torvalds --- fs/exec.c | 12 ++++-------- fs/namei.c | 4 ++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index cc7ec7b9000b..a57d9785832b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -147,10 +147,8 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) * and check again at the very end too. */ error = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) - goto exit; - - if (path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || + path_noexec(&file->f_path))) goto exit; fsnotify_open(file); @@ -919,10 +917,8 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) * and check again at the very end too. */ err = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) - goto exit; - - if (path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || + path_noexec(&file->f_path))) goto exit; err = deny_write_access(file); diff --git a/fs/namei.c b/fs/namei.c index 9cc3b90abdff..fb318e06ba3c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2863,6 +2863,10 @@ static int may_open(const struct path *path, int acc_mode, int flag) return -EACCES; flag &= ~O_TRUNC; break; + case S_IFREG: + if ((acc_mode & MAY_EXEC) && path_noexec(path)) + return -EACCES; + break; } error = inode_permission(inode, MAY_OPEN | acc_mode); -- cgit From 0935288c6e008c0682ab6171fb5605dcc049e7bd Mon Sep 17 00:00:00 2001 From: Vijay Balakrishna Date: Tue, 11 Aug 2020 18:36:33 -0700 Subject: kdump: append kernel build-id string to VMCOREINFO Make kernel GNU build-id available in VMCOREINFO. Having build-id in VMCOREINFO facilitates presenting appropriate kernel namelist image with debug information file to kernel crash dump analysis tools. Currently VMCOREINFO lacks uniquely identifiable key for crash analysis automation. Regarding if this patch is necessary or matching of linux_banner and OSRELEASE in VMCOREINFO employed by crash(8) meets the need -- IMO, build-id approach more foolproof, in most instances it is a cryptographic hash generated using internal code/ELF bits unlike kernel version string upon which linux_banner is based that is external to the code. I feel each is intended for a different purpose. Also OSRELEASE is not suitable when two different kernel builds from same version with different features enabled. Currently for most linux (and non-linux) systems build-id can be extracted using standard methods for file types such as user mode crash dumps, shared libraries, loadable kernel modules etc., This is an exception for linux kernel dump. Having build-id in VMCOREINFO brings some uniformity for automation tools. Tyler said: : I think this is a nice improvement over today's linux_banner approach for : correlating vmlinux to a kernel dump. : : The elf notes parsing in this patch lines up with what is described in in : the "Notes (Nhdr)" section of the elf(5) man page. : : BUILD_ID_MAX is sufficient to hold a sha1 build-id, which is the default : build-id type today in GNU ld(2). It is also sufficient to hold the : "fast" build-id, which is the default build-id type today in LLVM lld(2). Signed-off-by: Vijay Balakrishna Signed-off-by: Andrew Morton Reviewed-by: Tyler Hicks Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Link: http://lkml.kernel.org/r/1591849672-34104-1-git-send-email-vijayb@linux.microsoft.com Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 6 ++++++ kernel/crash_core.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..6594dbc34a37 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -38,6 +38,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID(value) \ + vmcoreinfo_append_str("BUILD-ID=%s\n", value) #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ @@ -64,6 +66,10 @@ extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +/* raw contents of kernel .notes section */ +extern const void __start_notes __weak; +extern const void __stop_notes __weak; + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 18175687133a..106e4500fd53 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -11,6 +11,8 @@ #include #include +#include + /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; size_t vmcoreinfo_size; @@ -376,6 +378,53 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void) } EXPORT_SYMBOL(paddr_vmcoreinfo_note); +#define NOTES_SIZE (&__stop_notes - &__start_notes) +#define BUILD_ID_MAX SHA1_DIGEST_SIZE +#define NT_GNU_BUILD_ID 3 + +struct elf_note_section { + struct elf_note n_hdr; + u8 n_data[]; +}; + +/* + * Add build ID from .notes section as generated by the GNU ld(1) + * or LLVM lld(1) --build-id option. + */ +static void add_build_id_vmcoreinfo(void) +{ + char build_id[BUILD_ID_MAX * 2 + 1]; + int n_remain = NOTES_SIZE; + + while (n_remain >= sizeof(struct elf_note)) { + const struct elf_note_section *note_sec = + &__start_notes + NOTES_SIZE - n_remain; + const u32 n_namesz = note_sec->n_hdr.n_namesz; + + if (note_sec->n_hdr.n_type == NT_GNU_BUILD_ID && + n_namesz != 0 && + !strcmp((char *)¬e_sec->n_data[0], "GNU")) { + if (note_sec->n_hdr.n_descsz <= BUILD_ID_MAX) { + const u32 n_descsz = note_sec->n_hdr.n_descsz; + const u8 *s = ¬e_sec->n_data[n_namesz]; + + s = PTR_ALIGN(s, 4); + bin2hex(build_id, s, n_descsz); + build_id[2 * n_descsz] = '\0'; + VMCOREINFO_BUILD_ID(build_id); + return; + } + pr_warn("Build ID is too large to include in vmcoreinfo: %u > %u\n", + note_sec->n_hdr.n_descsz, + BUILD_ID_MAX); + return; + } + n_remain -= sizeof(struct elf_note) + + ALIGN(note_sec->n_hdr.n_namesz, 4) + + ALIGN(note_sec->n_hdr.n_descsz, 4); + } +} + static int __init crash_save_vmcoreinfo_init(void) { vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); @@ -394,6 +443,7 @@ static int __init crash_save_vmcoreinfo_init(void) } VMCOREINFO_OSRELEASE(init_uts_ns.name.release); + add_build_id_vmcoreinfo(); VMCOREINFO_PAGESIZE(PAGE_SIZE); VMCOREINFO_SYMBOL(init_uts_ns); -- cgit From 216ec27f3c5215d357f351fa103817c8b62ea804 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Aug 2020 18:36:37 -0700 Subject: drivers/rapidio/devices/rio_mport_cdev.c: use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This issue was found with the help of Coccinelle and, audited and fixed manually. Addresses KSPP ID: https://github.com/KSPP/linux/issues/83 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Andrew Morton Cc: Matt Porter Cc: Alexandre Bounine Link: http://lkml.kernel.org/r/20200619170843.GA24923@embeddedor Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 451608e960a1..3abbba8c2b5b 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1710,8 +1710,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, if (rval & RIO_PEF_SWITCH) { rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, &swpinfo); - size += (RIO_GET_TOTAL_PORTS(swpinfo) * - sizeof(rswitch->nextdev[0])) + sizeof(*rswitch); + size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo)); } rdev = kzalloc(size, GFP_KERNEL); -- cgit From 330d5589604d7c1d08caa097a97690d2e8c5bb3f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Aug 2020 18:36:40 -0700 Subject: drivers/rapidio/rio-scan.c: use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. Also, while there, use the preferred form for passing a size of a struct. The alternative form where struct name is spelled out hurts readability and introduces an opportunity for a bug when the pointer variable type is changed but the corresponding sizeof that is passed as argument is not. This issue was found with the help of Coccinelle and, audited and fixed manually. Addresses KSPP ID: https://github.com/KSPP/linux/issues/83 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Andrew Morton Cc: Matt Porter Cc: Alexandre Bounine Link: http://lkml.kernel.org/r/20200619170445.GA22641@embeddedor Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index eb8ed28533f8..19b0c33f4a62 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -330,7 +330,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, size_t size; u32 swpinfo = 0; - size = sizeof(struct rio_dev); + size = sizeof(*rdev); if (rio_mport_read_config_32(port, destid, hopcount, RIO_PEF_CAR, &result)) return NULL; @@ -338,10 +338,8 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, if (result & (RIO_PEF_SWITCH | RIO_PEF_MULTIPORT)) { rio_mport_read_config_32(port, destid, hopcount, RIO_SWP_INFO_CAR, &swpinfo); - if (result & RIO_PEF_SWITCH) { - size += (RIO_GET_TOTAL_PORTS(swpinfo) * - sizeof(rswitch->nextdev[0])) + sizeof(*rswitch); - } + if (result & RIO_PEF_SWITCH) + size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo)); } rdev = kzalloc(size, GFP_KERNEL); -- cgit From d71375499d7b0a954e3012d4e16ccad3f28e3d1c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Aug 2020 18:36:43 -0700 Subject: rapidio/rio_mport_cdev: use array_size() helper in copy_{from,to}_user() Use array_size() helper instead of the open-coded version in copy_{from,to}_user(). These sorts of multiplication factors need to be wrapped in array_size(). This issue was found with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Cc: Matt Porter Cc: Alexandre Bounine Link: http://lkml.kernel.org/r/20200616183050.GA31840@embeddedor Addresses-KSPP-ID: https://github.com/KSPP/linux/issues/83 Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 3abbba8c2b5b..c07ceec3c6d4 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -981,7 +981,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) if (unlikely(copy_from_user(transfer, (void __user *)(uintptr_t)transaction.block, - transaction.count * sizeof(*transfer)))) { + array_size(sizeof(*transfer), transaction.count)))) { ret = -EFAULT; goto out_free; } @@ -994,7 +994,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block, transfer, - transaction.count * sizeof(*transfer)))) + array_size(sizeof(*transfer), transaction.count)))) ret = -EFAULT; out_free: -- cgit From 79076e1241bb3bf02d0aac7d39120d8161fe07b1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:46 -0700 Subject: kernel/panic.c: make oops_may_print() return bool The return value of oops_may_print() is true or false, so change its type to reflect that. Signed-off-by: Tiezhu Yang Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Cc: Xuefeng Li Link: http://lkml.kernel.org/r/1591103358-32087-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- kernel/panic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 92517ae53e7c..211005163682 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -322,7 +322,7 @@ void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); -extern int oops_may_print(void); +extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; diff --git a/kernel/panic.c b/kernel/panic.c index e2157ca387c8..93ba061d1c79 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -505,7 +505,7 @@ static void do_oops_enter_exit(void) * Return true if the calling CPU is allowed to print oops-related info. * This is a bit racy.. */ -int oops_may_print(void) +bool oops_may_print(void) { return pause_on_oops_flag == 0; } -- cgit From 9d5b134f9f51dd5a67adf8fdc4f59af97e540ceb Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:49 -0700 Subject: lib/Kconfig.debug: fix typo in the help text of CONFIG_PANIC_TIMEOUT There exists duplicated "the" in the help text of CONFIG_PANIC_TIMEOUT, Remove it. Signed-off-by: Tiezhu Yang Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Cc: Xuefeng Li Link: http://lkml.kernel.org/r/1591103358-32087-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ca1b11af1f6..e068c3c7189a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -917,7 +917,7 @@ config PANIC_TIMEOUT int "panic timeout" default 0 help - Set the timeout value (in seconds) until a reboot occurs when the + Set the timeout value (in seconds) until a reboot occurs when the kernel panics. If n = 0, then we wait forever. A timeout value n > 0 will wait n seconds before rebooting, while a timeout value n < 0 will reboot immediately. -- cgit From 63037f74725ddd8a767ed2ad0369e60a3bf1f2ce Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 11 Aug 2020 18:36:53 -0700 Subject: panic: make print_oops_end_marker() static Since print_oops_end_marker() is not used externally, also remove it in kernel.h at the same time. Signed-off-by: Yue Hu Signed-off-by: Andrew Morton Cc: Kees Cook Link: http://lkml.kernel.org/r/20200724011516.12756-1-zbestahu@gmail.com Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - kernel/panic.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 211005163682..500def620d8f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -321,7 +321,6 @@ void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); -void print_oops_end_marker(void); extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; diff --git a/kernel/panic.c b/kernel/panic.c index 93ba061d1c79..aef8872ba843 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -551,7 +551,7 @@ static int init_oops_id(void) } late_initcall(init_oops_id); -void print_oops_end_marker(void) +static void print_oops_end_marker(void) { init_oops_id(); pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); -- cgit From 31a1b9878c0667945b078c92bce907552f57f2b6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Aug 2020 18:36:56 -0700 Subject: kcov: unconditionally add -fno-stack-protector to compiler options Unconditionally add -fno-stack-protector to KCOV's compiler options, as all supported compilers support the option. This saves a compiler invocation to determine if the option is supported. Because Clang does not support -fno-conserve-stack, and -fno-stack-protector was wrapped in the same cc-option, we were missing -fno-stack-protector with Clang. Unconditionally adding this option fixes this for Clang. Suggested-by: Nick Desaulniers Signed-off-by: Marco Elver Signed-off-by: Andrew Morton Reviewed-by: Nick Desaulniers Reviewed-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Alexander Potapenko Link: http://lkml.kernel.org/r/20200615184302.7591-1-elver@google.com Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 5350fd292910..b3da548691c9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -36,7 +36,7 @@ KCOV_INSTRUMENT_stacktrace.o := n KCOV_INSTRUMENT_kcov.o := n KASAN_SANITIZE_kcov.o := n KCSAN_SANITIZE_kcov.o := n -CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack) -fno-stack-protector # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) -- cgit From fed79d057d0842d2e381632b783af30745dd7938 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 11 Aug 2020 18:36:59 -0700 Subject: kcov: make some symbols static Fix sparse build warnings: kernel/kcov.c:99:1: warning: symbol '__pcpu_scope_kcov_percpu_data' was not declared. Should it be static? kernel/kcov.c:778:6: warning: symbol 'kcov_remote_softirq_start' was not declared. Should it be static? kernel/kcov.c:795:6: warning: symbol 'kcov_remote_softirq_stop' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Reviewed-by: Andrey Konovalov Link: http://lkml.kernel.org/r/20200702115501.73077-1-weiyongjun1@huawei.com Signed-off-by: Linus Torvalds --- kernel/kcov.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 6afae0bcbac4..6b8368be89c8 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -96,7 +96,7 @@ struct kcov_percpu_data { int saved_sequence; }; -DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); +static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); /* Must be called with kcov_remote_lock locked. */ static struct kcov_remote *kcov_remote_find(u64 handle) @@ -775,7 +775,7 @@ static inline bool kcov_mode_enabled(unsigned int mode) return (mode & ~KCOV_IN_CTXSW) != KCOV_MODE_DISABLED; } -void kcov_remote_softirq_start(struct task_struct *t) +static void kcov_remote_softirq_start(struct task_struct *t) { struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data); unsigned int mode; @@ -792,7 +792,7 @@ void kcov_remote_softirq_start(struct task_struct *t) } } -void kcov_remote_softirq_stop(struct task_struct *t) +static void kcov_remote_softirq_stop(struct task_struct *t) { struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data); -- cgit From a3ec9f38a97545b5923a90a1052cbdb3683b9631 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Aug 2020 18:37:02 -0700 Subject: scripts/gdb: fix python 3.8 SyntaxWarning Fixes the observed warnings: scripts/gdb/linux/rbtree.py:20: SyntaxWarning: "is" with a literal. Did you mean "=="? if node is 0: scripts/gdb/linux/rbtree.py:36: SyntaxWarning: "is" with a literal. Did you mean "=="? if node is 0: It looks like this is a new warning added in Python 3.8. I've only seen this once after adding the add-auto-load-safe-path rule to my ~/.gdbinit for a new tree. Fixes: commit 449ca0c95ea2 ("scripts/gdb: add rb tree iterating utilities") Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Reviewed-by: Stephen Boyd Cc: Jan Kiszka Cc: Kieran Bingham Cc: Aymeric Agon-Rambosson Link: http://lkml.kernel.org/r/20200805225015.2847624-1-ndesaulniers@google.com Link: https://adamj.eu/tech/2020/01/21/why-does-python-3-8-syntaxwarning-for-is-literal/ Signed-off-by: Linus Torvalds --- scripts/gdb/linux/rbtree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/rbtree.py b/scripts/gdb/linux/rbtree.py index c4b991607917..fe462855eefd 100644 --- a/scripts/gdb/linux/rbtree.py +++ b/scripts/gdb/linux/rbtree.py @@ -17,7 +17,7 @@ def rb_first(root): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) node = root['rb_node'] - if node is 0: + if node == 0: return None while node['rb_left']: @@ -33,7 +33,7 @@ def rb_last(root): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) node = root['rb_node'] - if node is 0: + if node == 0: return None while node['rb_right']: -- cgit From 00898e8599a184ae05e2c00903e85dc80d82a43d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Aug 2020 18:37:05 -0700 Subject: ipc: uninline functions Two functions are only called via function pointers, don't bother inlining them. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Cc: Manfred Spraul Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/20200710200312.GA960353@localhost.localdomain Signed-off-by: Linus Torvalds --- ipc/sem.c | 3 +-- ipc/shm.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index 3687b71151b3..8c0244e0365e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -585,8 +585,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) /* * Called with sem_ids.rwsem and ipcp locked. */ -static inline int sem_more_checks(struct kern_ipc_perm *ipcp, - struct ipc_params *params) +static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) { struct sem_array *sma; diff --git a/ipc/shm.c b/ipc/shm.c index bf38d7e2fbe9..fd296135a5e6 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -711,8 +711,7 @@ no_file: /* * Called with shm_ids.rwsem and ipcp locked. */ -static inline int shm_more_checks(struct kern_ipc_perm *ipcp, - struct ipc_params *params) +static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) { struct shmid_kernel *shp; -- cgit From ce14489c8e2d327b56ff3253a3642a6d79b9bca2 Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Tue, 11 Aug 2020 18:37:08 -0700 Subject: ipc/shm.c: remove the superfluous break Remove the superfuous break, as there is a 'return' before it. Signed-off-by: Liao Pingfang Signed-off-by: Yi Wang Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/1594724361-11525-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Linus Torvalds --- ipc/shm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ipc/shm.c b/ipc/shm.c index fd296135a5e6..f1ed36e3ac9f 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1380,7 +1380,6 @@ static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int versio case SHM_LOCK: case SHM_UNLOCK: return shmctl_do_lock(ns, shmid, cmd); - break; default: return -EINVAL; } -- cgit From c7073bab5772e0e2ab2c766a05f68174974b1ab5 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:11 -0700 Subject: mm/page_isolation: prefer the node of the source page Patch series "clean-up the migration target allocation functions", v5. This patch (of 9): For locality, it's better to migrate the page to the same node rather than the node of the current caller's cpu. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Roman Gushchin Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Link: http://lkml.kernel.org/r/1594622517-20681-1-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1594622517-20681-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/page_isolation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f6d07c5f0d34..aec26d972b9f 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -309,5 +309,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, struct page *alloc_migrate_target(struct page *page, unsigned long private) { - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]); + int nid = page_to_nid(page); + + return new_page_nodemask(page, nid, &node_states[N_MEMORY]); } -- cgit From b4b382238ed2f94f0d3860f9120b66404fa99463 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:14 -0700 Subject: mm/migrate: move migration helper from .h to .c It's not performance sensitive function. Move it to .c. This is a preparation step for future change. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 33 +++++---------------------------- mm/migrate.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 540998d9810b..abeb4b15b297 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -31,34 +31,6 @@ enum migrate_reason { /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; -static inline struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) -{ - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; - unsigned int order = 0; - struct page *new_page = NULL; - - if (PageHuge(page)) - return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - preferred_nid, nodemask); - - if (PageTransHuge(page)) { - gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; - } - - if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) - gfp_mask |= __GFP_HIGHMEM; - - new_page = __alloc_pages_nodemask(gfp_mask, order, - preferred_nid, nodemask); - - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); - - return new_page; -} - #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); @@ -67,6 +39,8 @@ extern int migrate_page(struct address_space *mapping, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); +extern struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); @@ -85,6 +59,9 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } +static inline struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask) + { return NULL; } static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } diff --git a/mm/migrate.c b/mm/migrate.c index 52896b4921a7..5269bc520aee 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1538,6 +1538,35 @@ out: return rc; } +struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask) +{ + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + unsigned int order = 0; + struct page *new_page = NULL; + + if (PageHuge(page)) + return alloc_huge_page_nodemask( + page_hstate(compound_head(page)), + preferred_nid, nodemask); + + if (PageTransHuge(page)) { + gfp_mask |= GFP_TRANSHUGE; + order = HPAGE_PMD_ORDER; + } + + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) + gfp_mask |= __GFP_HIGHMEM; + + new_page = __alloc_pages_nodemask(gfp_mask, order, + preferred_nid, nodemask); + + if (new_page && PageTransHuge(new_page)) + prep_transhuge_page(new_page); + + return new_page; +} + #ifdef CONFIG_NUMA static int store_status(int __user *status, int start, int value, int nr) -- cgit From d92bbc2719bd2be237ee336113b63492a6baca3b Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:17 -0700 Subject: mm/hugetlb: unify migration callbacks There is no difference between two migration callback functions, alloc_huge_page_node() and alloc_huge_page_nodemask(), except __GFP_THISNODE handling. It's redundant to have two almost similar functions in order to handle this flag. So, this patch tries to remove one by introducing a new argument, gfp_mask, to alloc_huge_page_nodemask(). After introducing gfp_mask argument, it's caller's job to provide correct gfp_mask. So, every callsites for alloc_huge_page_nodemask() are changed to provide gfp_mask. Note that it's safe to remove a node id check in alloc_huge_page_node() since there is no caller passing NUMA_NO_NODE as a node id. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Reviewed-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-4-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 26 ++++++++++++++++++-------- mm/hugetlb.c | 35 ++--------------------------------- mm/mempolicy.c | 10 ++++++---- mm/migrate.c | 11 +++++++---- 4 files changed, 33 insertions(+), 49 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a520bf26e5d8..3517edde681e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -10,6 +10,7 @@ #include #include #include +#include struct ctl_table; struct user_struct; @@ -506,9 +507,8 @@ struct huge_bootmem_page { struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask); + nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, @@ -694,6 +694,15 @@ static inline bool hugepage_movable_supported(struct hstate *h) return true; } +/* Movability of hugepages depends on migration support. */ +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + if (hugepage_movable_supported(h)) + return GFP_HIGHUSER_MOVABLE; + else + return GFP_HIGHUSER; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -761,13 +770,9 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma, return NULL; } -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - return NULL; -} - static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask) +alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } @@ -880,6 +885,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) return false; } +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + return 0; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b66bf74e999e..eaab9ef88e9d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1093,15 +1093,6 @@ retry_cpuset: return NULL; } -/* Movability of hugepages depends on migration support. */ -static inline gfp_t htlb_alloc_mask(struct hstate *h) -{ - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; -} - static struct page *dequeue_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address, int avoid_reserve, @@ -1985,32 +1976,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, return page; } -/* page migration callback function */ -struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - gfp_t gfp_mask = htlb_alloc_mask(h); - struct page *page = NULL; - - if (nid != NUMA_NO_NODE) - gfp_mask |= __GFP_THISNODE; - - spin_lock(&hugetlb_lock); - if (h->free_huge_pages - h->resv_huge_pages > 0) - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL); - spin_unlock(&hugetlb_lock); - - if (!page) - page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL); - - return page; -} - /* page migration callback function */ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask) + nodemask_t *nmask, gfp_t gfp_mask) { - gfp_t gfp_mask = htlb_alloc_mask(h); - spin_lock(&hugetlb_lock); if (h->free_huge_pages - h->resv_huge_pages > 0) { struct page *page; @@ -2038,7 +2007,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, gfp_mask = htlb_alloc_mask(h); node = huge_node(vma, address, gfp_mask, &mpol, &nodemask); - page = alloc_huge_page_nodemask(h, node, nodemask); + page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask); mpol_cond_put(mpol); return page; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 25b7e412c20b..9ae2b704bdf6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1068,10 +1068,12 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, /* page allocation callback for NUMA node migration */ struct page *alloc_new_node_page(struct page *page, unsigned long node) { - if (PageHuge(page)) - return alloc_huge_page_node(page_hstate(compound_head(page)), - node); - else if (PageTransHuge(page)) { + if (PageHuge(page)) { + struct hstate *h = page_hstate(compound_head(page)); + gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + + return alloc_huge_page_nodemask(h, node, NULL, gfp_mask); + } else if (PageTransHuge(page)) { struct page *thp; thp = alloc_pages_node(node, diff --git a/mm/migrate.c b/mm/migrate.c index 5269bc520aee..8d084e9bc13b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1545,10 +1545,13 @@ struct page *new_page_nodemask(struct page *page, unsigned int order = 0; struct page *new_page = NULL; - if (PageHuge(page)) - return alloc_huge_page_nodemask( - page_hstate(compound_head(page)), - preferred_nid, nodemask); + if (PageHuge(page)) { + struct hstate *h = page_hstate(compound_head(page)); + + gfp_mask = htlb_alloc_mask(h); + return alloc_huge_page_nodemask(h, preferred_nid, + nodemask, gfp_mask); + } if (PageTransHuge(page)) { gfp_mask |= GFP_TRANSHUGE; -- cgit From 9933a0c8a5396df6dbaef809a9ee4ad64ebc3abe Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:20 -0700 Subject: mm/migrate: clear __GFP_RECLAIM to make the migration callback consistent with regular THP allocations new_page_nodemask is a migration callback and it tries to use a common gfp flags for the target page allocation whether it is a base page or a THP. The later only adds GFP_TRANSHUGE to the given mask. This results in the allocation being slightly more aggressive than necessary because the resulting gfp mask will contain also __GFP_RECLAIM_KSWAPD. THP allocations usually exclude this flag to reduce over eager background reclaim during a high THP allocation load which has been seen during large mmaps initialization. There is no indication that this is a problem for migration as well but theoretically the same might happen when migrating large mappings to a different node. Make the migration callback consistent with regular THP allocations. [akpm@linux-foundation.org: fix comment typo, per Vlastimil] Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-5-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/migrate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index 8d084e9bc13b..46cca5c2ebff 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1554,6 +1554,11 @@ struct page *new_page_nodemask(struct page *page, } if (PageTransHuge(page)) { + /* + * clear __GFP_RECLAIM to make the migration callback + * consistent with regular THP allocations. + */ + gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; order = HPAGE_PMD_ORDER; } -- cgit From 19fc7bed252c16ace29491e4cfa2bafb264eb505 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:25 -0700 Subject: mm/migrate: introduce a standard migration target allocation function There are some similar functions for migration target allocation. Since there is no fundamental difference, it's better to keep just one rather than keeping all variants. This patch implements base migration target allocation function. In the following patches, variants will be converted to use this function. Changes should be mechanical, but, unfortunately, there are some differences. First, some callers' nodemask is assgined to NULL since NULL nodemask will be considered as all available nodes, that is, &node_states[N_MEMORY]. Second, for hugetlb page allocation, gfp_mask is redefined as regular hugetlb allocation gfp_mask plus __GFP_THISNODE if user provided gfp_mask has it. This is because future caller of this function requires to set this node constaint. Lastly, if provided nodeid is NUMA_NO_NODE, nodeid is set up to the node where migration source lives. It helps to remove simple wrappers for setting up the nodeid. Note that PageHighmem() call in previous function is changed to open-code "is_highmem_idx()" since it provides more readability. [akpm@linux-foundation.org: tweak patch title, per Vlastimil] [akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 15 +++++++++++++++ include/linux/migrate.h | 9 +++++---- mm/internal.h | 7 +++++++ mm/memory-failure.c | 7 +++++-- mm/memory_hotplug.c | 12 ++++++++---- mm/migrate.c | 26 ++++++++++++++++---------- mm/page_isolation.c | 7 +++++-- 7 files changed, 61 insertions(+), 22 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3517edde681e..30e1f14119c8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -703,6 +703,16 @@ static inline gfp_t htlb_alloc_mask(struct hstate *h) return GFP_HIGHUSER; } +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + gfp_t modified_mask = htlb_alloc_mask(h); + + /* Some callers might want to enforce node */ + modified_mask |= (gfp_mask & __GFP_THISNODE); + + return modified_mask; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -890,6 +900,11 @@ static inline gfp_t htlb_alloc_mask(struct hstate *h) return 0; } +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + return 0; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/migrate.h b/include/linux/migrate.h index abeb4b15b297..0f8d1583fa8e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -10,6 +10,8 @@ typedef struct page *new_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private); +struct migration_target_control; + /* * Return values from addresss_space_operations.migratepage(): * - negative errno on page migration failure; @@ -39,8 +41,7 @@ extern int migrate_page(struct address_space *mapping, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); -extern struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask); +extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); @@ -59,8 +60,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } -static inline struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) +static inline struct page *alloc_migration_target(struct page *page, + unsigned long private) { return NULL; } static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } diff --git a/mm/internal.h b/mm/internal.h index 42cf0b610847..f725aa8a9698 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -614,4 +614,11 @@ static inline bool is_migrate_highatomic_page(struct page *page) void setup_zone_pageset(struct zone *zone); extern struct page *alloc_new_node_page(struct page *page, unsigned long node); + +struct migration_target_control { + int nid; /* preferred node id */ + nodemask_t *nmask; + gfp_t gfp_mask; +}; + #endif /* __MM_INTERNAL_H */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 47b8ccb1fb9b..f1aa6433f404 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1648,9 +1648,12 @@ EXPORT_SYMBOL(unpoison_memory); static struct page *new_page(struct page *p, unsigned long private) { - int nid = page_to_nid(p); + struct migration_target_control mtc = { + .nid = page_to_nid(p), + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; - return new_page_nodemask(p, nid, &node_states[N_MEMORY]); + return alloc_migration_target(p, (unsigned long)&mtc); } /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0a9e1972fbe7..c32ead89c911 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1276,19 +1276,23 @@ found: static struct page *new_node_page(struct page *page, unsigned long private) { - int nid = page_to_nid(page); nodemask_t nmask = node_states[N_MEMORY]; + struct migration_target_control mtc = { + .nid = page_to_nid(page), + .nmask = &nmask, + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; /* * try to allocate from a different node but reuse this node if there * are no other online nodes to be used (e.g. we are offlining a part * of the only existing node) */ - node_clear(nid, nmask); + node_clear(mtc.nid, nmask); if (nodes_empty(nmask)) - node_set(nid, nmask); + node_set(mtc.nid, nmask); - return new_page_nodemask(page, nid, &nmask); + return alloc_migration_target(page, (unsigned long)&mtc); } static int diff --git a/mm/migrate.c b/mm/migrate.c index 46cca5c2ebff..48b1f149494b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1538,19 +1538,26 @@ out: return rc; } -struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) +struct page *alloc_migration_target(struct page *page, unsigned long private) { - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + struct migration_target_control *mtc; + gfp_t gfp_mask; unsigned int order = 0; struct page *new_page = NULL; + int nid; + int zidx; + + mtc = (struct migration_target_control *)private; + gfp_mask = mtc->gfp_mask; + nid = mtc->nid; + if (nid == NUMA_NO_NODE) + nid = page_to_nid(page); if (PageHuge(page)) { struct hstate *h = page_hstate(compound_head(page)); - gfp_mask = htlb_alloc_mask(h); - return alloc_huge_page_nodemask(h, preferred_nid, - nodemask, gfp_mask); + gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); + return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); } if (PageTransHuge(page)) { @@ -1562,12 +1569,11 @@ struct page *new_page_nodemask(struct page *page, gfp_mask |= GFP_TRANSHUGE; order = HPAGE_PMD_ORDER; } - - if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) + zidx = zone_idx(page_zone(page)); + if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, order, - preferred_nid, nodemask); + new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask); if (new_page && PageTransHuge(new_page)) prep_transhuge_page(new_page); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index aec26d972b9f..f25c66ea37ac 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -309,7 +309,10 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, struct page *alloc_migrate_target(struct page *page, unsigned long private) { - int nid = page_to_nid(page); + struct migration_target_control mtc = { + .nid = page_to_nid(page), + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; - return new_page_nodemask(page, nid, &node_states[N_MEMORY]); + return alloc_migration_target(page, (unsigned long)&mtc); } -- cgit From a097631160c3b71a12db224ece7d72208c9b6846 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:28 -0700 Subject: mm/mempolicy: use a standard migration target allocation callback There is a well-defined migration target allocation callback. Use it. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-7-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/internal.h | 1 - mm/mempolicy.c | 31 ++++++------------------------- mm/migrate.c | 8 ++++++-- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index f725aa8a9698..d11a9a8d2135 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -613,7 +613,6 @@ static inline bool is_migrate_highatomic_page(struct page *page) } void setup_zone_pageset(struct zone *zone); -extern struct page *alloc_new_node_page(struct page *page, unsigned long node); struct migration_target_control { int nid; /* preferred node id */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9ae2b704bdf6..afaa09ff9f6c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1065,29 +1065,6 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, return 0; } -/* page allocation callback for NUMA node migration */ -struct page *alloc_new_node_page(struct page *page, unsigned long node) -{ - if (PageHuge(page)) { - struct hstate *h = page_hstate(compound_head(page)); - gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - - return alloc_huge_page_nodemask(h, node, NULL, gfp_mask); - } else if (PageTransHuge(page)) { - struct page *thp; - - thp = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE), - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } else - return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE, 0); -} - /* * Migrate pages from one node to a target node. * Returns error or the number of pages not migrated. @@ -1098,6 +1075,10 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, nodemask_t nmask; LIST_HEAD(pagelist); int err = 0; + struct migration_target_control mtc = { + .nid = dest, + .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, + }; nodes_clear(nmask); node_set(source, nmask); @@ -1112,8 +1093,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, flags | MPOL_MF_DISCONTIG_OK, &pagelist); if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest, - MIGRATE_SYNC, MR_SYSCALL); + err = migrate_pages(&pagelist, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(&pagelist); } diff --git a/mm/migrate.c b/mm/migrate.c index 48b1f149494b..5053439be6ab 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1598,9 +1598,13 @@ static int do_move_pages_to_node(struct mm_struct *mm, struct list_head *pagelist, int node) { int err; + struct migration_target_control mtc = { + .nid = node, + .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, + }; - err = migrate_pages(pagelist, alloc_new_node_page, NULL, node, - MIGRATE_SYNC, MR_SYSCALL); + err = migrate_pages(pagelist, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(pagelist); return err; -- cgit From 8b94e0b8be360cf5460331e56a2678ba265f0694 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:31 -0700 Subject: mm/page_alloc: remove a wrapper for alloc_migration_target() There is a well-defined standard migration target callback. Use it directly. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-8-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++++-- mm/page_isolation.c | 10 ---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d80c9bac489c..8b7d0ecf30b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8347,6 +8347,10 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long pfn = start; unsigned int tries = 0; int ret = 0; + struct migration_target_control mtc = { + .nid = zone_to_nid(cc->zone), + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; migrate_prep(); @@ -8373,8 +8377,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, &cc->migratepages); cc->nr_migratepages -= nr_reclaimed; - ret = migrate_pages(&cc->migratepages, alloc_migrate_target, - NULL, 0, cc->mode, MR_CONTIG_RANGE); + ret = migrate_pages(&cc->migratepages, alloc_migration_target, + NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); } if (ret < 0) { putback_movable_pages(&cc->migratepages); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f25c66ea37ac..242c03121d73 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -306,13 +306,3 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, return pfn < end_pfn ? -EBUSY : 0; } - -struct page *alloc_migrate_target(struct page *page, unsigned long private) -{ - struct migration_target_control mtc = { - .nid = page_to_nid(page), - .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, - }; - - return alloc_migration_target(page, (unsigned long)&mtc); -} -- cgit From 41b4dc14ee807cb1bd15e67cad287534046f92dc Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:34 -0700 Subject: mm/gup: restrict CMA region by using allocation scope API We have well defined scope API to exclude CMA region. Use it rather than manipulating gfp_mask manually. With this change, we can now restore __GFP_MOVABLE for gfp_mask like as usual migration target allocation. It would result in that the ZONE_MOVABLE is also searched by page allocator. For hugetlb, gfp_mask is redefined since it has a regular allocation mask filter for migration target. __GPF_NOWARN is added to hugetlb gfp_mask filter since a new user for gfp_mask filter, gup, want to be silent when allocation fails. Note that this can be considered as a fix for the commit 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region"). However, "Fixes" tag isn't added here since it is just suboptimal but it doesn't cause any problem. Suggested-by: Michal Hocko Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Roman Gushchin Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Link: http://lkml.kernel.org/r/1596180906-8442-1-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ mm/gup.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 30e1f14119c8..d86c82749836 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -710,6 +710,8 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) /* Some callers might want to enforce node */ modified_mask |= (gfp_mask & __GFP_THISNODE); + modified_mask |= (gfp_mask & __GFP_NOWARN); + return modified_mask; } diff --git a/mm/gup.c b/mm/gup.c index d8a33dd1430d..6c20c6e37635 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1620,10 +1620,12 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) * Trying to allocate a page for migration. Ignore allocation * failure warnings. We don't force __GFP_THISNODE here because * this node here is the node where we have CMA reservation and - * in some case these nodes will have really less non movable + * in some case these nodes will have really less non CMA * allocation memory. + * + * Note that CMA region is prohibited by allocation scope. */ - gfp_t gfp_mask = GFP_USER | __GFP_NOWARN; + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN; if (PageHighMem(page)) gfp_mask |= __GFP_HIGHMEM; @@ -1631,6 +1633,8 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(page)) { struct hstate *h = page_hstate(page); + + gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); /* * We don't want to dequeue from the pool because pool pages will * mostly be from the CMA region. @@ -1645,11 +1649,6 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) */ gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN; - /* - * Remove the movable mask so that we don't allocate from - * CMA area again. - */ - thp_gfpmask &= ~__GFP_MOVABLE; thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); if (!thp) return NULL; @@ -1795,7 +1794,6 @@ static long __gup_longterm_locked(struct task_struct *tsk, vmas_tmp, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { - memalloc_nocma_restore(flags); if (rc < 0) goto out; @@ -1808,9 +1806,10 @@ static long __gup_longterm_locked(struct task_struct *tsk, rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, vmas_tmp, gup_flags); +out: + memalloc_nocma_restore(flags); } -out: if (vmas_tmp != vmas) kfree(vmas_tmp); return rc; -- cgit From bbe88753bd42b1faf1458dde8f58ff1239990436 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:38 -0700 Subject: mm/hugetlb: make hugetlb migration callback CMA aware new_non_cma_page() in gup.c requires to allocate the new page that is not on the CMA area. new_non_cma_page() implements it by using allocation scope APIs. However, there is a work-around for hugetlb. Normal hugetlb page allocation API for migration is alloc_huge_page_nodemask(). It consists of two steps. First is dequeing from the pool. Second is, if there is no available page on the queue, allocating by using the page allocator. new_non_cma_page() can't use this API since first step (deque) isn't aware of scope API to exclude CMA area. So, new_non_cma_page() exports hugetlb internal function for the second step, alloc_migrate_huge_page(), to global scope and uses it directly. This is suboptimal since hugetlb pages on the queue cannot be utilized. This patch tries to fix this situation by making the deque function on hugetlb CMA aware. In the deque function, CMA memory is skipped if PF_MEMALLOC_NOCMA flag is found. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Aneesh Kumar K . V" Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1596180906-8442-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 -- mm/gup.c | 6 +----- mm/hugetlb.c | 11 +++++++++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d86c82749836..d5cc5f802dd4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -511,8 +511,6 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, - int nid, nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); diff --git a/mm/gup.c b/mm/gup.c index 6c20c6e37635..c55427beeb26 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1635,11 +1635,7 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) struct hstate *h = page_hstate(page); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); - /* - * We don't want to dequeue from the pool because pool pages will - * mostly be from the CMA region. - */ - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL); + return alloc_huge_page_nodemask(h, nid, NULL, gfp_mask); } #endif if (PageTransHuge(page)) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index eaab9ef88e9d..a301c2d672bf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1040,10 +1041,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) { struct page *page; + bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA); + + list_for_each_entry(page, &h->hugepage_freelists[nid], lru) { + if (nocma && is_migrate_cma_page(page)) + continue; - list_for_each_entry(page, &h->hugepage_freelists[nid], lru) if (!PageHWPoison(page)) break; + } + /* * if 'non-isolated free hugepage' not found on the list, * the allocation fails. @@ -1935,7 +1942,7 @@ out_unlock: return page; } -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, +static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { struct page *page; -- cgit From ed03d924587e7dfc54f1ace81e5f5a497a7d9666 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:41 -0700 Subject: mm/gup: use a standard migration target allocation callback There is a well-defined migration target allocation callback. Use it. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Aneesh Kumar K . V" Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1596180906-8442-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/gup.c | 54 ++++++------------------------------------------------ 1 file changed, 6 insertions(+), 48 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index c55427beeb26..e9d1d0cc18f0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1609,52 +1609,6 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) } #ifdef CONFIG_CMA -static struct page *new_non_cma_page(struct page *page, unsigned long private) -{ - /* - * We want to make sure we allocate the new page from the same node - * as the source page. - */ - int nid = page_to_nid(page); - /* - * Trying to allocate a page for migration. Ignore allocation - * failure warnings. We don't force __GFP_THISNODE here because - * this node here is the node where we have CMA reservation and - * in some case these nodes will have really less non CMA - * allocation memory. - * - * Note that CMA region is prohibited by allocation scope. - */ - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN; - - if (PageHighMem(page)) - gfp_mask |= __GFP_HIGHMEM; - -#ifdef CONFIG_HUGETLB_PAGE - if (PageHuge(page)) { - struct hstate *h = page_hstate(page); - - gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); - return alloc_huge_page_nodemask(h, nid, NULL, gfp_mask); - } -#endif - if (PageTransHuge(page)) { - struct page *thp; - /* - * ignore allocation failure warnings - */ - gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN; - - thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } - - return __alloc_pages_node(nid, gfp_mask, 0); -} - static long check_and_migrate_cma_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, @@ -1669,6 +1623,10 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, bool migrate_allow = true; LIST_HEAD(cma_page_list); long ret = nr_pages; + struct migration_target_control mtc = { + .nid = NUMA_NO_NODE, + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN, + }; check_again: for (i = 0; i < nr_pages;) { @@ -1714,8 +1672,8 @@ check_again: for (i = 0; i < nr_pages; i++) put_page(pages[i]); - if (migrate_pages(&cma_page_list, new_non_cma_page, - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) { + if (migrate_pages(&cma_page_list, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) { /* * some of the pages failed migration. Do get_user_pages * without migration. -- cgit From bce617edecada007aee8610fbe2c14d10b8de2f6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:44 -0700 Subject: mm: do page fault accounting in handle_mm_fault Patch series "mm: Page fault accounting cleanups", v5. This is v5 of the pf accounting cleanup series. It originates from Gerald Schaefer's report on an issue a week ago regarding to incorrect page fault accountings for retried page fault after commit 4064b9827063 ("mm: allow VM_FAULT_RETRY for multiple times"): https://lore.kernel.org/lkml/20200610174811.44b94525@thinkpad/ What this series did: - Correct page fault accounting: we do accounting for a page fault (no matter whether it's from #PF handling, or gup, or anything else) only with the one that completed the fault. For example, page fault retries should not be counted in page fault counters. Same to the perf events. - Unify definition of PERF_COUNT_SW_PAGE_FAULTS: currently this perf event is used in an adhoc way across different archs. Case (1): for many archs it's done at the entry of a page fault handler, so that it will also cover e.g. errornous faults. Case (2): for some other archs, it is only accounted when the page fault is resolved successfully. Case (3): there're still quite some archs that have not enabled this perf event. Since this series will touch merely all the archs, we unify this perf event to always follow case (1), which is the one that makes most sense. And since we moved the accounting into handle_mm_fault, the other two MAJ/MIN perf events are well taken care of naturally. - Unify definition of "major faults": the definition of "major fault" is slightly changed when used in accounting (not VM_FAULT_MAJOR). More information in patch 1. - Always account the page fault onto the one that triggered the page fault. This does not matter much for #PF handlings, but mostly for gup. More information on this in patch 25. Patchset layout: Patch 1: Introduced the accounting in handle_mm_fault(), not enabled. Patch 2-23: Enable the new accounting for arch #PF handlers one by one. Patch 24: Enable the new accounting for the rest outliers (gup, iommu, etc.) Patch 25: Cleanup GUP task_struct pointer since it's not needed any more This patch (of 25): This is a preparation patch to move page fault accountings into the general code in handle_mm_fault(). This includes both the per task flt_maj/flt_min counters, and the major/minor page fault perf events. To do this, the pt_regs pointer is passed into handle_mm_fault(). PERF_COUNT_SW_PAGE_FAULTS should still be kept in per-arch page fault handlers. So far, all the pt_regs pointer that passed into handle_mm_fault() is NULL, which means this patch should have no intented functional change. Suggested-by: Linus Torvalds Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Albert Ou Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Brian Cain Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James E.J. Bottomley Cc: John Hubbard Cc: Jonas Bonn Cc: Ley Foon Tan Cc: "Luck, Tony" Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vincent Chen Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200707225021.200906-1-peterx@redhat.com Link: http://lkml.kernel.org/r/20200707225021.200906-2-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/alpha/mm/fault.c | 2 +- arch/arc/mm/fault.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/csky/mm/fault.c | 3 +- arch/hexagon/mm/vm_fault.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/microblaze/mm/fault.c | 2 +- arch/mips/mm/fault.c | 2 +- arch/nds32/mm/fault.c | 2 +- arch/nios2/mm/fault.c | 2 +- arch/openrisc/mm/fault.c | 2 +- arch/parisc/mm/fault.c | 2 +- arch/powerpc/mm/copro_fault.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/riscv/mm/fault.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/sh/mm/fault.c | 2 +- arch/sparc/mm/fault_32.c | 4 +-- arch/sparc/mm/fault_64.c | 2 +- arch/um/kernel/trap.c | 2 +- arch/x86/mm/fault.c | 2 +- arch/xtensa/mm/fault.c | 2 +- drivers/iommu/amd/iommu_v2.c | 2 +- drivers/iommu/intel/svm.c | 3 +- include/linux/mm.h | 7 +++-- mm/gup.c | 4 +-- mm/hmm.c | 3 +- mm/ksm.c | 3 +- mm/memory.c | 64 ++++++++++++++++++++++++++++++++++++++++++- 31 files changed, 103 insertions(+), 34 deletions(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index c2303a8c2b9f..1983e43a5e2f 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -148,7 +148,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 7287c793d1c9..587dea524e6b 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -130,7 +130,7 @@ retry: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c6550eddfce1..01a8e0f8fef7 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -224,7 +224,7 @@ good_area: goto out; } - return handle_mm_fault(vma, addr & PAGE_MASK, flags); + return handle_mm_fault(vma, addr & PAGE_MASK, flags, NULL); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8afb238ff335..be29f4076fe3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -428,7 +428,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, */ if (!(vma->vm_flags & vm_flags)) return VM_FAULT_BADACCESS; - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, NULL); } static bool is_el0_instruction_abort(unsigned int esr) diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index b1dce9f2f04d..b252e6e4d32f 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -150,7 +150,8 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0, + NULL); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index cd3808f96b93..f12f330e7946 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -88,7 +88,7 @@ good_area: break; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3a4dec334cc5..abf2808f9b4b 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -143,7 +143,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 508abb63da67..08b35a318ebe 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -134,7 +134,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); pr_debug("handle_mm_fault returns %x\n", fault); if (fault_signal_pending(fault, regs)) diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index a2bfe587b491..1a3d4c4ca28b 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -214,7 +214,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 01b168a90434..b1db39784db9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -152,7 +152,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 8fb73f6401a0..d0ecc8fb5b23 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -206,7 +206,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, NULL); /* * If we need to retry but a fatal signal is pending, handle the diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 4112ef0e247e..86beb9a2698e 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -131,7 +131,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index d2224ccca294..3daa491d1edb 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -159,7 +159,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 66ac0719bd49..e32d06928c24 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -302,7 +302,7 @@ good_area: * fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index b83abbead4a2..2d0276abe0a6 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 925a7231abb3..c6a5225a3521 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,7 +511,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); major |= fault & VM_FAULT_MAJOR; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5873835a3e6b..30c1124d0fb6 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -109,7 +109,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, NULL); /* * If we need to retry but a fatal signal is pending, handle the diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index aebf9183bedd..ad783aaaf649 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -476,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index fbe1f2fe9a8c..3c0a11827f7e 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -482,7 +482,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index cfef656eda0f..06af03db4417 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -234,7 +234,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; @@ -410,7 +410,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags, NULL)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a3806614e4dc..9ebee14ee893 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -422,7 +422,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) goto exit_exception; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 2b3afa354a90..8d9870d76da1 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -71,7 +71,7 @@ good_area: do { vm_fault_t fault; - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0c7643d9f7cb..e1bf5555d80a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1291,7 +1291,7 @@ good_area: * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); major |= fault & VM_FAULT_MAJOR; /* Quick path to respond to signals */ diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index c128dcc7c85b..e72c8c1359a6 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -107,7 +107,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if (fault_signal_pending(fault, regs)) return; diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index e4b025c5637c..c259108ab6dd 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -495,7 +495,7 @@ static void do_fault(struct work_struct *work) if (access_error(vma, fault)) goto out; - ret = handle_mm_fault(vma, address, flags); + ret = handle_mm_fault(vma, address, flags, NULL); out: mmap_read_unlock(mm); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 6c87c807a0ab..5ae59a6ad681 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -872,7 +872,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) goto invalid; ret = handle_mm_fault(vma, address, - req->wr_req ? FAULT_FLAG_WRITE : 0); + req->wr_req ? FAULT_FLAG_WRITE : 0, + NULL); if (ret & VM_FAULT_ERROR) goto invalid; diff --git a/include/linux/mm.h b/include/linux/mm.h index f97b10117d44..ec0ffb423769 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -38,6 +38,7 @@ struct file_ra_state; struct user_struct; struct writeback_control; struct bdi_writeback; +struct pt_regs; void init_mm_internals(void); @@ -1658,7 +1659,8 @@ int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); + unsigned long address, unsigned int flags, + struct pt_regs *regs); extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); @@ -1668,7 +1670,8 @@ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); #else static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) + unsigned long address, unsigned int flags, + struct pt_regs *regs) { /* should never happen if there's no MMU */ BUG(); diff --git a/mm/gup.c b/mm/gup.c index e9d1d0cc18f0..ae7121d729fa 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -884,7 +884,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_TRIED; } - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); @@ -1238,7 +1238,7 @@ retry: fatal_signal_pending(current)) return -EINTR; - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); diff --git a/mm/hmm.c b/mm/hmm.c index bb279319bf40..943cb2ba4442 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -75,7 +75,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end, } for (; addr < end; addr += PAGE_SIZE) - if (handle_mm_fault(vma, addr, fault_flags) & VM_FAULT_ERROR) + if (handle_mm_fault(vma, addr, fault_flags, NULL) & + VM_FAULT_ERROR) return -EFAULT; return -EBUSY; } diff --git a/mm/ksm.c b/mm/ksm.c index 217842a66912..0aa2247bddd7 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -480,7 +480,8 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) break; if (PageKsm(page)) ret = handle_mm_fault(vma, addr, - FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE); + FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, + NULL); else ret = VM_FAULT_WRITE; put_page(page); diff --git a/mm/memory.c b/mm/memory.c index 325bb575e7ec..9b7d35734caa 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -71,6 +71,8 @@ #include #include #include +#include +#include #include @@ -4356,6 +4358,64 @@ retry_pud: return handle_pte_fault(&vmf); } +/** + * mm_account_fault - Do page fault accountings + * + * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting + * of perf event counters, but we'll still do the per-task accounting to + * the task who triggered this page fault. + * @address: the faulted address. + * @flags: the fault flags. + * @ret: the fault retcode. + * + * This will take care of most of the page fault accountings. Meanwhile, it + * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter + * updates. However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should + * still be in per-arch page fault handlers at the entry of page fault. + */ +static inline void mm_account_fault(struct pt_regs *regs, + unsigned long address, unsigned int flags, + vm_fault_t ret) +{ + bool major; + + /* + * We don't do accounting for some specific faults: + * + * - Unsuccessful faults (e.g. when the address wasn't valid). That + * includes arch_vma_access_permitted() failing before reaching here. + * So this is not a "this many hardware page faults" counter. We + * should use the hw profiling for that. + * + * - Incomplete faults (VM_FAULT_RETRY). They will only be counted + * once they're completed. + */ + if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY)) + return; + + /* + * We define the fault as a major fault when the final successful fault + * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't + * handle it immediately previously). + */ + major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED); + + /* + * If the fault is done for GUP, regs will be NULL, and we will skip + * the fault accounting. + */ + if (!regs) + return; + + if (major) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + } +} + /* * By the time we get here, we already hold the mm semaphore * @@ -4363,7 +4423,7 @@ retry_pud: * return value. See filemap_fault() and __lock_page_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags) + unsigned int flags, struct pt_regs *regs) { vm_fault_t ret; @@ -4404,6 +4464,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, mem_cgroup_oom_synchronize(false); } + mm_account_fault(regs, address, flags, ret); + return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); -- cgit From c0f6eda41f97dbe4e4feda914c18d019b9b9fccd Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:49 -0700 Subject: mm/alpha: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Link: http://lkml.kernel.org/r/20200707225021.200906-3-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/alpha/mm/fault.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 1983e43a5e2f..09172f017efc 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -25,6 +25,7 @@ #include #include #include +#include extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, #endif if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -148,7 +150,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -164,10 +166,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 52e3f8d03052036ce97296915a3746421a1da1d0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:52 -0700 Subject: mm/arc: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Fix PERF_COUNT_SW_PAGE_FAULTS perf event manually for page fault retries, by moving it before taking mmap_sem. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Vineet Gupta Link: http://lkml.kernel.org/r/20200707225021.200906-4-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/arc/mm/fault.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 587dea524e6b..f5657cb68e4f 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) if (write) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -130,7 +131,7 @@ retry: goto bad_area; } - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -155,22 +156,9 @@ bad_area: * Major/minor page fault accounting * (in case of retry we only land here once) */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - - if (likely(!(fault & VM_FAULT_ERROR))) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - + if (likely(!(fault & VM_FAULT_ERROR))) /* Normal return path: fault Handled Gracefully */ return; - } if (!user_mode(regs)) goto no_context; -- cgit From 79fea6c6548e28400d7870c61477d35aecb7baf8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:54 -0700 Subject: mm/arm: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. To do this, we need to pass the pt_regs pointer into __do_page_fault(). Fix PERF_COUNT_SW_PAGE_FAULTS perf event manually for page fault retries, by moving it before taking mmap_sem. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Russell King Cc: Will Deacon Link: http://lkml.kernel.org/r/20200707225021.200906-5-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 01a8e0f8fef7..efa402025031 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static vm_fault_t __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk) + unsigned int flags, struct task_struct *tsk, + struct pt_regs *regs) { struct vm_area_struct *vma; vm_fault_t fault; @@ -224,7 +225,7 @@ good_area: goto out; } - return handle_mm_fault(vma, addr & PAGE_MASK, flags, NULL); + return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ @@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -290,7 +293,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, tsk); + fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because @@ -302,23 +305,7 @@ retry: return 0; } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; -- cgit From 6a1bb025d28e1026fead73b7b33e2dfccba3f4d2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:57 -0700 Subject: mm/arm64: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. To do this, we pass pt_regs pointer into __do_page_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Will Deacon Cc: Catalin Marinas Link: http://lkml.kernel.org/r/20200707225021.200906-6-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/fault.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index be29f4076fe3..f07333e86c2f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags) + unsigned int mm_flags, unsigned long vm_flags, + struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); @@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, */ if (!(vma->vm_flags & vm_flags)) return VM_FAULT_BADACCESS; - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, NULL); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); } static bool is_el0_instruction_abort(unsigned int esr) @@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { const struct fault_info *inf; struct mm_struct *mm = current->mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; @@ -516,8 +517,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags); - major |= fault & VM_FAULT_MAJOR; + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -538,25 +538,8 @@ retry: * Handle the "normal" (no error) case first. */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) { - /* - * Major/minor page fault accounting is only done - * once. If we go through a retry, it is extremely - * likely that the page will be found in page cache at - * that point. - */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - addr); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - addr); - } - + VM_FAULT_BADACCESS)))) return 0; - } /* * If we are in kernel mode at this point, we have no context to -- cgit From a2a9e439baf8aca2af1e614fab7956c09091a6d1 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:00 -0700 Subject: mm/csky: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Guo Ren Link: http://lkml.kernel.org/r/20200707225021.200906-7-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/csky/mm/fault.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index b252e6e4d32f..081b178b41b1 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -151,7 +151,7 @@ good_area: * the fault. */ fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0, - NULL); + regs); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -161,16 +161,6 @@ good_area: goto bad_area; BUG(); } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - address); - } - mmap_read_unlock(mm); return; -- cgit From e08157c3c4239fac29879143019c96498ba9c2bc Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:03 -0700 Subject: mm/hexagon: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Brian Cain Link: http://lkml.kernel.org/r/20200707225021.200906-8-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/hexagon/mm/vm_fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index f12f330e7946..ef32c5a84ff3 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Decode of hardware exception sends us to one of several @@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -88,7 +91,7 @@ good_area: break; } - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -96,10 +99,6 @@ good_area: /* The most common case -- we are done. */ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; -- cgit From b444eed891cf8656af1f59d239f77c5338481774 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:06 -0700 Subject: mm/ia64: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: "Luck, Tony" Link: http://lkml.kernel.org/r/20200707225021.200906-9-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/ia64/mm/fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index abf2808f9b4b..cd9766d2b6e0 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re flags |= FAULT_FLAG_USER; if (mask & VM_WRITE) flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -143,7 +146,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -166,10 +169,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From e1c17f627b4232e2c7e9edb7151fa60a746150ee Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:09 -0700 Subject: mm/m68k: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200707225021.200906-10-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/m68k/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 08b35a318ebe..795f483b1050 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -134,7 +137,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); pr_debug("handle_mm_fault returns %x\n", fault); if (fault_signal_pending(fault, regs)) @@ -150,16 +153,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From aeb6aefc3129307858e4d5a4b73ae28f1871ae05 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:12 -0700 Subject: mm/microblaze: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Michal Simek Link: http://lkml.kernel.org/r/20200707225021.200906-11-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/microblaze/mm/fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 1a3d4c4ca28b..b3fed2cecf84 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -214,7 +217,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -230,10 +233,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (unlikely(fault & VM_FAULT_MAJOR)) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 2558fd7f5c3eda31d4474c7cdc8dc4b3bb6526f5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:15 -0700 Subject: mm/mips: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Fix PERF_COUNT_SW_PAGE_FAULTS perf event manually for page fault retries, by moving it before taking mmap_sem. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Thomas Bogendoerfer Link: http://lkml.kernel.org/r/20200707225021.200906-12-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/mips/mm/fault.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index b1db39784db9..7c871b14e74a 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -152,12 +154,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -168,15 +169,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - tsk->maj_flt++; - } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - tsk->min_flt++; - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From daf7bf5d90397bc0f3d7b45030461f55eb9c74fa Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:19 -0700 Subject: mm/nds32: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Fix PERF_COUNT_SW_PAGE_FAULTS perf event manually for page fault retries, by moving it before taking mmap_sem. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Greentime Hu Cc: Nick Hu Cc: Vincent Chen Link: http://lkml.kernel.org/r/20200707225021.200906-13-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/nds32/mm/fault.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index d0ecc8fb5b23..f02524eb6d56 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr, if (unlikely(faulthandler_disabled() || !mm)) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -206,7 +208,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, addr, flags, NULL); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -228,22 +230,7 @@ good_area: goto bad_area; } - /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. - */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 4487dcf9b75180eb5115c196ca9bd6ebefade5b3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:22 -0700 Subject: mm/nios2: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Ley Foon Tan Link: http://lkml.kernel.org/r/20200707225021.200906-14-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/nios2/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 86beb9a2698e..9476feecf512 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (!mmap_read_trylock(mm)) { if (!user_mode(regs) && !search_exception_tables(regs->ea)) goto bad_area_nosemaphore; @@ -131,7 +134,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -146,16 +149,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 38caa902dccad61e02273c18a633fc5be91aeca5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:25 -0700 Subject: mm/openrisc: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Stafford Horne Cc: Jonas Bonn Cc: Stefan Kristiansson Link: http://lkml.kernel.org/r/20200707225021.200906-15-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/openrisc/mm/fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 3daa491d1edb..ca97d9baab51 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (in_interrupt() || !mm) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -159,7 +162,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -176,10 +179,6 @@ good_area: if (flags & FAULT_FLAG_ALLOW_RETRY) { /*RGD modeled on Cris */ - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From af8a7926273645dc81f9e7f5c3e18136abebf05b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:28 -0700 Subject: mm/parisc: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Add the missing PERF_COUNT_SW_PAGE_FAULTS perf events too. Note, the other two perf events (PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]) were done in handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: James E.J. Bottomley Cc: Helge Deller Link: http://lkml.kernel.org/r/20200707225021.200906-16-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/parisc/mm/fault.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e32d06928c24..4bfe2da9fbe3 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma_prev(mm, address, &prev_vma); @@ -302,7 +304,7 @@ good_area: * fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -323,10 +325,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { /* * No need to mmap_read_unlock(mm) as we would -- cgit From 428fdc094492e720c342f1c934e7972cbc328d13 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:31 -0700 Subject: mm/powerpc: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20200707225021.200906-17-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/powerpc/mm/fault.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c6a5225a3521..0add963a849b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,7 +511,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); major |= fault & VM_FAULT_MAJOR; @@ -537,14 +537,9 @@ retry: /* * Major/minor page fault accounting. */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + if (major) cmo_account_page_fault(); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } + return 0; } NOKPROBE_SYMBOL(__do_page_fault); -- cgit From 5ac365a458902214adfbe3567c94e114cc91cde6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:34 -0700 Subject: mm/riscv: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Pekka Enberg Acked-by: Palmer Dabbelt Cc: Paul Walmsley Cc: Albert Ou Link: http://lkml.kernel.org/r/20200707225021.200906-18-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/riscv/mm/fault.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 30c1124d0fb6..716d64e36f83 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -109,7 +109,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, addr, flags, NULL); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -127,21 +127,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 35e45f3e5a1fe652df2153f2b1c7dd234d448356 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:37 -0700 Subject: mm/s390: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Gerald Schaefer Acked-by: Gerald Schaefer Cc: Alexander Gordeev Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Link: http://lkml.kernel.org/r/20200707225021.200906-19-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/s390/mm/fault.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ad783aaaf649..4c8c063bce5b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -476,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) @@ -486,21 +486,7 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { -- cgit From 105f886220e944b6aa01accfad59af49341703c4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:40 -0700 Subject: mm/sh: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Yoshinori Sato Cc: Rich Felker Link: http://lkml.kernel.org/r/20200707225021.200906-20-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/sh/mm/fault.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 3c0a11827f7e..482668a2f6d3 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -482,22 +482,13 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) return; if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 56e10e6ab11924b19b7c9583750fdd4e440e25c8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:43 -0700 Subject: mm/sparc32: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: David S. Miller Link: http://lkml.kernel.org/r/20200707225021.200906-21-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/sparc/mm/fault_32.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 06af03db4417..8071bfd72349 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -234,7 +234,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -250,15 +250,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From f08147df4092dd5093b85dbccbd34a7843fb60bf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:46 -0700 Subject: mm/sparc64: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: David S. Miller Link: http://lkml.kernel.org/r/20200707225021.200906-22-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/sparc/mm/fault_64.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 9ebee14ee893..0a6bcc85fba7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -422,7 +422,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) goto exit_exception; @@ -438,15 +438,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; -- cgit From 968614fc7b8410e1ee99d0111015a1bf3e955f64 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:49 -0700 Subject: mm/x86: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: H. Peter Anvin Link: http://lkml.kernel.org/r/20200707225021.200906-23-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e1bf5555d80a..35f1498e9832 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs, struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; @@ -1291,8 +1291,7 @@ good_area: * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ - fault = handle_mm_fault(vma, address, flags, NULL); - major |= fault & VM_FAULT_MAJOR; + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -1319,18 +1318,6 @@ good_area: return; } - /* - * Major/minor page fault accounting. If any of the events - * returned VM_FAULT_MAJOR, we account it as a major fault. - */ - if (major) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } - check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault); -- cgit From 484e51e4af528a408b8e0f2db4865625169279cf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:53 -0700 Subject: mm/xtensa: use general page fault accounting Use the general page fault accounting by passing regs into handle_mm_fault(). It naturally solve the issue of multiple page fault accounting when page fault retry happened. Remove the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf events because it's now also done in handle_mm_fault(). Move the PERF_COUNT_SW_PAGE_FAULTS event higher before taking mmap_sem for the fault, then it'll match with the rest of the archs. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Acked-by: Max Filippov Cc: Chris Zankel Link: http://lkml.kernel.org/r/20200707225021.200906-24-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/xtensa/mm/fault.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index e72c8c1359a6..7666408ce12a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -107,7 +110,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags, NULL); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -122,10 +125,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -139,12 +138,6 @@ good_area: } mmap_read_unlock(mm); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (flags & VM_FAULT_MAJOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - else - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - return; /* Something tried to access memory that isn't in our memory map.. -- cgit From a2beb5f1efede6924a4258462a5660572e6ca864 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:38:57 -0700 Subject: mm: clean up the last pieces of page fault accountings Here're the last pieces of page fault accounting that were still done outside handle_mm_fault() where we still have regs==NULL when calling handle_mm_fault(): arch/powerpc/mm/copro_fault.c: copro_handle_mm_fault arch/sparc/mm/fault_32.c: force_user_fault arch/um/kernel/trap.c: handle_page_fault mm/gup.c: faultin_page fixup_user_fault mm/hmm.c: hmm_vma_fault mm/ksm.c: break_ksm Some of them has the issue of duplicated accounting for page fault retries. Some of them didn't do the accounting at all. This patch cleans all these up by letting handle_mm_fault() to do per-task page fault accounting even if regs==NULL (though we'll still skip the perf event accountings). With that, we can safely remove all the outliers now. There's another functional change in that now we account the page faults to the caller of gup, rather than the task_struct that passed into the gup code. More information of this can be found at [1]. After this patch, below things should never be touched again outside handle_mm_fault(): - task_struct.[maj|min]_flt - PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] [1] https://lore.kernel.org/lkml/CAHk-=wj_V2Tps2QrMn20_W0OJF9xqNh52XSGA42s-ZJ8Y+GyKw@mail.gmail.com/ Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Albert Ou Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Brian Cain Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James E.J. Bottomley Cc: John Hubbard Cc: Jonas Bonn Cc: Ley Foon Tan Cc: "Luck, Tony" Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vincent Chen Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200707225021.200906-25-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/powerpc/mm/copro_fault.c | 5 ----- arch/um/kernel/trap.c | 4 ---- mm/gup.c | 13 ------------- mm/memory.c | 17 ++++++++++------- 4 files changed, 10 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 2d0276abe0a6..8acd00178956 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, BUG(); } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - out_unlock: mmap_read_unlock(mm); return ret; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 8d9870d76da1..ad12f78bda7e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -88,10 +88,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/mm/gup.c b/mm/gup.c index ae7121d729fa..d5d44c68fa19 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -893,13 +893,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, BUG(); } - if (tsk) { - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - } - if (ret & VM_FAULT_RETRY) { if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; @@ -1255,12 +1248,6 @@ retry: goto retry; } - if (tsk) { - if (major) - tsk->maj_flt++; - else - tsk->min_flt++; - } return 0; } EXPORT_SYMBOL_GPL(fixup_user_fault); diff --git a/mm/memory.c b/mm/memory.c index 9b7d35734caa..2b7f0e00f312 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4400,20 +4400,23 @@ static inline void mm_account_fault(struct pt_regs *regs, */ major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED); + if (major) + current->maj_flt++; + else + current->min_flt++; + /* - * If the fault is done for GUP, regs will be NULL, and we will skip - * the fault accounting. + * If the fault is done for GUP, regs will be NULL. We only do the + * accounting for the per thread fault counters who triggered the + * fault, and we skip the perf event updates. */ if (!regs) return; - if (major) { - current->maj_flt++; + if (major) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - current->min_flt++; + else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } } /* -- cgit From 64019a2e467a288a16b65ab55ddcbf58c1b00187 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:39:01 -0700 Subject: mm/gup: remove task_struct pointer for all gup code After the cleanup of page fault accounting, gup does not need to pass task_struct around any more. Remove that parameter in the whole gup stack. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: John Hubbard Link: http://lkml.kernel.org/r/20200707225021.200906-26-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/arc/kernel/process.c | 2 +- arch/s390/kvm/interrupt.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/priv.c | 8 +-- arch/s390/mm/gmap.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/infiniband/core/umem_odp.c | 2 +- drivers/vfio/vfio_iommu_type1.c | 4 +- fs/exec.c | 2 +- include/linux/mm.h | 9 ++- kernel/events/uprobes.c | 6 +- kernel/futex.c | 2 +- mm/gup.c | 101 ++++++++++++---------------- mm/memory.c | 2 +- mm/process_vm_access.c | 2 +- security/tomoyo/domain.c | 2 +- virt/kvm/async_pf.c | 2 +- virt/kvm/kvm_main.c | 2 +- 18 files changed, 69 insertions(+), 87 deletions(-) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e12c80d71b78..efeba1fe7252 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -91,7 +91,7 @@ fault: goto fail; mmap_read_lock(current->mm); - ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + ret = fixup_user_fault(current->mm, (unsigned long) uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(current->mm); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1608fd99bbee..2f177298c663 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) struct page *page = NULL; mmap_read_lock(kvm->mm); - get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); mmap_read_unlock(kvm->mm); return page; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 66da278a67fb..6b74b92c1a58 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) { - r = fixup_user_fault(current, current->mm, hva, + r = fixup_user_fault(current->mm, hva, FAULT_FLAG_WRITE, &unlocked); if (r) break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2f721a923b54..cd74989ce0b0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -273,7 +273,7 @@ retry: rc = get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -319,7 +319,7 @@ retry: mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) m3 & SSKE_MC); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } @@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..8747487c50a8 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -649,7 +649,7 @@ retry: rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + if (fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked)) { rc = -EFAULT; goto out_up; @@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, BUG_ON(gmap_is_shadow(gmap)); fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; - if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) /* lost mmap_lock, caller has to retry __gmap_translate */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index e946032b13e4..2c2bf24140c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -469,7 +469,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) locked = 1; } ret = pin_user_pages_remote - (work->task, mm, + (mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 5e32f61a2fe4..cc6b4befde7c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -439,7 +439,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * complex (and doesn't gain us much performance in most use * cases). */ - npages = get_user_pages_remote(owning_process, owning_mm, + npages = get_user_pages_remote(owning_mm, user_virt, gup_num_pages, flags, local_page_list, NULL, NULL); mmap_read_unlock(owning_mm); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5e556ac9102a..9d41105bfd01 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -425,7 +425,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, if (ret) { bool unlocked = false; - ret = fixup_user_fault(NULL, mm, vaddr, + ret = fixup_user_fault(mm, vaddr, FAULT_FLAG_REMOTE | (write_fault ? FAULT_FLAG_WRITE : 0), &unlocked); @@ -453,7 +453,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM, + ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM, page, NULL, NULL); if (ret == 1) { *pfn = page_to_pfn(page[0]); diff --git a/fs/exec.c b/fs/exec.c index a57d9785832b..a91003e28eaa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -217,7 +217,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * We are doing an exec(). 'current' is the process * doing the exec and bprm->mm is the new process's mm. */ - ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags, + ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags, &page, NULL, NULL); if (ret <= 0) return NULL; diff --git a/include/linux/mm.h b/include/linux/mm.h index ec0ffb423769..e7602a3bcef1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1661,7 +1661,7 @@ int invalidate_inode_page(struct page *page); extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs); -extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, +extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); void unmap_mapping_pages(struct address_space *mapping, @@ -1677,8 +1677,7 @@ static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, BUG(); return VM_FAULT_SIGBUS; } -static inline int fixup_user_fault(struct task_struct *tsk, - struct mm_struct *mm, unsigned long address, +static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { /* should never happen if there's no MMU */ @@ -1704,11 +1703,11 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 49047d479c57..649fd53dc9ad 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -376,7 +376,7 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) if (!vaddr || !d) return -EINVAL; - ret = get_user_pages_remote(NULL, mm, vaddr, 1, + ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, &vma, NULL); if (unlikely(ret <= 0)) { /* @@ -477,7 +477,7 @@ retry: if (is_register) gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ - ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags, + ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, &old_page, &vma, NULL); if (ret <= 0) return ret; @@ -2029,7 +2029,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. */ - result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page, + result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, NULL, NULL); if (result < 0) return result; diff --git a/kernel/futex.c b/kernel/futex.c index 83404124b77b..61e8153e6c76 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -678,7 +678,7 @@ static int fault_in_user_writeable(u32 __user *uaddr) int ret; mmap_read_lock(mm); - ret = fixup_user_fault(current, mm, (unsigned long)uaddr, + ret = fixup_user_fault(mm, (unsigned long)uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(mm); diff --git a/mm/gup.c b/mm/gup.c index d5d44c68fa19..39e58df6925d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -859,7 +859,7 @@ unmap: * does not include FOLL_NOWAIT, the mmap_lock may be released. If it * is, *@locked will be set to 0 and -EBUSY returned. */ -static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, +static int faultin_page(struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *locked) { unsigned int fault_flags = 0; @@ -962,7 +962,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) /** * __get_user_pages() - pin user pages in memory - * @tsk: task_struct of target task * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1021,7 +1020,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ -static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1103,8 +1102,7 @@ retry: page = follow_page_mask(vma, start, foll_flags, &ctx); if (!page) { - ret = faultin_page(tsk, vma, start, &foll_flags, - locked); + ret = faultin_page(vma, start, &foll_flags, locked); switch (ret) { case 0: goto retry; @@ -1178,8 +1176,6 @@ static bool vma_permits_fault(struct vm_area_struct *vma, /** * fixup_user_fault() - manually resolve a user page fault - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @address: user address * @fault_flags:flags to pass down to handle_mm_fault() @@ -1207,7 +1203,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma, * This function will not return with an unlocked mmap_lock. So it has not the * same semantics wrt the @mm->mmap_lock as does filemap_fault(). */ -int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, +int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { @@ -1256,8 +1252,7 @@ EXPORT_SYMBOL_GPL(fixup_user_fault); * Please note that this function, unlike __get_user_pages will not * return 0 for nr_pages > 0 without FOLL_NOWAIT */ -static __always_inline long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1290,7 +1285,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, pages_done = 0; lock_dropped = false; for (;;) { - ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages, + ret = __get_user_pages(mm, start, nr_pages, flags, pages, vmas, locked); if (!locked) /* VM_FAULT_RETRY couldn't trigger, bypass */ @@ -1350,7 +1345,7 @@ retry: } *locked = 1; - ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED, + ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, pages, NULL, locked); if (!*locked) { /* Continue to retry until we succeeded */ @@ -1437,7 +1432,7 @@ long populate_vma_page_range(struct vm_area_struct *vma, * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ - return __get_user_pages(current, mm, start, nr_pages, gup_flags, + return __get_user_pages(mm, start, nr_pages, gup_flags, NULL, NULL, locked); } @@ -1521,7 +1516,7 @@ struct page *get_dump_page(unsigned long addr) struct vm_area_struct *vma; struct page *page; - if (__get_user_pages(current, current->mm, addr, 1, + if (__get_user_pages(current->mm, addr, 1, FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma, NULL) < 1) return NULL; @@ -1530,8 +1525,7 @@ struct page *get_dump_page(unsigned long addr) } #endif /* CONFIG_ELF_CORE */ #else /* CONFIG_MMU */ -static long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, unsigned long start, +static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, int *locked, unsigned int foll_flags) @@ -1596,8 +1590,7 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) } #ifdef CONFIG_CMA -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1675,7 +1668,7 @@ check_again: * again migrating any new CMA pages which we failed to isolate * earlier. */ - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, gup_flags); @@ -1689,8 +1682,7 @@ check_again: return ret; } #else -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1705,8 +1697,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which * allows us to process the FOLL_LONGTERM flag. */ -static long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1731,7 +1722,7 @@ static long __gup_longterm_locked(struct task_struct *tsk, flags = memalloc_nocma_save(); } - rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas_tmp, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { @@ -1745,7 +1736,7 @@ static long __gup_longterm_locked(struct task_struct *tsk, goto out; } - rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, + rc = check_and_migrate_cma_pages(mm, start, rc, pages, vmas_tmp, gup_flags); out: memalloc_nocma_restore(flags); @@ -1756,22 +1747,20 @@ out: return rc; } #else /* !CONFIG_FS_DAX && !CONFIG_CMA */ -static __always_inline long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, unsigned int flags) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, flags); } #endif /* CONFIG_FS_DAX || CONFIG_CMA */ #ifdef CONFIG_MMU -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1790,20 +1779,18 @@ static long __get_user_pages_remote(struct task_struct *tsk, * This will check the vmas (even if our vmas arg is NULL) * and return -ENOTSUPP if DAX isn't allowed in this case: */ - return __gup_longterm_locked(tsk, mm, start, nr_pages, pages, + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } /** * get_user_pages_remote() - pin user pages in memory - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1862,7 +1849,7 @@ static long __get_user_pages_remote(struct task_struct *tsk, * should use get_user_pages_remote because it cannot pass * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1874,13 +1861,13 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(get_user_pages_remote); #else /* CONFIG_MMU */ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1888,8 +1875,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return 0; } -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1909,11 +1895,10 @@ static long __get_user_pages_remote(struct task_struct *tsk, * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * - * This is the same as get_user_pages_remote(), just with a - * less-flexible calling convention where we assume that the task - * and mm being operated on are the current task's and don't allow - * passing of a locked parameter. We also obviously don't pass - * FOLL_REMOTE in here. + * This is the same as get_user_pages_remote(), just with a less-flexible + * calling convention where we assume that the mm being operated on belongs to + * the current task, and doesn't allow passing of a locked parameter. We also + * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, @@ -1926,7 +1911,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1936,7 +1921,7 @@ EXPORT_SYMBOL(get_user_pages); * * mmap_read_lock(mm); * do_something() - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * to: @@ -1944,7 +1929,7 @@ EXPORT_SYMBOL(get_user_pages); * int locked = 1; * mmap_read_lock(mm); * do_something() - * get_user_pages_locked(tsk, mm, ..., pages, &locked); + * get_user_pages_locked(mm, ..., pages, &locked); * if (locked) * mmap_read_unlock(mm); * @@ -1982,7 +1967,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } @@ -1992,12 +1977,12 @@ EXPORT_SYMBOL(get_user_pages_locked); * get_user_pages_unlocked() is suitable to replace the form: * * mmap_read_lock(mm); - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * with: * - * get_user_pages_unlocked(tsk, mm, ..., pages); + * get_user_pages_unlocked(mm, ..., pages); * * It is functionally equivalent to get_user_pages_fast so * get_user_pages_fast should be used instead if specific gup_flags @@ -2020,7 +2005,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, return -EINVAL; mmap_read_lock(mm); - ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, &locked, gup_flags | FOLL_TOUCH); if (locked) mmap_read_unlock(mm); @@ -2665,7 +2650,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages, */ if (gup_flags & FOLL_LONGTERM) { mmap_read_lock(current->mm); - ret = __gup_longterm_locked(current, current->mm, + ret = __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL, gup_flags); mmap_read_unlock(current->mm); @@ -2908,10 +2893,8 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages, EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); /** - * pin_user_pages_remote() - pin pages of a remote process (task != current) + * pin_user_pages_remote() - pin pages of a remote process * - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -2932,7 +2915,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. */ -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -2942,7 +2925,7 @@ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(pin_user_pages_remote); @@ -2974,7 +2957,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags); } EXPORT_SYMBOL(pin_user_pages); @@ -3019,7 +3002,7 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } diff --git a/mm/memory.c b/mm/memory.c index 2b7f0e00f312..228efaca75d3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4742,7 +4742,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages_remote(tsk, mm, addr, 1, + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index cc85ce81914a..29c052099aff 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -105,7 +105,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * current/current->mm */ mmap_read_lock(mm); - pinned_pages = pin_user_pages_remote(task, mm, pa, pinned_pages, + pinned_pages = pin_user_pages_remote(mm, pa, pinned_pages, flags, process_pages, NULL, &locked); if (locked) diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 53b3e1f5f227..dc4ecc0b2038 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -914,7 +914,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, * (represented by bprm). 'current' is the process doing * the execve(). */ - if (get_user_pages_remote(current, bprm->mm, pos, 1, + if (get_user_pages_remote(bprm->mm, pos, 1, FOLL_FORCE, &page, NULL, NULL) <= 0) return false; #else diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 390f758d5a27..dd777688d14a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -61,7 +61,7 @@ static void async_pf_execute(struct work_struct *work) * access remotely. */ mmap_read_lock(mm); - get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL, + get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, NULL, &locked); if (locked) mmap_read_unlock(mm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2c2c0254c2d8..737666db02de 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1893,7 +1893,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * not call the fault handler, so do it here. */ bool unlocked = false; - r = fixup_user_fault(current, current->mm, addr, + r = fixup_user_fault(current->mm, addr, (write_fault ? FAULT_FLAG_WRITE : 0), &unlocked); if (unlocked) -- cgit From 41077c99026643c633a1f79376d369fffc757e06 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 12 Aug 2020 10:32:49 +0100 Subject: sfc: fix ef100 design-param checking The handling of the RXQ/TXQ size granularity design-params had two problems: it had a 64-bit divide that didn't build on 32-bit platforms, and it could divide by zero if the NIC supplied 0 as the value of the design-param. Fix both by checking for 0 and for a granularity bigger than our min-size; if the granularity <= EFX_MIN_DMAQ_SIZE then it fits in 32 bits, so we can cast it to u32 for the divide. Reported-by: kernel test robot Signed-off-by: Edward Cree Reviewed-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 36598d0542ed..206d70f9d95b 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -979,7 +979,8 @@ static int ef100_process_design_param(struct efx_nic *efx, * EFX_MIN_DMAQ_SIZE is divisible by GRANULARITY. * This is very unlikely to fail. */ - if (EFX_MIN_DMAQ_SIZE % reader->value) { + if (!reader->value || reader->value > EFX_MIN_DMAQ_SIZE || + EFX_MIN_DMAQ_SIZE % (u32)reader->value) { netif_err(efx, probe, efx->net_dev, "%s size granularity is %llu, can't guarantee safety\n", reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ", -- cgit From 1980c05844830a44708c98c96d600833aa3fae08 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 12 Aug 2020 14:56:02 +0200 Subject: vsock: fix potential null pointer dereference in vsock_poll() syzbot reported this issue where in the vsock_poll() we find the socket state at TCP_ESTABLISHED, but 'transport' is null: general protection fault, probably for non-canonical address 0xdffffc0000000012: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097] CPU: 0 PID: 8227 Comm: syz-executor.2 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vsock_poll+0x75a/0x8e0 net/vmw_vsock/af_vsock.c:1038 Call Trace: sock_poll+0x159/0x460 net/socket.c:1266 vfs_poll include/linux/poll.h:90 [inline] do_pollfd fs/select.c:869 [inline] do_poll fs/select.c:917 [inline] do_sys_poll+0x607/0xd40 fs/select.c:1011 __do_sys_poll fs/select.c:1069 [inline] __se_sys_poll fs/select.c:1057 [inline] __x64_sys_poll+0x18c/0x440 fs/select.c:1057 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This issue can happen if the TCP_ESTABLISHED state is set after we read the vsk->transport in the vsock_poll(). We could put barriers to synchronize, but this can only happen during connection setup, so we can simply check that 'transport' is valid. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reported-and-tested-by: syzbot+a61bac2fcc1a7c6623fe@syzkaller.appspotmail.com Signed-off-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 27bbcfad9c17..9e93bc201cc0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == TCP_ESTABLISHED) { + if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( -- cgit From 06a7a37be55e29961c9ba2abec4d07c8e0e21861 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Aug 2020 21:08:53 +0200 Subject: ipv4: tunnel: fix compilation on ARCH=um With certain configurations, a 64-bit ARCH=um errors out here with an unknown csum_ipv6_magic() function. Include the right header file to always have it. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..5cb30b7aa752 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,6 +38,7 @@ #include #include #include +#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; -- cgit From b1eef236f50ba6afea680da039ef3a2ca9c43d11 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Mon, 10 Aug 2020 17:42:40 -0700 Subject: i2c: iproc: fix race between client unreg and isr When i2c client unregisters, synchronize irq before setting iproc_i2c->slave to NULL. (1) disable_irq() (2) Mask event enable bits in control reg (3) Erase slave address (avoid further writes to rx fifo) (4) Flush tx and rx FIFOs (5) Clear pending event (interrupt) bits in status reg (6) enable_irq() (7) Set client pointer to NULL Unable to handle kernel NULL pointer dereference at virtual address 0000000000000318 [ 371.020421] pc : bcm_iproc_i2c_isr+0x530/0x11f0 [ 371.025098] lr : __handle_irq_event_percpu+0x6c/0x170 [ 371.030309] sp : ffff800010003e40 [ 371.033727] x29: ffff800010003e40 x28: 0000000000000060 [ 371.039206] x27: ffff800010ca9de0 x26: ffff800010f895df [ 371.044686] x25: ffff800010f18888 x24: ffff0008f7ff3600 [ 371.050165] x23: 0000000000000003 x22: 0000000001600000 [ 371.055645] x21: ffff800010f18888 x20: 0000000001600000 [ 371.061124] x19: ffff0008f726f080 x18: 0000000000000000 [ 371.066603] x17: 0000000000000000 x16: 0000000000000000 [ 371.072082] x15: 0000000000000000 x14: 0000000000000000 [ 371.077561] x13: 0000000000000000 x12: 0000000000000001 [ 371.083040] x11: 0000000000000000 x10: 0000000000000040 [ 371.088519] x9 : ffff800010f317c8 x8 : ffff800010f317c0 [ 371.093999] x7 : ffff0008f805b3b0 x6 : 0000000000000000 [ 371.099478] x5 : ffff0008f7ff36a4 x4 : ffff8008ee43d000 [ 371.104957] x3 : 0000000000000000 x2 : ffff8000107d64c0 [ 371.110436] x1 : 00000000c00000af x0 : 0000000000000000 [ 371.115916] Call trace: [ 371.118439] bcm_iproc_i2c_isr+0x530/0x11f0 [ 371.122754] __handle_irq_event_percpu+0x6c/0x170 [ 371.127606] handle_irq_event_percpu+0x34/0x88 [ 371.132189] handle_irq_event+0x40/0x120 [ 371.136234] handle_fasteoi_irq+0xcc/0x1a0 [ 371.140459] generic_handle_irq+0x24/0x38 [ 371.144594] __handle_domain_irq+0x60/0xb8 [ 371.148820] gic_handle_irq+0xc0/0x158 [ 371.152687] el1_irq+0xb8/0x140 [ 371.155927] arch_cpu_idle+0x10/0x18 [ 371.159615] do_idle+0x204/0x290 [ 371.162943] cpu_startup_entry+0x24/0x60 [ 371.166990] rest_init+0xb0/0xbc [ 371.170322] arch_call_rest_init+0xc/0x14 [ 371.174458] start_kernel+0x404/0x430 Fixes: c245d94ed106 ("i2c: iproc: Add multi byte read-write support for slave mode") Signed-off-by: Dhananjay Phadke Reviewed-by: Florian Fainelli Acked-by: Ray Jui Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm-iproc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 8a3c98866fb7..688e92818821 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1078,7 +1078,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) if (!iproc_i2c->slave) return -EINVAL; - iproc_i2c->slave = NULL; + disable_irq(iproc_i2c->irq); /* disable all slave interrupts */ tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); @@ -1091,6 +1091,17 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); iproc_i2c_wr_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET, tmp); + /* flush TX/RX FIFOs */ + tmp = (BIT(S_FIFO_RX_FLUSH_SHIFT) | BIT(S_FIFO_TX_FLUSH_SHIFT)); + iproc_i2c_wr_reg(iproc_i2c, S_FIFO_CTRL_OFFSET, tmp); + + /* clear all pending slave interrupts */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, ISR_MASK_SLAVE); + + iproc_i2c->slave = NULL; + + enable_irq(iproc_i2c->irq); + return 0; } -- cgit From 592d751c1e174df5ff219946908b005eb48934b3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:01 +0100 Subject: net: stmmac: dwmac1000: provide multicast filter fallback If we don't have a hardware multicast filter available then instead of silently failing to listen for the requested ethernet broadcast addresses fall back to receiving all multicast packets, in a similar fashion to other drivers with no multicast filter. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index efc6ec1b8027..fc8759f146c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -164,6 +164,9 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + } else if (!netdev_mc_empty(dev) && (mcbitslog2 == 0)) { + /* Fall back to all multicast if we've no filter */ + value = GMAC_FRAME_FILTER_PM; } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; -- cgit From df43dd526e6609769ae513a81443c7aa727c8ca3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:23 +0100 Subject: net: ethernet: stmmac: Disable hardware multicast filter The IPQ806x does not appear to have a functional multicast ethernet address filter. This was observed as a failure to correctly receive IPv6 packets on a LAN to the all stations address. Checking the vendor driver shows that it does not attempt to enable the multicast filter and instead falls back to receiving all multicast packets, internally setting ALLMULTI. Use the new fallback support in the dwmac1000 driver to correctly achieve the same with the mainline IPQ806x driver. Confirmed to fix IPv6 functionality on an RB3011 router. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 02102c781a8c..bf3250e0e59c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -351,6 +351,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->has_gmac = true; plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; + plat_dat->multicast_filter_bins = 0; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) -- cgit From 2e0d8fef5f76bce0887c73b824d9e625a08e7406 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Aug 2020 18:34:40 -0700 Subject: net: accept an empty mask in /sys/class/net/*/queues/rx-*/rps_cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We must accept an empty mask in store_rps_map(), or we are not able to disable RPS on a queue. Fixes: 07bbecb34106 ("net: Restrict receive packets queuing to housekeeping CPUs") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Alex Belits Cc: Nitesh Narayan Lal Cc: Peter Zijlstra (Intel) Reviewed-by: Maciej Żenczykowski Acked-by: Peter Zijlstra (Intel) Acked-by: Nitesh Narayan Lal Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9de33b594ff2..efec66fa78b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -757,11 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; - cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); - if (cpumask_empty(mask)) { - free_cpumask_var(mask); - return -EINVAL; + if (!cpumask_empty(mask)) { + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; + cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); + if (cpumask_empty(mask)) { + free_cpumask_var(mask); + return -EINVAL; + } } map = kzalloc(max_t(unsigned int, -- cgit From 9643609423c7667fb748cc3ccff023d761d0ac90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 12 Aug 2020 13:26:37 -0700 Subject: Revert "ipv4: tunnel: fix compilation on ARCH=um" This reverts commit 06a7a37be55e29961c9ba2abec4d07c8e0e21861. The bug was already fixed, this added a dup include. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5cb30b7aa752..75c6013ff9a4 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,7 +38,6 @@ #include #include #include -#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; -- cgit From f254ac04c8744cf7bfed012717eac34eacc65dfb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Aug 2020 17:33:30 -0600 Subject: io_uring: enable lookup of links holding inflight files When a process exits, we cancel whatever requests it has pending that are referencing the file table. However, if a link is holding a reference, then we cannot find it by simply looking at the inflight list. Enable checking of the poll and timeout list to find the link, and cancel it appropriately. Cc: stable@vger.kernel.org Reported-by: Josef Signed-off-by: Jens Axboe --- fs/io_uring.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a2afd8c33c9..1ec25ee71372 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4937,6 +4937,7 @@ static bool io_poll_remove_one(struct io_kiocb *req) io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(req->ctx); req->flags |= REQ_F_COMP_LOCKED; + req_set_fail_links(req); io_put_req(req); } @@ -5109,6 +5110,23 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static int __io_timeout_cancel(struct io_kiocb *req) +{ + int ret; + + list_del_init(&req->timeout.list); + + ret = hrtimer_try_to_cancel(&req->io->timeout.timer); + if (ret == -1) + return -EALREADY; + + req_set_fail_links(req); + req->flags |= REQ_F_COMP_LOCKED; + io_cqring_fill_event(req, -ECANCELED); + io_put_req(req); + return 0; +} + static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) { struct io_kiocb *req; @@ -5116,7 +5134,6 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) list_for_each_entry(req, &ctx->timeout_list, timeout.list) { if (user_data == req->user_data) { - list_del_init(&req->timeout.list); ret = 0; break; } @@ -5125,15 +5142,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) if (ret == -ENOENT) return ret; - ret = hrtimer_try_to_cancel(&req->io->timeout.timer); - if (ret == -1) - return -EALREADY; - - req_set_fail_links(req); - req->flags |= REQ_F_COMP_LOCKED; - io_cqring_fill_event(req, -ECANCELED); - io_put_req(req); - return 0; + return __io_timeout_cancel(req); } static int io_timeout_remove_prep(struct io_kiocb *req, @@ -7935,6 +7944,71 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) return work->files == files; } +/* + * Returns true if 'preq' is the link parent of 'req' + */ +static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) +{ + struct io_kiocb *link; + + if (!(preq->flags & REQ_F_LINK_HEAD)) + return false; + + list_for_each_entry(link, &preq->link_list, link_list) { + if (link == req) + return true; + } + + return false; +} + +/* + * We're looking to cancel 'req' because it's holding on to our files, but + * 'req' could be a link to another request. See if it is, and cancel that + * parent request if so. + */ +static bool io_poll_remove_link(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + struct hlist_node *tmp; + struct io_kiocb *preq; + bool found = false; + int i; + + spin_lock_irq(&ctx->completion_lock); + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry_safe(preq, tmp, list, hash_node) { + found = io_match_link(preq, req); + if (found) { + io_poll_remove_one(preq); + break; + } + } + } + spin_unlock_irq(&ctx->completion_lock); + return found; +} + +static bool io_timeout_remove_link(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + struct io_kiocb *preq; + bool found = false; + + spin_lock_irq(&ctx->completion_lock); + list_for_each_entry(preq, &ctx->timeout_list, timeout.list) { + found = io_match_link(preq, req); + if (found) { + __io_timeout_cancel(preq); + break; + } + } + spin_unlock_irq(&ctx->completion_lock); + return found; +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { @@ -7989,6 +8063,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, } } else { io_wq_cancel_work(ctx->io_wq, &cancel_req->work); + /* could be a link, check and remove if it is */ + if (!io_poll_remove_link(ctx, cancel_req)) + io_timeout_remove_link(ctx, cancel_req); io_put_req(cancel_req); } -- cgit From 466a62d7642f02f36d37d9b30c19a725538a01ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 07:35:33 +0100 Subject: mfd: core: Make a best effort attempt to match devices with the correct of_nodes Currently, when a child platform device (sometimes referred to as a sub-device) is registered via the Multi-Functional Device (MFD) API, the framework attempts to match the newly registered platform device with its associated Device Tree (OF) node. Until now, the device has been allocated the first node found with an identical OF compatible string. Unfortunately, if there are, say for example '3' devices which are to be handled by the same driver and therefore have the same compatible string, each of them will be allocated a pointer to the *first* node. An example Device Tree entry might look like this: mfd_of_test { compatible = "mfd,of-test-parent"; #address-cells = <0x02>; #size-cells = <0x02>; child@aaaaaaaaaaaaaaaa { compatible = "mfd,of-test-child"; reg = <0xaaaaaaaa 0xaaaaaaaa 0 0x11>, <0xbbbbbbbb 0xbbbbbbbb 0 0x22>; }; child@cccccccc { compatible = "mfd,of-test-child"; reg = <0x00000000 0xcccccccc 0 0x33>; }; child@dddddddd00000000 { compatible = "mfd,of-test-child"; reg = <0xdddddddd 0x00000000 0 0x44>; }; }; When used with example sub-device registration like this: static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 0, "mfd,of-test-child"), OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child"), OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child") }; ... the current implementation will result in all devices being allocated the first OF node found containing a matching compatible string: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@aaaaaaaaaaaaaaaa [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@aaaaaaaaaaaaaaaa After this patch each device will be allocated a unique OF node: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@aaaaaaaaaaaaaaaa [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@cccccccc [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@dddddddd00000000 Which is fine if all OF nodes are identical. However if we wish to apply an attribute to particular device, we really need to ensure the correct OF node will be associated with the device containing the correct address. We accomplish this by matching the device's address expressed in DT with one provided during sub-device registration. Like this: static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child", 0xdddddddd00000000), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child", 0xaaaaaaaaaaaaaaaa), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 3, "mfd,of-test-child", 0x00000000cccccccc) }; This will ensure a specific device (designated here using the platform_ids; 1, 2 and 3) is matched with a particular OF node: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@dddddddd00000000 [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@cccccccc This implementation is still not infallible, hence the mention of "best effort" in the commit subject. Since we have not *insisted* on the existence of 'reg' properties (in some scenarios they just do not make sense) and no device currently uses the new 'of_reg' attribute, we have to make an on-the-fly judgement call whether to associate the OF node anyway. Which we do in cases where parent drivers haven't specified a particular OF node to match to. So there is a *slight* possibility of the following result (note: the implementation here is convoluted, but it shows you one means by which this process can still break): /* * First entry will match to the first OF node with matching compatible * Second will fail, since the first took its OF node and is no longer available * Third will succeed */ static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child"), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child", 0xaaaaaaaaaaaaaaaa), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 3, "mfd,of-test-child", 0x00000000cccccccc) }; The result: [0.753869] mfd-of-test-parent mfd_of_test: Registering 3 devices [0.756597] mfd-of-test-child: Failed to locate of_node [id: 2] [0.759999] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.760314] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.760908] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.761183] mfd-of-test-child mfd-of-test-child.2: No OF node associated with this device [0.761621] mfd-of-test-child mfd-of-test-child.3: Probing platform device: 3 [0.761899] mfd-of-test-child mfd-of-test-child.3: Using OF node: child@cccccccc We could code around this with some pre-parsing semantics, but the added complexity required to cover each and every corner-case is not justified. Merely patching the current failing (via this patch) is already working with some pretty small corner-cases. Other issues should be patched in the parent drivers which can be achieved simply by implementing OF_MFD_CELL_REG(). Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 95 ++++++++++++++++++++++++++++++++++++++++++------ include/linux/mfd/core.h | 10 +++++ 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 720e5c8b1588..b201842f82ad 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,8 +18,17 @@ #include #include #include +#include #include +static LIST_HEAD(mfd_of_node_list); + +struct mfd_of_node_entry { + struct list_head list; + struct device *dev; + struct device_node *np; +}; + static struct device_type mfd_dev_type = { .name = "mfd_device", }; @@ -107,6 +117,55 @@ static inline void mfd_acpi_add_device(const struct mfd_cell *cell, } #endif +static int mfd_match_of_node_to_dev(struct platform_device *pdev, + struct device_node *np, + const struct mfd_cell *cell) +{ +#if IS_ENABLED(CONFIG_OF) + struct mfd_of_node_entry *of_entry; + const __be32 *reg; + u64 of_node_addr; + + /* Skip devices 'disabled' by Device Tree */ + if (!of_device_is_available(np)) + return -ENODEV; + + /* Skip if OF node has previously been allocated to a device */ + list_for_each_entry(of_entry, &mfd_of_node_list, list) + if (of_entry->np == np) + return -EAGAIN; + + if (!cell->use_of_reg) + /* No of_reg defined - allocate first free compatible match */ + goto allocate_of_node; + + /* We only care about each node's first defined address */ + reg = of_get_address(np, 0, NULL, NULL); + if (!reg) + /* OF node does not contatin a 'reg' property to match to */ + return -EAGAIN; + + of_node_addr = of_read_number(reg, of_n_addr_cells(np)); + + if (cell->of_reg != of_node_addr) + /* No match */ + return -EAGAIN; + +allocate_of_node: + of_entry = kzalloc(sizeof(*of_entry), GFP_KERNEL); + if (!of_entry) + return -ENOMEM; + + of_entry->dev = &pdev->dev; + of_entry->np = np; + list_add_tail(&of_entry->list, &mfd_of_node_list); + + pdev->dev.of_node = np; + pdev->dev.fwnode = &np->fwnode; +#endif + return 0; +} + static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, struct resource *mem_base, @@ -115,6 +174,7 @@ static int mfd_add_device(struct device *parent, int id, struct resource *res; struct platform_device *pdev; struct device_node *np = NULL; + struct mfd_of_node_entry *of_entry, *tmp; int ret = -ENOMEM; int platform_id; int r; @@ -149,19 +209,22 @@ static int mfd_add_device(struct device *parent, int id, if (ret < 0) goto fail_res; - if (parent->of_node && cell->of_compatible) { + if (IS_ENABLED(CONFIG_OF) && parent->of_node && cell->of_compatible) { for_each_child_of_node(parent->of_node, np) { if (of_device_is_compatible(np, cell->of_compatible)) { - if (!of_device_is_available(np)) { - /* Ignore disabled devices error free */ - ret = 0; + ret = mfd_match_of_node_to_dev(pdev, np, cell); + if (ret == -EAGAIN) + continue; + if (ret) goto fail_alias; - } - pdev->dev.of_node = np; - pdev->dev.fwnode = &np->fwnode; + break; } } + + if (!pdev->dev.of_node) + pr_warn("%s: Failed to locate of_node [id: %d]\n", + cell->name, platform_id); } mfd_acpi_add_device(cell, pdev); @@ -170,13 +233,13 @@ static int mfd_add_device(struct device *parent, int id, ret = platform_device_add_data(pdev, cell->platform_data, cell->pdata_size); if (ret) - goto fail_alias; + goto fail_of_entry; } if (cell->properties) { ret = platform_device_add_properties(pdev, cell->properties); if (ret) - goto fail_alias; + goto fail_of_entry; } for (r = 0; r < cell->num_resources; r++) { @@ -213,18 +276,18 @@ static int mfd_add_device(struct device *parent, int id, if (has_acpi_companion(&pdev->dev)) { ret = acpi_check_resource_conflict(&res[r]); if (ret) - goto fail_alias; + goto fail_of_entry; } } } ret = platform_device_add_resources(pdev, res, cell->num_resources); if (ret) - goto fail_alias; + goto fail_of_entry; ret = platform_device_add(pdev); if (ret) - goto fail_alias; + goto fail_of_entry; if (cell->pm_runtime_no_callbacks) pm_runtime_no_callbacks(&pdev->dev); @@ -233,6 +296,12 @@ static int mfd_add_device(struct device *parent, int id, return 0; +fail_of_entry: + list_for_each_entry_safe(of_entry, tmp, &mfd_of_node_list, list) + if (of_entry->dev == &pdev->dev) { + list_del(&of_entry->list); + kfree(of_entry); + } fail_alias: regulator_bulk_unregister_supply_alias(&pdev->dev, cell->parent_supplies, @@ -297,6 +366,8 @@ static int mfd_remove_devices_fn(struct device *dev, void *data) regulator_bulk_unregister_supply_alias(dev, cell->parent_supplies, cell->num_parent_supplies); + kfree(cell); + platform_device_unregister(pdev); return 0; } diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index ab76cdd06199..c437a73b43a3 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -78,6 +78,16 @@ struct mfd_cell { */ const char *of_compatible; + /* + * Address as defined in Device Tree. Used to compement 'of_compatible' + * (above) when matching OF nodes with devices that have identical + * compatible strings + */ + const u64 of_reg; + + /* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */ + bool use_of_reg; + /* Matches ACPI */ const struct mfd_cell_acpi_match *acpi_match; -- cgit From d097965bb6682afe1f8481923b16c033f708923b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 10:18:43 +0100 Subject: mfd: core: Fix formatting of MFD helpers Remove unnecessary '\'s and leading tabs. This will help to clean-up future diffs when subsequent changes are made. Hint: The aforementioned changes follow this patch. Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index c437a73b43a3..da9b066e1594 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -26,20 +26,20 @@ .id = (_id), \ } -#define OF_MFD_CELL(_name, _res, _pdata, _pdsize,_id, _compat) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) \ +#define OF_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _compat) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) -#define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) \ +#define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) -#define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) \ +#define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) -#define MFD_CELL_RES(_name, _res) \ - MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) \ +#define MFD_CELL_RES(_name, _res) \ + MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) -#define MFD_CELL_NAME(_name) \ - MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) \ +#define MFD_CELL_NAME(_name) \ + MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) struct irq_domain; struct property_entry; -- cgit From 44e6171ed04a0cd0378e2503f03a444ebdd4e8e3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 10:12:05 +0100 Subject: mfd: core: Add OF_MFD_CELL_REG() helper Extend current list of helpers to provide support for parent drivers wishing to match specific child devices to particular OF nodes. Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index da9b066e1594..6d68f44a26a1 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -14,7 +14,7 @@ #define MFD_RES_SIZE(arr) (sizeof(arr) / sizeof(struct resource)) -#define MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _match)\ +#define MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg, _use_of_reg, _match) \ { \ .name = (_name), \ .resources = (_res), \ @@ -22,24 +22,29 @@ .platform_data = (_pdata), \ .pdata_size = (_pdsize), \ .of_compatible = (_compat), \ + .of_reg = (_of_reg), \ + .use_of_reg = (_use_of_reg), \ .acpi_match = (_match), \ .id = (_id), \ } +#define OF_MFD_CELL_REG(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg, true, NULL) + #define OF_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _compat) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, 0, false, NULL) #define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, 0, false, _match) #define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, 0, false, NULL) #define MFD_CELL_RES(_name, _res) \ - MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) + MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, 0, false, NULL) #define MFD_CELL_NAME(_name) \ - MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) + MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, 0, false, NULL) struct irq_domain; struct property_entry; -- cgit From 7d2594cd1fa0b03b2746ce811926ee150a3a14fa Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 1 Jul 2020 23:23:48 +0200 Subject: mfd: smsc-ece1099: Remove driver This MFD driver has no user. The keypad driver of this device never made it into the kernel. Therefore, this driver is useless. Remove it. Signed-off-by: Michael Walle Cc: Sourav Poddar Signed-off-by: Lee Jones --- Documentation/driver-api/index.rst | 1 - Documentation/driver-api/smsc_ece1099.rst | 60 ----------------- drivers/mfd/Kconfig | 12 ---- drivers/mfd/Makefile | 1 - drivers/mfd/smsc-ece1099.c | 87 ------------------------- include/linux/mfd/smsc.h | 104 ------------------------------ 6 files changed, 265 deletions(-) delete mode 100644 Documentation/driver-api/smsc_ece1099.rst delete mode 100644 drivers/mfd/smsc-ece1099.c delete mode 100644 include/linux/mfd/smsc.h diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 6567187e7687..1397a30188eb 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -98,7 +98,6 @@ available subsections can be seen below. rfkill serial/index sm501 - smsc_ece1099 switchtec sync_file vfio-mediated-device diff --git a/Documentation/driver-api/smsc_ece1099.rst b/Documentation/driver-api/smsc_ece1099.rst deleted file mode 100644 index 079277421eaf..000000000000 --- a/Documentation/driver-api/smsc_ece1099.rst +++ /dev/null @@ -1,60 +0,0 @@ -================================================= -Msc Keyboard Scan Expansion/GPIO Expansion device -================================================= - -What is smsc-ece1099? ----------------------- - -The ECE1099 is a 40-Pin 3.3V Keyboard Scan Expansion -or GPIO Expansion device. The device supports a keyboard -scan matrix of 23x8. The device is connected to a Master -via the SMSC BC-Link interface or via the SMBus. -Keypad scan Input(KSI) and Keypad Scan Output(KSO) signals -are multiplexed with GPIOs. - -Interrupt generation --------------------- - -Interrupts can be generated by an edge detection on a GPIO -pin or an edge detection on one of the bus interface pins. -Interrupts can also be detected on the keyboard scan interface. -The bus interrupt pin (BC_INT# or SMBUS_INT#) is asserted if -any bit in one of the Interrupt Status registers is 1 and -the corresponding Interrupt Mask bit is also 1. - -In order for software to determine which device is the source -of an interrupt, it should first read the Group Interrupt Status Register -to determine which Status register group is a source for the interrupt. -Software should read both the Status register and the associated Mask register, -then AND the two values together. Bits that are 1 in the result of the AND -are active interrupts. Software clears an interrupt by writing a 1 to the -corresponding bit in the Status register. - -Communication Protocol ----------------------- - -- SMbus slave Interface - The host processor communicates with the ECE1099 device - through a series of read/write registers via the SMBus - interface. SMBus is a serial communication protocol between - a computer host and its peripheral devices. The SMBus data - rate is 10KHz minimum to 400 KHz maximum - -- Slave Bus Interface - The ECE1099 device SMBus implementation is a subset of the - SMBus interface to the host. The device is a slave-only SMBus device. - The implementation in the device is a subset of SMBus since it - only supports four protocols. - - The Write Byte, Read Byte, Send Byte, and Receive Byte protocols are the - only valid SMBus protocols for the device. - -- BC-LinkTM Interface - The BC-Link is a proprietary bus that allows communication - between a Master device and a Companion device. The Master - device uses this serial bus to read and write registers - located on the Companion device. The bus comprises three signals, - BC_CLK, BC_DAT and BC_INT#. The Master device always provides the - clock, BC_CLK, and the Companion device is the source for an - independent asynchronous interrupt signal, BC_INT#. The ECE1099 - supports BC-Link speeds up to 24MHz. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d13bb0abfd6f..33df0837ab41 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1193,18 +1193,6 @@ config MFD_SKY81452 This driver can also be built as a module. If so, the module will be called sky81452. -config MFD_SMSC - bool "SMSC ECE1099 series chips" - depends on I2C=y - select MFD_CORE - select REGMAP_I2C - help - If you say yes here you get support for the - ece1099 chips from SMSC. - - To compile this driver as a module, choose M here: the - module will be called smsc. - config MFD_SC27XX_PMIC tristate "Spreadtrum SC27xx PMICs" depends on ARCH_SPRD || COMPILE_TEST diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 1c8d6be3347d..a60e5f835283 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -127,7 +127,6 @@ obj-$(CONFIG_MFD_CPCAP) += motorola-cpcap.o obj-$(CONFIG_MCP) += mcp-core.o obj-$(CONFIG_MCP_SA11X0) += mcp-sa11x0.o obj-$(CONFIG_MCP_UCB1200) += ucb1x00-core.o -obj-$(CONFIG_MFD_SMSC) += smsc-ece1099.o obj-$(CONFIG_MCP_UCB1200_TS) += ucb1x00-ts.o ifeq ($(CONFIG_SA1100_ASSABET),y) diff --git a/drivers/mfd/smsc-ece1099.c b/drivers/mfd/smsc-ece1099.c deleted file mode 100644 index 57b792eb58fd..000000000000 --- a/drivers/mfd/smsc-ece1099.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * TI SMSC MFD Driver - * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com - * - * Author: Sourav Poddar - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; GPL v2. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct regmap_config smsc_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - .max_register = SMSC_VEN_ID_H, - .cache_type = REGCACHE_RBTREE, -}; - -static int smsc_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) -{ - struct smsc *smsc; - int devid, rev, venid_l, venid_h; - int ret; - - smsc = devm_kzalloc(&i2c->dev, sizeof(*smsc), GFP_KERNEL); - if (!smsc) - return -ENOMEM; - - smsc->regmap = devm_regmap_init_i2c(i2c, &smsc_regmap_config); - if (IS_ERR(smsc->regmap)) - return PTR_ERR(smsc->regmap); - - i2c_set_clientdata(i2c, smsc); - smsc->dev = &i2c->dev; - -#ifdef CONFIG_OF - of_property_read_u32(i2c->dev.of_node, "clock", &smsc->clk); -#endif - - regmap_read(smsc->regmap, SMSC_DEV_ID, &devid); - regmap_read(smsc->regmap, SMSC_DEV_REV, &rev); - regmap_read(smsc->regmap, SMSC_VEN_ID_L, &venid_l); - regmap_read(smsc->regmap, SMSC_VEN_ID_H, &venid_h); - - dev_info(&i2c->dev, "SMSCxxx devid: %02x rev: %02x venid: %02x\n", - devid, rev, (venid_h << 8) | venid_l); - - ret = regmap_write(smsc->regmap, SMSC_CLK_CTRL, smsc->clk); - if (ret) - return ret; - -#ifdef CONFIG_OF - if (i2c->dev.of_node) - ret = devm_of_platform_populate(&i2c->dev); -#endif - - return ret; -} - -static const struct i2c_device_id smsc_i2c_id[] = { - { "smscece1099", 0}, - {}, -}; - -static struct i2c_driver smsc_i2c_driver = { - .driver = { - .name = "smsc", - }, - .probe = smsc_i2c_probe, - .id_table = smsc_i2c_id, -}; -builtin_i2c_driver(smsc_i2c_driver); diff --git a/include/linux/mfd/smsc.h b/include/linux/mfd/smsc.h deleted file mode 100644 index 83944124e886..000000000000 --- a/include/linux/mfd/smsc.h +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * SMSC ECE1099 - * - * Copyright 2012 Texas Instruments Inc. - * - * Author: Sourav Poddar - */ - -#ifndef __LINUX_MFD_SMSC_H -#define __LINUX_MFD_SMSC_H - -#include - -#define SMSC_ID_ECE1099 1 -#define SMSC_NUM_CLIENTS 2 - -#define SMSC_BASE_ADDR 0x38 -#define OMAP_GPIO_SMSC_IRQ 151 - -#define SMSC_MAXGPIO 32 -#define SMSC_BANK(offs) ((offs) >> 3) -#define SMSC_BIT(offs) (1u << ((offs) & 0x7)) - -struct smsc { - struct device *dev; - struct i2c_client *i2c_clients[SMSC_NUM_CLIENTS]; - struct regmap *regmap; - int clk; - /* Stored chip id */ - int id; -}; - -struct smsc_gpio; -struct smsc_keypad; - -static inline int smsc_read(struct device *child, unsigned int reg, - unsigned int *dest) -{ - struct smsc *smsc = dev_get_drvdata(child->parent); - - return regmap_read(smsc->regmap, reg, dest); -} - -static inline int smsc_write(struct device *child, unsigned int reg, - unsigned int value) -{ - struct smsc *smsc = dev_get_drvdata(child->parent); - - return regmap_write(smsc->regmap, reg, value); -} - -/* Registers for SMSC */ -#define SMSC_RESET 0xF5 -#define SMSC_GRP_INT 0xF9 -#define SMSC_CLK_CTRL 0xFA -#define SMSC_WKUP_CTRL 0xFB -#define SMSC_DEV_ID 0xFC -#define SMSC_DEV_REV 0xFD -#define SMSC_VEN_ID_L 0xFE -#define SMSC_VEN_ID_H 0xFF - -/* CLK VALUE */ -#define SMSC_CLK_VALUE 0x13 - -/* Registers for function GPIO INPUT */ -#define SMSC_GPIO_DATA_IN_START 0x00 - -/* Registers for function GPIO OUPUT */ -#define SMSC_GPIO_DATA_OUT_START 0x05 - -/* Definitions for SMSC GPIO CONFIGURATION REGISTER*/ -#define SMSC_GPIO_INPUT_LOW 0x01 -#define SMSC_GPIO_INPUT_RISING 0x09 -#define SMSC_GPIO_INPUT_FALLING 0x11 -#define SMSC_GPIO_INPUT_BOTH_EDGE 0x19 -#define SMSC_GPIO_OUTPUT_PP 0x21 -#define SMSC_GPIO_OUTPUT_OP 0x31 - -#define GRP_INT_STAT 0xf9 -#define SMSC_GPI_INT 0x0f -#define SMSC_CFG_START 0x0A - -/* Registers for SMSC GPIO INTERRUPT STATUS REGISTER*/ -#define SMSC_GPIO_INT_STAT_START 0x32 - -/* Registers for SMSC GPIO INTERRUPT MASK REGISTER*/ -#define SMSC_GPIO_INT_MASK_START 0x37 - -/* Registers for SMSC function KEYPAD*/ -#define SMSC_KP_OUT 0x40 -#define SMSC_KP_IN 0x41 -#define SMSC_KP_INT_STAT 0x42 -#define SMSC_KP_INT_MASK 0x43 - -/* Definitions for keypad */ -#define SMSC_KP_KSO 0x70 -#define SMSC_KP_KSI 0x51 -#define SMSC_KSO_ALL_LOW 0x20 -#define SMSC_KP_SET_LOW_PWR 0x0B -#define SMSC_KP_SET_HIGH 0xFF -#define SMSC_KSO_EVAL 0x00 - -#endif /* __LINUX_MFD_SMSC_H */ -- cgit From 819e42e00d2eca536d5187de21971ed345861102 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 6 Jul 2020 11:39:34 -0700 Subject: mfd: motorola-cpcap: Disable interrupt for suspend Otherwise we get spammed with errors on resume after rtcwake: cpcap-core spi0.0: Failed to read IRQ status: -108 Note that rtcwake is still capable of waking up the system with this patch. Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Signed-off-by: Lee Jones --- drivers/mfd/motorola-cpcap.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/mfd/motorola-cpcap.c b/drivers/mfd/motorola-cpcap.c index 52f38e57cdc1..2283d88adcc2 100644 --- a/drivers/mfd/motorola-cpcap.c +++ b/drivers/mfd/motorola-cpcap.c @@ -214,6 +214,28 @@ static const struct regmap_config cpcap_regmap_config = { .val_format_endian = REGMAP_ENDIAN_LITTLE, }; +#ifdef CONFIG_PM_SLEEP +static int cpcap_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + + disable_irq(spi->irq); + + return 0; +} + +static int cpcap_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + + enable_irq(spi->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(cpcap_pm, cpcap_suspend, cpcap_resume); + static const struct mfd_cell cpcap_mfd_devices[] = { { .name = "cpcap_adc", @@ -313,6 +335,7 @@ static struct spi_driver cpcap_driver = { .driver = { .name = "cpcap-core", .of_match_table = cpcap_of_match, + .pm = &cpcap_pm, }, .probe = cpcap_probe, }; -- cgit From e9faaf056d2812c0b602c3dbeb5b83756b58abc5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 29 Jun 2020 15:52:49 +0300 Subject: dt-bindings: mfd: ti,j721e-system-controller.yaml: Add J721e system controller Add DT binding schema for J721e system controller. Signed-off-by: Roger Quadros Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- .../bindings/mfd/ti,j721e-system-controller.yaml | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml diff --git a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml new file mode 100644 index 000000000000..03d0a232c75e --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/ +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/ti,j721e-system-controller.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI J721e System Controller Registers R/W Device Tree Bindings + +description: | + This represents the Control Module registers (CTRL_MMR0) on the SoC. + System controller node represents a register region containing a set + of miscellaneous registers. The registers are not cohesive enough to + represent as any specific type of device. The typical use-case is + for some other node's driver, or platform-specific code, to acquire + a reference to the syscon node (e.g. by phandle, node path, or + search using a specific compatible value), interrogate the node (or + associated OS driver) to determine the location of the registers, + and access the registers directly. + +maintainers: + - Kishon Vijay Abraham I + - Roger Quadros ; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + serdes_ln_ctrl: serdes-ln-ctrl@4080 { + compatible = "mmio-mux"; + reg = <0x00004080 0x50>; + }; + }; +... -- cgit From a3f673d0092cee167b2a6e3d61454405d0a06b25 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 2 Jul 2020 08:32:33 -0300 Subject: dt-bindings: mfd: st,stmfx: Remove I2C unit name Remove the I2C unit name to fix the following build warning with 'make dt_binding_check': Warning (unit_address_vs_reg): /example-0/i2c@0: node has a unit name, but no reg or ranges property Signed-off-by: Fabio Estevam Acked-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/st,stmfx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml index bed22d4abffb..888ab4b5df45 100644 --- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -93,7 +93,7 @@ required: examples: - | #include - i2c@0 { + i2c { #address-cells = <1>; #size-cells = <0>; stmfx@42 { -- cgit From 091c6110862bce4e2380e353cb062dcb6a56bcb6 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Mon, 13 Jul 2020 10:38:57 +0100 Subject: mfd: da9063: Fix revision handling to correctly select reg tables The current implementation performs checking in the i2c_probe() function of the variant_code but does this immediately after the containing struct has been initialised as all zero. This means the check for variant code will always default to using the BB tables and will never select AD. The variant code is subsequently set by device_init() and later used by the RTC so really it's a little fortunate this mismatch works. This update adds raw I2C read access functionality to read the chip and variant/revision information (common to all revisions) so that it can subsequently correctly choose the proper regmap tables for real initialisation. Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- drivers/mfd/da9063-core.c | 31 ------ drivers/mfd/da9063-i2c.c | 184 +++++++++++++++++++++++++++++++---- include/linux/mfd/da9063/registers.h | 15 ++- 3 files changed, 177 insertions(+), 53 deletions(-) diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c index b125f90dd375..a353d52210a9 100644 --- a/drivers/mfd/da9063-core.c +++ b/drivers/mfd/da9063-core.c @@ -160,7 +160,6 @@ static int da9063_clear_fault_log(struct da9063 *da9063) int da9063_device_init(struct da9063 *da9063, unsigned int irq) { - int model, variant_id, variant_code; int ret; ret = da9063_clear_fault_log(da9063); @@ -171,36 +170,6 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq) da9063->irq_base = -1; da9063->chip_irq = irq; - ret = regmap_read(da9063->regmap, DA9063_REG_CHIP_ID, &model); - if (ret < 0) { - dev_err(da9063->dev, "Cannot read chip model id.\n"); - return -EIO; - } - if (model != PMIC_CHIP_ID_DA9063) { - dev_err(da9063->dev, "Invalid chip model id: 0x%02x\n", model); - return -ENODEV; - } - - ret = regmap_read(da9063->regmap, DA9063_REG_CHIP_VARIANT, &variant_id); - if (ret < 0) { - dev_err(da9063->dev, "Cannot read chip variant id.\n"); - return -EIO; - } - - variant_code = variant_id >> DA9063_CHIP_VARIANT_SHIFT; - - dev_info(da9063->dev, - "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n", - model, variant_id); - - if (variant_code < PMIC_DA9063_BB && variant_code != PMIC_DA9063_AD) { - dev_err(da9063->dev, - "Cannot support variant code: 0x%02X\n", variant_code); - return -ENODEV; - } - - da9063->variant_code = variant_code; - ret = da9063_irq_init(da9063); if (ret) { dev_err(da9063->dev, "Cannot initialize interrupts.\n"); diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c index 455de74c0dd2..481548948bc0 100644 --- a/drivers/mfd/da9063-i2c.c +++ b/drivers/mfd/da9063-i2c.c @@ -22,12 +22,124 @@ #include #include +/* + * Raw I2C access required for just accessing chip and variant info before we + * know which device is present. The info read from the device using this + * approach is then used to select the correct regmap tables. + */ + +#define DA9063_REG_PAGE_SIZE 0x100 +#define DA9063_REG_PAGED_ADDR_MASK 0xFF + +enum da9063_page_sel_buf_fmt { + DA9063_PAGE_SEL_BUF_PAGE_REG = 0, + DA9063_PAGE_SEL_BUF_PAGE_VAL, + DA9063_PAGE_SEL_BUF_SIZE, +}; + +enum da9063_paged_read_msgs { + DA9063_PAGED_READ_MSG_PAGE_SEL = 0, + DA9063_PAGED_READ_MSG_REG_SEL, + DA9063_PAGED_READ_MSG_DATA, + DA9063_PAGED_READ_MSG_CNT, +}; + +static int da9063_i2c_blockreg_read(struct i2c_client *client, u16 addr, + u8 *buf, int count) +{ + struct i2c_msg xfer[DA9063_PAGED_READ_MSG_CNT]; + u8 page_sel_buf[DA9063_PAGE_SEL_BUF_SIZE]; + u8 page_num, paged_addr; + int ret; + + /* Determine page info based on register address */ + page_num = (addr / DA9063_REG_PAGE_SIZE); + if (page_num > 1) { + dev_err(&client->dev, "Invalid register address provided\n"); + return -EINVAL; + } + + paged_addr = (addr % DA9063_REG_PAGE_SIZE) & DA9063_REG_PAGED_ADDR_MASK; + page_sel_buf[DA9063_PAGE_SEL_BUF_PAGE_REG] = DA9063_REG_PAGE_CON; + page_sel_buf[DA9063_PAGE_SEL_BUF_PAGE_VAL] = + (page_num << DA9063_I2C_PAGE_SEL_SHIFT) & DA9063_REG_PAGE_MASK; + + /* Write reg address, page selection */ + xfer[DA9063_PAGED_READ_MSG_PAGE_SEL].addr = client->addr; + xfer[DA9063_PAGED_READ_MSG_PAGE_SEL].flags = 0; + xfer[DA9063_PAGED_READ_MSG_PAGE_SEL].len = DA9063_PAGE_SEL_BUF_SIZE; + xfer[DA9063_PAGED_READ_MSG_PAGE_SEL].buf = page_sel_buf; + + /* Select register address */ + xfer[DA9063_PAGED_READ_MSG_REG_SEL].addr = client->addr; + xfer[DA9063_PAGED_READ_MSG_REG_SEL].flags = 0; + xfer[DA9063_PAGED_READ_MSG_REG_SEL].len = sizeof(paged_addr); + xfer[DA9063_PAGED_READ_MSG_REG_SEL].buf = &paged_addr; + + /* Read data */ + xfer[DA9063_PAGED_READ_MSG_DATA].addr = client->addr; + xfer[DA9063_PAGED_READ_MSG_DATA].flags = I2C_M_RD; + xfer[DA9063_PAGED_READ_MSG_DATA].len = count; + xfer[DA9063_PAGED_READ_MSG_DATA].buf = buf; + + ret = i2c_transfer(client->adapter, xfer, DA9063_PAGED_READ_MSG_CNT); + if (ret < 0) { + dev_err(&client->dev, "Paged block read failed: %d\n", ret); + return ret; + } + + if (ret != DA9063_PAGED_READ_MSG_CNT) { + dev_err(&client->dev, "Paged block read failed to complete\n"); + return -EIO; + } + + return 0; +} + +enum { + DA9063_DEV_ID_REG = 0, + DA9063_VAR_ID_REG, + DA9063_CHIP_ID_REGS, +}; + +static int da9063_get_device_type(struct i2c_client *i2c, struct da9063 *da9063) +{ + u8 buf[DA9063_CHIP_ID_REGS]; + int ret; + + ret = da9063_i2c_blockreg_read(i2c, DA9063_REG_DEVICE_ID, buf, + DA9063_CHIP_ID_REGS); + if (ret) + return ret; + + if (buf[DA9063_DEV_ID_REG] != PMIC_CHIP_ID_DA9063) { + dev_err(da9063->dev, + "Invalid chip device ID: 0x%02x\n", + buf[DA9063_DEV_ID_REG]); + return -ENODEV; + } + + dev_info(da9063->dev, + "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n", + buf[DA9063_DEV_ID_REG], buf[DA9063_VAR_ID_REG]); + + da9063->variant_code = + (buf[DA9063_VAR_ID_REG] & DA9063_VARIANT_ID_MRC_MASK) + >> DA9063_VARIANT_ID_MRC_SHIFT; + + return 0; +} + +/* + * Variant specific regmap configs + */ + static const struct regmap_range da9063_ad_readable_ranges[] = { regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_AD_REG_SECOND_D), regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), regmap_reg_range(DA9063_REG_T_OFFSET, DA9063_AD_REG_GP_ID_19), - regmap_reg_range(DA9063_REG_CHIP_ID, DA9063_REG_CHIP_VARIANT), + regmap_reg_range(DA9063_REG_DEVICE_ID, DA9063_REG_VARIANT_ID), }; static const struct regmap_range da9063_ad_writeable_ranges[] = { @@ -72,7 +184,7 @@ static const struct regmap_range da9063_bb_readable_ranges[] = { regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), regmap_reg_range(DA9063_REG_T_OFFSET, DA9063_BB_REG_GP_ID_19), - regmap_reg_range(DA9063_REG_CHIP_ID, DA9063_REG_CHIP_VARIANT), + regmap_reg_range(DA9063_REG_DEVICE_ID, DA9063_REG_VARIANT_ID), }; static const struct regmap_range da9063_bb_writeable_ranges[] = { @@ -117,7 +229,7 @@ static const struct regmap_range da9063l_bb_readable_ranges[] = { regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), regmap_reg_range(DA9063_REG_T_OFFSET, DA9063_BB_REG_GP_ID_19), - regmap_reg_range(DA9063_REG_CHIP_ID, DA9063_REG_CHIP_VARIANT), + regmap_reg_range(DA9063_REG_DEVICE_ID, DA9063_REG_VARIANT_ID), }; static const struct regmap_range da9063l_bb_writeable_ranges[] = { @@ -159,7 +271,7 @@ static const struct regmap_access_table da9063l_bb_volatile_table = { static const struct regmap_range_cfg da9063_range_cfg[] = { { .range_min = DA9063_REG_PAGE_CON, - .range_max = DA9063_REG_CHIP_VARIANT, + .range_max = DA9063_REG_CONFIG_ID, .selector_reg = DA9063_REG_PAGE_CON, .selector_mask = 1 << DA9063_I2C_PAGE_SEL_SHIFT, .selector_shift = DA9063_I2C_PAGE_SEL_SHIFT, @@ -173,7 +285,7 @@ static struct regmap_config da9063_regmap_config = { .val_bits = 8, .ranges = da9063_range_cfg, .num_ranges = ARRAY_SIZE(da9063_range_cfg), - .max_register = DA9063_REG_CHIP_VARIANT, + .max_register = DA9063_REG_CONFIG_ID, .cache_type = REGCACHE_RBTREE, }; @@ -199,18 +311,56 @@ static int da9063_i2c_probe(struct i2c_client *i2c, da9063->chip_irq = i2c->irq; da9063->type = id->driver_data; - if (da9063->variant_code == PMIC_DA9063_AD) { - da9063_regmap_config.rd_table = &da9063_ad_readable_table; - da9063_regmap_config.wr_table = &da9063_ad_writeable_table; - da9063_regmap_config.volatile_table = &da9063_ad_volatile_table; - } else if (da9063->type == PMIC_TYPE_DA9063L) { - da9063_regmap_config.rd_table = &da9063l_bb_readable_table; - da9063_regmap_config.wr_table = &da9063l_bb_writeable_table; - da9063_regmap_config.volatile_table = &da9063l_bb_volatile_table; - } else { - da9063_regmap_config.rd_table = &da9063_bb_readable_table; - da9063_regmap_config.wr_table = &da9063_bb_writeable_table; - da9063_regmap_config.volatile_table = &da9063_bb_volatile_table; + ret = da9063_get_device_type(i2c, da9063); + if (ret) + return ret; + + switch (da9063->type) { + case PMIC_TYPE_DA9063: + switch (da9063->variant_code) { + case PMIC_DA9063_AD: + da9063_regmap_config.rd_table = + &da9063_ad_readable_table; + da9063_regmap_config.wr_table = + &da9063_ad_writeable_table; + da9063_regmap_config.volatile_table = + &da9063_ad_volatile_table; + break; + case PMIC_DA9063_BB: + case PMIC_DA9063_CA: + da9063_regmap_config.rd_table = + &da9063_bb_readable_table; + da9063_regmap_config.wr_table = + &da9063_bb_writeable_table; + da9063_regmap_config.volatile_table = + &da9063_bb_volatile_table; + break; + default: + dev_err(da9063->dev, + "Chip variant not supported for DA9063\n"); + return -ENODEV; + } + break; + case PMIC_TYPE_DA9063L: + switch (da9063->variant_code) { + case PMIC_DA9063_BB: + case PMIC_DA9063_CA: + da9063_regmap_config.rd_table = + &da9063l_bb_readable_table; + da9063_regmap_config.wr_table = + &da9063l_bb_writeable_table; + da9063_regmap_config.volatile_table = + &da9063l_bb_volatile_table; + break; + default: + dev_err(da9063->dev, + "Chip variant not supported for DA9063L\n"); + return -ENODEV; + } + break; + default: + dev_err(da9063->dev, "Chip type not supported\n"); + return -ENODEV; } da9063->regmap = devm_regmap_init_i2c(i2c, &da9063_regmap_config); diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index ba706b0e28c2..1dbabf1b3cb8 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -292,8 +292,10 @@ #define DA9063_BB_REG_GP_ID_19 0x134 /* Chip ID and variant */ -#define DA9063_REG_CHIP_ID 0x181 -#define DA9063_REG_CHIP_VARIANT 0x182 +#define DA9063_REG_DEVICE_ID 0x181 +#define DA9063_REG_VARIANT_ID 0x182 +#define DA9063_REG_CUSTOMER_ID 0x183 +#define DA9063_REG_CONFIG_ID 0x184 /* * PMIC registers bits @@ -929,9 +931,6 @@ #define DA9063_RTC_CLOCK 0x40 #define DA9063_OUT_32K_EN 0x80 -/* DA9063_REG_CHIP_VARIANT */ -#define DA9063_CHIP_VARIANT_SHIFT 4 - /* DA9063_REG_BUCK_ILIM_A (addr=0x9A) */ #define DA9063_BIO_ILIM_MASK 0x0F #define DA9063_BMEM_ILIM_MASK 0xF0 @@ -1065,4 +1064,10 @@ #define DA9063_MON_A10_IDX_LDO9 0x04 #define DA9063_MON_A10_IDX_LDO10 0x05 +/* DA9063_REG_VARIANT_ID (addr=0x182) */ +#define DA9063_VARIANT_ID_VRC_SHIFT 0 +#define DA9063_VARIANT_ID_VRC_MASK 0x0F +#define DA9063_VARIANT_ID_MRC_SHIFT 4 +#define DA9063_VARIANT_ID_MRC_MASK 0xF0 + #endif /* _DA9063_REG_H */ -- cgit From 9ece3601aed46f7b460b79cd7d60908b47b2b0d4 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Mon, 13 Jul 2020 10:38:59 +0100 Subject: mfd: da9063: Add support for latest DA silicon revision This update adds new regmap tables to support the latest DA silicon which will automatically be selected based on the chip and variant information read from the device. Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- drivers/mfd/da9063-i2c.c | 91 ++++++++++++++++++++++++++++++++++++----- include/linux/mfd/da9063/core.h | 1 + 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c index 481548948bc0..b8217ad303ce 100644 --- a/drivers/mfd/da9063-i2c.c +++ b/drivers/mfd/da9063-i2c.c @@ -197,7 +197,7 @@ static const struct regmap_range da9063_bb_writeable_ranges[] = { regmap_reg_range(DA9063_BB_REG_GP_ID_0, DA9063_BB_REG_GP_ID_19), }; -static const struct regmap_range da9063_bb_volatile_ranges[] = { +static const struct regmap_range da9063_bb_da_volatile_ranges[] = { regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_REG_EVENT_D), regmap_reg_range(DA9063_REG_CONTROL_A, DA9063_REG_CONTROL_B), regmap_reg_range(DA9063_REG_CONTROL_E, DA9063_REG_CONTROL_F), @@ -219,9 +219,9 @@ static const struct regmap_access_table da9063_bb_writeable_table = { .n_yes_ranges = ARRAY_SIZE(da9063_bb_writeable_ranges), }; -static const struct regmap_access_table da9063_bb_volatile_table = { - .yes_ranges = da9063_bb_volatile_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_bb_volatile_ranges), +static const struct regmap_access_table da9063_bb_da_volatile_table = { + .yes_ranges = da9063_bb_da_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_da_volatile_ranges), }; static const struct regmap_range da9063l_bb_readable_ranges[] = { @@ -241,7 +241,7 @@ static const struct regmap_range da9063l_bb_writeable_ranges[] = { regmap_reg_range(DA9063_BB_REG_GP_ID_0, DA9063_BB_REG_GP_ID_19), }; -static const struct regmap_range da9063l_bb_volatile_ranges[] = { +static const struct regmap_range da9063l_bb_da_volatile_ranges[] = { regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_REG_EVENT_D), regmap_reg_range(DA9063_REG_CONTROL_A, DA9063_REG_CONTROL_B), regmap_reg_range(DA9063_REG_CONTROL_E, DA9063_REG_CONTROL_F), @@ -263,9 +263,64 @@ static const struct regmap_access_table da9063l_bb_writeable_table = { .n_yes_ranges = ARRAY_SIZE(da9063l_bb_writeable_ranges), }; -static const struct regmap_access_table da9063l_bb_volatile_table = { - .yes_ranges = da9063l_bb_volatile_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063l_bb_volatile_ranges), +static const struct regmap_access_table da9063l_bb_da_volatile_table = { + .yes_ranges = da9063l_bb_da_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063l_bb_da_volatile_ranges), +}; + +static const struct regmap_range da9063_da_readable_ranges[] = { + regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_BB_REG_SECOND_D), + regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), + regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), + regmap_reg_range(DA9063_REG_T_OFFSET, DA9063_BB_REG_GP_ID_11), + regmap_reg_range(DA9063_REG_DEVICE_ID, DA9063_REG_VARIANT_ID), +}; + +static const struct regmap_range da9063_da_writeable_ranges[] = { + regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_REG_PAGE_CON), + regmap_reg_range(DA9063_REG_FAULT_LOG, DA9063_REG_VSYS_MON), + regmap_reg_range(DA9063_REG_COUNT_S, DA9063_BB_REG_ALARM_Y), + regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), + regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), + regmap_reg_range(DA9063_REG_CONFIG_I, DA9063_BB_REG_MON_REG_4), + regmap_reg_range(DA9063_BB_REG_GP_ID_0, DA9063_BB_REG_GP_ID_11), +}; + +static const struct regmap_access_table da9063_da_readable_table = { + .yes_ranges = da9063_da_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_da_readable_ranges), +}; + +static const struct regmap_access_table da9063_da_writeable_table = { + .yes_ranges = da9063_da_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_da_writeable_ranges), +}; + +static const struct regmap_range da9063l_da_readable_ranges[] = { + regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_REG_MON_A10_RES), + regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), + regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), + regmap_reg_range(DA9063_REG_T_OFFSET, DA9063_BB_REG_GP_ID_11), + regmap_reg_range(DA9063_REG_DEVICE_ID, DA9063_REG_VARIANT_ID), +}; + +static const struct regmap_range da9063l_da_writeable_ranges[] = { + regmap_reg_range(DA9063_REG_PAGE_CON, DA9063_REG_PAGE_CON), + regmap_reg_range(DA9063_REG_FAULT_LOG, DA9063_REG_VSYS_MON), + regmap_reg_range(DA9063_REG_SEQ, DA9063_REG_ID_32_31), + regmap_reg_range(DA9063_REG_SEQ_A, DA9063_REG_AUTO3_LOW), + regmap_reg_range(DA9063_REG_CONFIG_I, DA9063_BB_REG_MON_REG_4), + regmap_reg_range(DA9063_BB_REG_GP_ID_0, DA9063_BB_REG_GP_ID_11), +}; + +static const struct regmap_access_table da9063l_da_readable_table = { + .yes_ranges = da9063l_da_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063l_da_readable_ranges), +}; + +static const struct regmap_access_table da9063l_da_writeable_table = { + .yes_ranges = da9063l_da_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063l_da_writeable_ranges), }; static const struct regmap_range_cfg da9063_range_cfg[] = { @@ -333,7 +388,15 @@ static int da9063_i2c_probe(struct i2c_client *i2c, da9063_regmap_config.wr_table = &da9063_bb_writeable_table; da9063_regmap_config.volatile_table = - &da9063_bb_volatile_table; + &da9063_bb_da_volatile_table; + break; + case PMIC_DA9063_DA: + da9063_regmap_config.rd_table = + &da9063_da_readable_table; + da9063_regmap_config.wr_table = + &da9063_da_writeable_table; + da9063_regmap_config.volatile_table = + &da9063_bb_da_volatile_table; break; default: dev_err(da9063->dev, @@ -350,7 +413,15 @@ static int da9063_i2c_probe(struct i2c_client *i2c, da9063_regmap_config.wr_table = &da9063l_bb_writeable_table; da9063_regmap_config.volatile_table = - &da9063l_bb_volatile_table; + &da9063l_bb_da_volatile_table; + break; + case PMIC_DA9063_DA: + da9063_regmap_config.rd_table = + &da9063l_da_readable_table; + da9063_regmap_config.wr_table = + &da9063l_da_writeable_table; + da9063_regmap_config.volatile_table = + &da9063l_bb_da_volatile_table; break; default: dev_err(da9063->dev, diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 5cd06ab26352..fa7a43f02f27 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -35,6 +35,7 @@ enum da9063_variant_codes { PMIC_DA9063_AD = 0x3, PMIC_DA9063_BB = 0x5, PMIC_DA9063_CA = 0x6, + PMIC_DA9063_DA = 0x7, }; /* Interrupts */ -- cgit From 85c307850784eaa0f81713edf707e6f2e4d64eb3 Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Wed, 8 Jul 2020 15:19:36 +0800 Subject: mfd: axp20x: Allow the AXP803 to be probed by I2C The AXP803 can be used both using the RSB proprietary bus, or a more traditional I2C bus. Let's add that possibility. Signed-off-by: Frank Lee Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- drivers/mfd/axp20x-i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/axp20x-i2c.c b/drivers/mfd/axp20x-i2c.c index 3c930316d48b..068e9c083f13 100644 --- a/drivers/mfd/axp20x-i2c.c +++ b/drivers/mfd/axp20x-i2c.c @@ -63,6 +63,7 @@ static const struct of_device_id axp20x_i2c_of_match[] = { { .compatible = "x-powers,axp209", .data = (void *)AXP209_ID }, { .compatible = "x-powers,axp221", .data = (void *)AXP221_ID }, { .compatible = "x-powers,axp223", .data = (void *)AXP223_ID }, + { .compatible = "x-powers,axp803", .data = (void *)AXP803_ID }, { .compatible = "x-powers,axp806", .data = (void *)AXP806_ID }, { }, }; @@ -74,6 +75,7 @@ static const struct i2c_device_id axp20x_i2c_id[] = { { "axp209", 0 }, { "axp221", 0 }, { "axp223", 0 }, + { "axp803", 0 }, { "axp806", 0 }, { }, }; -- cgit From 564de7628dab38f2d51bac2608fe1043704622b7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 17 Jul 2020 13:41:54 +1000 Subject: mfd: kempld-core: Fix 'assignment of read-only location' error drivers/mfd/kempld-core.c: In function 'kempld_register_cells_generic': drivers/mfd/kempld-core.c:105:13: error: assignment of read-only location 'devs[i++]' 105 | devs[i++] = kempld_devs[KEMPLD_I2C]; | ^ drivers/mfd/kempld-core.c:108:13: error: assignment of read-only location 'devs[i++]' 108 | devs[i++] = kempld_devs[KEMPLD_WDT]; | ^ drivers/mfd/kempld-core.c:111:13: error: assignment of read-only location 'devs[i++]' 111 | devs[i++] = kempld_devs[KEMPLD_GPIO]; | ^ drivers/mfd/kempld-core.c:114:13: error: assignment of read-only location 'devs[i++]' 114 | devs[i++] = kempld_devs[KEMPLD_UART]; | ^ Fixes: e49aa9a9bd22 ("mfd: core: Make a best effort attempt to match devices with the correct of_nodes") Signed-off-by: Stephen Rothwell Signed-off-by: Lee Jones --- drivers/mfd/kempld-core.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index f48e21d8b97c..52bec01149e5 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -79,39 +79,31 @@ enum kempld_cells { KEMPLD_UART, }; -static const struct mfd_cell kempld_devs[] = { - [KEMPLD_I2C] = { - .name = "kempld-i2c", - }, - [KEMPLD_WDT] = { - .name = "kempld-wdt", - }, - [KEMPLD_GPIO] = { - .name = "kempld-gpio", - }, - [KEMPLD_UART] = { - .name = "kempld-uart", - }, +static const char *kempld_dev_names[] = { + [KEMPLD_I2C] = "kempld-i2c", + [KEMPLD_WDT] = "kempld-wdt", + [KEMPLD_GPIO] = "kempld-gpio", + [KEMPLD_UART] = "kempld-uart", }; -#define KEMPLD_MAX_DEVS ARRAY_SIZE(kempld_devs) +#define KEMPLD_MAX_DEVS ARRAY_SIZE(kempld_dev_names) static int kempld_register_cells_generic(struct kempld_device_data *pld) { - struct mfd_cell devs[KEMPLD_MAX_DEVS]; + struct mfd_cell devs[KEMPLD_MAX_DEVS] = {}; int i = 0; if (pld->feature_mask & KEMPLD_FEATURE_BIT_I2C) - devs[i++] = kempld_devs[KEMPLD_I2C]; + devs[i++].name = kempld_dev_names[KEMPLD_I2C]; if (pld->feature_mask & KEMPLD_FEATURE_BIT_WATCHDOG) - devs[i++] = kempld_devs[KEMPLD_WDT]; + devs[i++].name = kempld_dev_names[KEMPLD_WDT]; if (pld->feature_mask & KEMPLD_FEATURE_BIT_GPIO) - devs[i++] = kempld_devs[KEMPLD_GPIO]; + devs[i++].name = kempld_dev_names[KEMPLD_GPIO]; if (pld->feature_mask & KEMPLD_FEATURE_MASK_UART) - devs[i++] = kempld_devs[KEMPLD_UART]; + devs[i++].name = kempld_dev_names[KEMPLD_UART]; return mfd_add_devices(pld->dev, -1, devs, i, NULL, 0, NULL); } -- cgit From cf84dc0bb40f4b41f7853f224b37954cf273ee24 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 21 Jul 2020 09:34:02 +0200 Subject: mfd: rn5t618: Make restart handler atomic safe The restart handler is executed during the shutdown phase which is atomic/irq-less. The i2c framework supports atomic transfers since commit 63b96983a5dd ("i2c: core: introduce callbacks for atomic transfers") to address this use case. Using i2c regmap in that situation is not allowed: [ 165.177465] [ BUG: Invalid wait context ] [ 165.181479] 5.8.0-rc3-00003-g0e9088558027-dirty #11 Not tainted [ 165.187400] ----------------------------- [ 165.191410] systemd-shutdow/1 is trying to lock: [ 165.196030] d85b4438 (rn5t618:170:(&rn5t618_regmap_config)->lock){+.+.}-{3:3}, at: regmap_update_bits_base+0x30/0x70 [ 165.206573] other info that might help us debug this: [ 165.211625] context-{4:4} [ 165.214248] 2 locks held by systemd-shutdow/1: [ 165.218691] #0: c131c47c (system_transition_mutex){+.+.}-{3:3}, at: __do_sys_reboot+0x90/0x204 [ 165.227405] #1: c131efb4 (rcu_read_lock){....}-{1:2}, at: __atomic_notifier_call_chain+0x0/0x118 [ 165.236288] stack backtrace: [ 165.239174] CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.8.0-rc3-00003-g0e9088558027-dirty #11 [ 165.248220] Hardware name: Freescale i.MX6 SoloLite (Device Tree) [ 165.254330] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 165.262084] [] (show_stack) from [] (dump_stack+0xe8/0x120) [ 165.269407] [] (dump_stack) from [] (__lock_acquire+0x81c/0x2ca0) [ 165.277246] [] (__lock_acquire) from [] (lock_acquire+0xe4/0x490) [ 165.285090] [] (lock_acquire) from [] (__mutex_lock+0x74/0x954) [ 165.292756] [] (__mutex_lock) from [] (mutex_lock_nested+0x1c/0x24) [ 165.300769] [] (mutex_lock_nested) from [] (regmap_update_bits_base+0x30/0x70) [ 165.309741] [] (regmap_update_bits_base) from [] (rn5t618_trigger_poweroff_sequence+0x34/0x64) [ 165.320097] [] (rn5t618_trigger_poweroff_sequence) from [] (rn5t618_restart+0xc/0x2c) [ 165.329669] [] (rn5t618_restart) from [] (notifier_call_chain+0x48/0x80) [ 165.338113] [] (notifier_call_chain) from [] (__atomic_notifier_call_chain+0x70/0x118) [ 165.347770] [] (__atomic_notifier_call_chain) from [] (atomic_notifier_call_chain+0x18/0x20) [ 165.357949] [] (atomic_notifier_call_chain) from [] (machine_restart+0x68/0x80) [ 165.367001] [] (machine_restart) from [] (__do_sys_reboot+0x11c/0x204) [ 165.375272] [] (__do_sys_reboot) from [] (ret_fast_syscall+0x0/0x28) [ 165.383364] Exception stack(0xd80a5fa8 to 0xd80a5ff0) [ 165.388420] 5fa0: 00406948 00000000 fee1dead 28121969 01234567 73299b00 [ 165.396602] 5fc0: 00406948 00000000 00000000 00000058 be91abc8 00000000 be91ab60 004056f8 [ 165.404781] 5fe0: 00000058 be91aabc b6ed4d45 b6e56746 Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones --- drivers/mfd/rn5t618.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index 232de50562f9..7497edf6ee41 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -77,7 +77,7 @@ static const struct regmap_irq_chip rc5t619_irq_chip = { .mask_invert = true, }; -static struct rn5t618 *rn5t618_pm_power_off; +static struct i2c_client *rn5t618_pm_power_off; static struct notifier_block rn5t618_restart_handler; static int rn5t618_irq_init(struct rn5t618 *rn5t618) @@ -110,13 +110,38 @@ static int rn5t618_irq_init(struct rn5t618 *rn5t618) static void rn5t618_trigger_poweroff_sequence(bool repower) { + int ret; + /* disable automatic repower-on */ - regmap_update_bits(rn5t618_pm_power_off->regmap, RN5T618_REPCNT, - RN5T618_REPCNT_REPWRON, - repower ? RN5T618_REPCNT_REPWRON : 0); + ret = i2c_smbus_read_byte_data(rn5t618_pm_power_off, RN5T618_REPCNT); + if (ret < 0) + goto err; + + ret &= ~RN5T618_REPCNT_REPWRON; + if (repower) + ret |= RN5T618_REPCNT_REPWRON; + + ret = i2c_smbus_write_byte_data(rn5t618_pm_power_off, + RN5T618_REPCNT, (u8)ret); + if (ret < 0) + goto err; + /* start power-off sequence */ - regmap_update_bits(rn5t618_pm_power_off->regmap, RN5T618_SLPCNT, - RN5T618_SLPCNT_SWPWROFF, RN5T618_SLPCNT_SWPWROFF); + ret = i2c_smbus_read_byte_data(rn5t618_pm_power_off, RN5T618_SLPCNT); + if (ret < 0) + goto err; + + ret |= RN5T618_SLPCNT_SWPWROFF; + + ret = i2c_smbus_write_byte_data(rn5t618_pm_power_off, + RN5T618_SLPCNT, (u8)ret); + if (ret < 0) + goto err; + + return; + +err: + dev_alert(&rn5t618_pm_power_off->dev, "Failed to shutdown (err = %d)\n", ret); } static void rn5t618_power_off(void) @@ -189,7 +214,7 @@ static int rn5t618_i2c_probe(struct i2c_client *i2c) return ret; } - rn5t618_pm_power_off = priv; + rn5t618_pm_power_off = i2c; if (of_device_is_system_power_controller(i2c->dev.of_node)) { if (!pm_power_off) pm_power_off = rn5t618_power_off; @@ -211,9 +236,7 @@ static int rn5t618_i2c_probe(struct i2c_client *i2c) static int rn5t618_i2c_remove(struct i2c_client *i2c) { - struct rn5t618 *priv = i2c_get_clientdata(i2c); - - if (priv == rn5t618_pm_power_off) { + if (i2c == rn5t618_pm_power_off) { rn5t618_pm_power_off = NULL; pm_power_off = NULL; } -- cgit From 23ef2b642b85f03a109fbf5958134a5e40193dbd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:29:17 -0700 Subject: mfd: da9055: pdata.h: Drop a duplicated word Drop the repeated word "that" in a comment. Signed-off-by: Randy Dunlap Acked-by: Adam Thomson Signed-off-by: Lee Jones --- include/linux/mfd/da9055/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index eac48e483190..d3f126990ad0 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -35,7 +35,7 @@ struct da9055_pdata { int *gpio_rsel; /* * Regulator mode control bits value (GPI offset) that - * that controls the regulator state, 0 if not available. + * controls the regulator state, 0 if not available. */ enum gpio_select *reg_ren; /* -- cgit From e7b85500885f2a70129f5d3a72153e23b37d0fe5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:29:31 -0700 Subject: mfd: max77693-private: Drop a duplicated word Drop the repeated word "in" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index e798c81aec31..311f7d3d2323 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -131,7 +131,7 @@ enum max77693_pmic_reg { #define FLASH_INT_FLED1_SHORT BIT(3) #define FLASH_INT_OVER_CURRENT BIT(4) -/* Fast charge timer in in hours */ +/* Fast charge timer in hours */ #define DEFAULT_FAST_CHARGE_TIMER 4 /* microamps */ #define DEFAULT_TOP_OFF_THRESHOLD_CURRENT 150000 -- cgit From 594f1935b373019aacaff00929ae61fec0ec83a5 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 17 Jul 2020 23:00:02 +0200 Subject: mfd: rn5t618: Fix caching of battery related registers Battery status changes dynamically, so the corresponding registers need to be considered volatile. Affected registers are: - fuel gauge - battery status - battery interrupt Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones --- drivers/mfd/rn5t618.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index 7497edf6ee41..e25407ed3ad4 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -44,6 +44,9 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) case RN5T618_INTMON: case RN5T618_RTC_CTRL1 ... RN5T618_RTC_CTRL2: case RN5T618_RTC_SECONDS ... RN5T618_RTC_YEAR: + case RN5T618_CHGSTATE: + case RN5T618_CHGCTRL_IRR ... RN5T618_CHGERR_MONI: + case RN5T618_CONTROL ... RN5T618_CC_AVEREG0: return true; default: return false; -- cgit From 1f0fa85c07fb53d756219cf7e6e8e77eab6e75eb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jul 2020 17:15:41 +0100 Subject: mfd: max14577: Remove redundant initialization of variable current_bits The variable current_bits is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/max14577.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c index fd8864cafd25..be185e9d5f16 100644 --- a/drivers/mfd/max14577.c +++ b/drivers/mfd/max14577.c @@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(maxim_charger_currents); int maxim_charger_calc_reg_current(const struct maxim_charger_current *limits, unsigned int min_ua, unsigned int max_ua, u8 *dst) { - unsigned int current_bits = 0xf; + unsigned int current_bits; if (min_ua > max_ua) return -EINVAL; -- cgit From 4ee1d9dc8073acd40f04e5fe3771a5dff978d077 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 27 Jul 2020 03:04:07 +0000 Subject: mfd: intel_soc_pmic_mrfld: Simplify the return expression of intel_scu_ipc_dev_iowrite8() Simplify the return expression. Signed-off-by: Xu Wang Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_mrfld.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_mrfld.c b/drivers/mfd/intel_soc_pmic_mrfld.c index bd94c989d232..71da861e8c27 100644 --- a/drivers/mfd/intel_soc_pmic_mrfld.c +++ b/drivers/mfd/intel_soc_pmic_mrfld.c @@ -91,13 +91,8 @@ static int bcove_ipc_byte_reg_write(void *context, unsigned int reg, { struct intel_soc_pmic *pmic = context; u8 ipc_in = val; - int ret; - ret = intel_scu_ipc_dev_iowrite8(pmic->scu, reg, ipc_in); - if (ret) - return ret; - - return 0; + return intel_scu_ipc_dev_iowrite8(pmic->scu, reg, ipc_in); } static const struct regmap_config bcove_regmap_config = { -- cgit From 114294d276279d6cda81f9c685452239ea89cdb8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jul 2020 11:54:58 +0100 Subject: mfd: mfd-core: Add mechanism for removal of a subset of children Currently, the only way to remove MFD children is with a call to mfd_remove_devices, which will remove all the children. Under some circumstances it is useful to remove only a subset of the child devices. For example if some additional clean up is required between removal of certain child devices. To accomplish this a level field is added to mfd_cell, the normal mfd_remove_devices is modified to not remove devices that are set to a higher level and a corresponding mfd_remove_devices_late function is added to remove those children. See further discussion at: https://lore.kernel.org/lkml/20200616075834.GF2608702@dell/ Suggested-by: Lee Jones Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 16 +++++++++++++++- include/linux/mfd/core.h | 5 +++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index b201842f82ad..c3651f06684f 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -356,6 +356,7 @@ static int mfd_remove_devices_fn(struct device *dev, void *data) { struct platform_device *pdev; const struct mfd_cell *cell; + int *level = data; if (dev->type != &mfd_dev_type) return 0; @@ -363,6 +364,9 @@ static int mfd_remove_devices_fn(struct device *dev, void *data) pdev = to_platform_device(dev); cell = mfd_get_cell(pdev); + if (level && cell->level > *level) + return 0; + regulator_bulk_unregister_supply_alias(dev, cell->parent_supplies, cell->num_parent_supplies); @@ -372,9 +376,19 @@ static int mfd_remove_devices_fn(struct device *dev, void *data) return 0; } +void mfd_remove_devices_late(struct device *parent) +{ + int level = MFD_DEP_LEVEL_HIGH; + + device_for_each_child_reverse(parent, &level, mfd_remove_devices_fn); +} +EXPORT_SYMBOL(mfd_remove_devices_late); + void mfd_remove_devices(struct device *parent) { - device_for_each_child_reverse(parent, NULL, mfd_remove_devices_fn); + int level = MFD_DEP_LEVEL_NORMAL; + + device_for_each_child_reverse(parent, &level, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 6d68f44a26a1..4b35baa14d30 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -46,6 +46,9 @@ #define MFD_CELL_NAME(_name) \ MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, 0, false, NULL) +#define MFD_DEP_LEVEL_NORMAL 0 +#define MFD_DEP_LEVEL_HIGH 1 + struct irq_domain; struct property_entry; @@ -63,6 +66,7 @@ struct mfd_cell_acpi_match { struct mfd_cell { const char *name; int id; + int level; int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); @@ -150,6 +154,7 @@ static inline int mfd_add_hotplug_devices(struct device *parent, } extern void mfd_remove_devices(struct device *parent); +extern void mfd_remove_devices_late(struct device *parent); extern int devm_mfd_add_devices(struct device *dev, int id, const struct mfd_cell *cells, int n_devs, -- cgit From 77b3ddab713a2276c789a8e1beb339ce185cebc2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jul 2020 11:54:59 +0100 Subject: mfd: madera: Improve handling of regulator unbinding The current unbinding process for Madera has some issues. The trouble is runtime PM is disabled as the first step of the process, but some of the drivers release IRQs causing regmap IRQ to issue a runtime get which fails. To allow runtime PM to remain enabled during mfd_remove_devices, the DCVDD regulator must remain available. In the case of external DCVDD's this is simple, the regulator can simply be disabled/put after the call to mfd_remove_devices. However, in the case of an internally supplied DCVDD the regulator needs to be released after the other MFD children depending on it. Use the new MFD mfd_remove_devices_late functionality to split the DCVDD regulator off from the other drivers. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/madera-core.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/madera-core.c b/drivers/mfd/madera-core.c index 4724c1a01a39..8a8d733fdce5 100644 --- a/drivers/mfd/madera-core.c +++ b/drivers/mfd/madera-core.c @@ -44,7 +44,10 @@ static const char * const madera_core_supplies[] = { }; static const struct mfd_cell madera_ldo1_devs[] = { - { .name = "madera-ldo1", }, + { + .name = "madera-ldo1", + .level = MFD_DEP_LEVEL_HIGH, + }, }; static const char * const cs47l15_supplies[] = { @@ -743,18 +746,22 @@ int madera_dev_exit(struct madera *madera) /* Prevent any IRQs being serviced while we clean up */ disable_irq(madera->irq); - /* - * DCVDD could be supplied by a child node, we must disable it before - * removing the children, and prevent PM runtime from turning it back on - */ - pm_runtime_disable(madera->dev); + pm_runtime_get_sync(madera->dev); - clk_disable_unprepare(madera->mclk[MADERA_MCLK2].clk); + mfd_remove_devices(madera->dev); + + pm_runtime_disable(madera->dev); regulator_disable(madera->dcvdd); regulator_put(madera->dcvdd); - mfd_remove_devices(madera->dev); + mfd_remove_devices_late(madera->dev); + + pm_runtime_set_suspended(madera->dev); + pm_runtime_put_noidle(madera->dev); + + clk_disable_unprepare(madera->mclk[MADERA_MCLK2].clk); + madera_enable_hard_reset(madera); regulator_bulk_disable(madera->num_core_supplies, -- cgit From 3d858942250820b9adc35f963a257481d6d4c81d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Jul 2020 16:02:46 +0300 Subject: mfd: dln2: Run event handler loop under spinlock The event handler loop must be run with interrupts disabled. Otherwise we will have a warning: [ 1970.785649] irq 31 handler lineevent_irq_handler+0x0/0x20 enabled interrupts [ 1970.792739] WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x162/0x170 [ 1970.860732] RIP: 0010:__handle_irq_event_percpu+0x162/0x170 ... [ 1970.946994] Call Trace: [ 1970.949446] [ 1970.951471] handle_irq_event_percpu+0x2c/0x80 [ 1970.955921] handle_irq_event+0x23/0x43 [ 1970.959766] handle_simple_irq+0x57/0x70 [ 1970.963695] generic_handle_irq+0x42/0x50 [ 1970.967717] dln2_rx+0xc1/0x210 [dln2] [ 1970.971479] ? usb_hcd_unmap_urb_for_dma+0xa6/0x1c0 [ 1970.976362] __usb_hcd_giveback_urb+0x77/0xe0 [ 1970.980727] usb_giveback_urb_bh+0x8e/0xe0 [ 1970.984837] tasklet_action_common.isra.0+0x4a/0xe0 ... Recently xHCI driver switched to tasklets in the commit 36dc01657b49 ("usb: host: xhci: Support running urb giveback in tasklet context"). The handle_irq_event_* functions are expected to be called with interrupts disabled and they rightfully complain here because we run in tasklet context with interrupts enabled. Use a event spinlock to protect event handler from being interrupted. Note, that there are only two users of this GPIO and ADC drivers and both of them are using generic_handle_irq() which makes above happen. Fixes: 338a12814297 ("mfd: Add support for Diolan DLN-2 devices") Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/dln2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 39276fa626d2..83e676a096dc 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -287,7 +287,11 @@ static void dln2_rx(struct urb *urb) len = urb->actual_length - sizeof(struct dln2_header); if (handle == DLN2_HANDLE_EVENT) { + unsigned long flags; + + spin_lock_irqsave(&dln2->event_cb_lock, flags); dln2_run_event_callbacks(dln2, id, echo, data, len); + spin_unlock_irqrestore(&dln2->event_cb_lock, flags); } else { /* URB will be re-submitted in _dln2_transfer (free_rx_slot) */ if (dln2_transfer_complete(dln2, urb, handle, echo)) -- cgit From 4f4ed4543e2096dc0158ff79bd6d8bc09e27fa93 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 22 Jul 2020 21:24:54 +0200 Subject: mfd: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Acked-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/twl-family.txt | 2 +- drivers/mfd/hi6421-pmic-core.c | 2 +- drivers/mfd/lp873x.c | 2 +- drivers/mfd/lp87565.c | 2 +- drivers/mfd/omap-usb-host.c | 2 +- drivers/mfd/omap-usb-tll.c | 2 +- drivers/mfd/ti_am335x_tscadc.c | 2 +- drivers/mfd/tps65086.c | 2 +- drivers/mfd/tps65217.c | 2 +- drivers/mfd/tps65218.c | 2 +- drivers/mfd/tps65912-core.c | 2 +- drivers/mfd/tps65912-i2c.c | 2 +- drivers/mfd/tps65912-spi.c | 2 +- include/linux/mfd/hi6421-pmic.h | 2 +- include/linux/mfd/lp873x.h | 2 +- include/linux/mfd/lp87565.h | 2 +- include/linux/mfd/ti_am335x_tscadc.h | 2 +- include/linux/mfd/tps65086.h | 2 +- include/linux/mfd/tps65217.h | 2 +- include/linux/mfd/tps65218.h | 2 +- include/linux/mfd/tps65912.h | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/twl-family.txt b/Documentation/devicetree/bindings/mfd/twl-family.txt index 56f244b5d8a4..c2f9302965de 100644 --- a/Documentation/devicetree/bindings/mfd/twl-family.txt +++ b/Documentation/devicetree/bindings/mfd/twl-family.txt @@ -26,7 +26,7 @@ Optional node: Example: /* * Integrated Power Management Chip - * http://www.ti.com/lit/ds/symlink/twl6030.pdf + * https://www.ti.com/lit/ds/symlink/twl6030.pdf */ twl@48 { compatible = "ti,twl6030"; diff --git a/drivers/mfd/hi6421-pmic-core.c b/drivers/mfd/hi6421-pmic-core.c index edfc172b8607..eba88b80d969 100644 --- a/drivers/mfd/hi6421-pmic-core.c +++ b/drivers/mfd/hi6421-pmic-core.c @@ -5,7 +5,7 @@ * Copyright (c) <2011-2014> HiSilicon Technologies Co., Ltd. * http://www.hisilicon.com * Copyright (c) <2013-2017> Linaro Ltd. - * http://www.linaro.org + * https://www.linaro.org * * Author: Guodong Xu */ diff --git a/drivers/mfd/lp873x.c b/drivers/mfd/lp873x.c index 873c608e6a5d..858c9e0a49a4 100644 --- a/drivers/mfd/lp873x.c +++ b/drivers/mfd/lp873x.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2016 Texas Instruments Incorporated - https://www.ti.com/ * * Author: Keerthy * diff --git a/drivers/mfd/lp87565.c b/drivers/mfd/lp87565.c index 4a5c8ade4ae0..2268be9113f1 100644 --- a/drivers/mfd/lp87565.c +++ b/drivers/mfd/lp87565.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/ * * Author: Keerthy */ diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index aca5a160c1b2..1e6431cb8536 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -2,7 +2,7 @@ /** * omap-usb-host.c - The USBHS core driver for OMAP EHCI & OHCI * - * Copyright (C) 2011-2013 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2011-2013 Texas Instruments Incorporated - https://www.ti.com * Author: Keshava Munegowda * Author: Roger Quadros */ diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c index 04a444007cf4..16fad79c73f1 100644 --- a/drivers/mfd/omap-usb-tll.c +++ b/drivers/mfd/omap-usb-tll.c @@ -2,7 +2,7 @@ /** * omap-usb-tll.c - The USB TLL driver for OMAP EHCI & OHCI * - * Copyright (C) 2012-2013 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012-2013 Texas Instruments Incorporated - https://www.ti.com * Author: Keshava Munegowda * Author: Roger Quadros */ diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 926c289cb040..0e6e25308190 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -1,7 +1,7 @@ /* * TI Touch Screen / ADC MFD driver * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/drivers/mfd/tps65086.c b/drivers/mfd/tps65086.c index 43119a6867fe..341466ef20cc 100644 --- a/drivers/mfd/tps65086.c +++ b/drivers/mfd/tps65086.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c index 923602599549..2d9c282ec917 100644 --- a/drivers/mfd/tps65217.c +++ b/drivers/mfd/tps65217.c @@ -3,7 +3,7 @@ * * TPS65217 chip family multi-function driver * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index d41dd864b472..167e9fc308ef 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -1,7 +1,7 @@ /* * Driver for TPS65218 Integrated power management chipsets * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 as diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c index f33567bc428d..b55b1d5d6955 100644 --- a/drivers/mfd/tps65912-core.c +++ b/drivers/mfd/tps65912-core.c @@ -1,7 +1,7 @@ /* * Core functions for TI TPS65912x PMICs * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/drivers/mfd/tps65912-i2c.c b/drivers/mfd/tps65912-i2c.c index 785d19f6f7c9..f7c22ea7b36c 100644 --- a/drivers/mfd/tps65912-i2c.c +++ b/drivers/mfd/tps65912-i2c.c @@ -1,7 +1,7 @@ /* * I2C access driver for TI TPS65912x PMICs * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c index f78be039e463..21a8d6ac5c4a 100644 --- a/drivers/mfd/tps65912-spi.c +++ b/drivers/mfd/tps65912-spi.c @@ -1,7 +1,7 @@ /* * SPI access driver for TI TPS65912x PMICs * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/include/linux/mfd/hi6421-pmic.h b/include/linux/mfd/hi6421-pmic.h index bbc64484c021..2cadf8897c64 100644 --- a/include/linux/mfd/hi6421-pmic.h +++ b/include/linux/mfd/hi6421-pmic.h @@ -5,7 +5,7 @@ * Copyright (c) <2011-2014> HiSilicon Technologies Co., Ltd. * http://www.hisilicon.com * Copyright (c) <2013-2014> Linaro Ltd. - * http://www.linaro.org + * https://www.linaro.org * * Author: Guodong Xu */ diff --git a/include/linux/mfd/lp873x.h b/include/linux/mfd/lp873x.h index edbec8350a49..5546688c7da7 100644 --- a/include/linux/mfd/lp873x.h +++ b/include/linux/mfd/lp873x.h @@ -1,7 +1,7 @@ /* * Functions to access LP873X power management chip. * - * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2016 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h index ce965354bbad..43716aca46fa 100644 --- a/include/linux/mfd/lp87565.h +++ b/include/linux/mfd/lp87565.h @@ -2,7 +2,7 @@ /* * Functions to access LP87565 power management chip. * - * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef __LINUX_MFD_LP87565_H diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 483168403ae5..ffc091b77633 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -4,7 +4,7 @@ /* * TI Touch Screen / ADC MFD driver * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h index a228ae4c88d9..e0a417e53766 100644 --- a/include/linux/mfd/tps65086.h +++ b/include/linux/mfd/tps65086.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index b5dd108421c8..db7091824ed0 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -3,7 +3,7 @@ * * Functions to access TPS65217 power management chip. * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index b0470c35162d..f4ca367e3473 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -3,7 +3,7 @@ * * Functions to access TPS65219 power management chip. * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index b25d0297ba88..7943e413deae 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or -- cgit From e15d7f2b81d2e7d93115d46fa931b366c1cdebc2 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 27 Jul 2020 16:10:08 -0500 Subject: mfd: syscon: Use a unique name with regmap_config The DT node full name is currently being used in regmap_config which in turn is used to create the regmap debugfs directories. This name however is not guaranteed to be unique and the regmap debugfs registration can fail in the cases where the syscon nodes have the same unit-address but are present in different DT node hierarchies. Replace this logic using the syscon reg resource address instead (inspired from logic used while creating platform devices) to ensure a unique name is given for each syscon. Signed-off-by: Suman Anna Reviewed-by: Arnd Bergmann Signed-off-by: Lee Jones --- drivers/mfd/syscon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 3a97816d0cba..75859e492984 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -101,12 +101,14 @@ static struct syscon *of_syscon_register(struct device_node *np, bool check_clk) } } - syscon_config.name = of_node_full_name(np); + syscon_config.name = kasprintf(GFP_KERNEL, "%pOFn@%llx", np, + (u64)res.start); syscon_config.reg_stride = reg_io_width; syscon_config.val_bits = reg_io_width * 8; syscon_config.max_register = resource_size(&res) - reg_io_width; regmap = regmap_init_mmio(NULL, base, &syscon_config); + kfree(syscon_config.name); if (IS_ERR(regmap)) { pr_err("regmap init failed\n"); ret = PTR_ERR(regmap); -- cgit From 21dfbcd1f5cbff9cf2f9e7e43475aed8d072b0dd Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 12 Aug 2020 14:58:25 +0200 Subject: crypto: algif_aead - fix uninitialized ctx->init In skcipher_accept_parent_nokey() the whole af_alg_ctx structure is cleared by memset() after allocation, so add such memset() also to aead_accept_parent_nokey() so that the new "init" field is also initialized to zero. Without that the initial ctx->init checks might randomly return true and cause errors. While there, also remove the redundant zero assignments in both functions. Found via libkcapi testsuite. Cc: Stephan Mueller Fixes: f3c802a1f300 ("crypto: algif_aead - Only wake up when ctx->more is zero") Suggested-by: Herbert Xu Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 6 ------ crypto/algif_skcipher.c | 7 +------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index d48d2156e621..43c6aa784858 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -558,12 +558,6 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; - ctx->used = 0; - atomic_set(&ctx->rcvused, 0); - ctx->more = 0; - ctx->merge = 0; - ctx->enc = 0; - ctx->aead_assoclen = 0; crypto_init_wait(&ctx->wait); ask->private = ctx; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a51ba22fef58..81c4022285a7 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -333,6 +333,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; + memset(ctx, 0, len); ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(tfm), GFP_KERNEL); @@ -340,16 +341,10 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) sock_kfree_s(sk, ctx, len); return -ENOMEM; } - memset(ctx->iv, 0, crypto_skcipher_ivsize(tfm)); INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; - ctx->used = 0; - atomic_set(&ctx->rcvused, 0); - ctx->more = 0; - ctx->merge = 0; - ctx->enc = 0; crypto_init_wait(&ctx->wait); ask->private = ctx; -- cgit From 5a25de6df789cc805a9b8ba7ab5deef5067af47e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 13 Aug 2020 15:46:30 +0800 Subject: ALSA: echoaudio: Fix potential Oops in snd_echo_resume() Freeing chip on error may lead to an Oops at the next time the system goes to resume. Fix this by removing all snd_echo_free() calls on error. Fixes: 47b5d028fdce8 ("ALSA: Echoaudio - Add suspend support #2") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20200813074632.17022-1-dinghao.liu@zju.edu.cn Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 1b7a67ccabce..a20b2bb5c898 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -2212,7 +2212,6 @@ static int snd_echo_resume(struct device *dev) if (err < 0) { kfree(commpage_bak); dev_err(dev, "resume init_hw err=%d\n", err); - snd_echo_free(chip); return err; } @@ -2239,7 +2238,6 @@ static int snd_echo_resume(struct device *dev) if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { dev_err(chip->card->dev, "cannot grab irq\n"); - snd_echo_free(chip); return -EBUSY; } chip->irq = pci->irq; -- cgit From 5fa4e6f1c2d8c9a4e47e1931b42893172d388f2b Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 13 Aug 2020 09:21:09 +0300 Subject: xen/gntdev: Fix dmabuf import with non-zero sgt offset It is possible that the scatter-gather table during dmabuf import has non-zero offset of the data, but user-space doesn't expect that. Fix this by failing the import, so user-space doesn't access wrong data. Fixes: bf8dc55b1358 ("xen/gntdev: Implement dma-buf import functionality") Signed-off-by: Oleksandr Andrushchenko Acked-by: Juergen Gross Cc: Link: https://lore.kernel.org/r/20200813062113.11030-2-andr2000@gmail.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev-dmabuf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 75d3bb948bf3..b1b6eebafd5d 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -613,6 +613,14 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, goto fail_detach; } + /* Check that we have zero offset. */ + if (sgt->sgl->offset) { + ret = ERR_PTR(-EINVAL); + pr_debug("DMA buffer has %d bytes offset, user-space expects 0\n", + sgt->sgl->offset); + goto fail_unmap; + } + /* Check number of pages that imported buffer has. */ if (attach->dmabuf->size != gntdev_dmabuf->nr_pages << PAGE_SHIFT) { ret = ERR_PTR(-EINVAL); -- cgit From 14dee058610446aa464254fc5c8e88c7535195e0 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 13 Aug 2020 09:21:10 +0300 Subject: drm/xen-front: Fix misused IS_ERR_OR_NULL checks The patch c575b7eeb89f: "drm/xen-front: Add support for Xen PV display frontend" from Apr 3, 2018, leads to the following static checker warning: drivers/gpu/drm/xen/xen_drm_front_gem.c:140 xen_drm_front_gem_create() warn: passing zero to 'ERR_CAST' drivers/gpu/drm/xen/xen_drm_front_gem.c 133 struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, 134 size_t size) 135 { 136 struct xen_gem_object *xen_obj; 137 138 xen_obj = gem_create(dev, size); 139 if (IS_ERR_OR_NULL(xen_obj)) 140 return ERR_CAST(xen_obj); Fix this and the rest of misused places with IS_ERR_OR_NULL in the driver. Fixes: c575b7eeb89f: "drm/xen-front: Add support for Xen PV display frontend" Signed-off-by: Oleksandr Andrushchenko Reported-by: Dan Carpenter Reviewed-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/20200813062113.11030-3-andr2000@gmail.com Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front.c | 4 ++-- drivers/gpu/drm/xen/xen_drm_front_gem.c | 8 ++++---- drivers/gpu/drm/xen/xen_drm_front_kms.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 1fd458e877ca..51818e76facd 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -400,8 +400,8 @@ static int xen_drm_drv_dumb_create(struct drm_file *filp, args->size = args->pitch * args->height; obj = xen_drm_front_gem_create(dev, args->size); - if (IS_ERR_OR_NULL(obj)) { - ret = PTR_ERR_OR_ZERO(obj); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto fail; } diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index f0b85e094111..4ec8a49241e1 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -83,7 +83,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) size = round_up(size, PAGE_SIZE); xen_obj = gem_create_obj(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return xen_obj; if (drm_info->front_info->cfg.be_alloc) { @@ -117,7 +117,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) */ xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE); xen_obj->pages = drm_gem_get_pages(&xen_obj->base); - if (IS_ERR_OR_NULL(xen_obj->pages)) { + if (IS_ERR(xen_obj->pages)) { ret = PTR_ERR(xen_obj->pages); xen_obj->pages = NULL; goto fail; @@ -136,7 +136,7 @@ struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, struct xen_gem_object *xen_obj; xen_obj = gem_create(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return ERR_CAST(xen_obj); return &xen_obj->base; @@ -194,7 +194,7 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev, size = attach->dmabuf->size; xen_obj = gem_create_obj(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return ERR_CAST(xen_obj); ret = gem_alloc_pages_array(xen_obj, size); diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index 78096bbcd226..ef11b1e4de39 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -60,7 +60,7 @@ fb_create(struct drm_device *dev, struct drm_file *filp, int ret; fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); - if (IS_ERR_OR_NULL(fb)) + if (IS_ERR(fb)) return fb; gem_obj = fb->obj[0]; -- cgit From 129572999a1eb6e125e645d0436de7d4ab6b8a6c Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 13 Aug 2020 09:21:11 +0300 Subject: drm/xen-front: Add YUYV to supported formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add YUYV to supported formats, so the frontend can work with the formats used by cameras and other HW. Signed-off-by: Oleksandr Andrushchenko Acked-by: Noralf Trønnes Link: https://lore.kernel.org/r/20200813062113.11030-4-andr2000@gmail.com Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front_conn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.c b/drivers/gpu/drm/xen/xen_drm_front_conn.c index 459702fa990e..44f1f70c0aed 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_conn.c +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.c @@ -33,6 +33,7 @@ static const u32 plane_formats[] = { DRM_FORMAT_ARGB4444, DRM_FORMAT_XRGB1555, DRM_FORMAT_ARGB1555, + DRM_FORMAT_YUYV, }; const u32 *xen_drm_front_conn_get_formats(int *format_count) -- cgit From 6f92337b6bffb3d9e509024d6ef5c3f2b112757d Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 13 Aug 2020 09:21:12 +0300 Subject: xen: Sync up with the canonical protocol definition in Xen This is the sync up with the canonical definition of the display protocol in Xen. 1. Add protocol version as an integer Version string, which is in fact an integer, is hard to handle in the code that supports different protocol versions. To simplify that also add the version as an integer. 2. Pass buffer offset with XENDISPL_OP_DBUF_CREATE There are cases when display data buffer is created with non-zero offset to the data start. Handle such cases and provide that offset while creating a display buffer. 3. Add XENDISPL_OP_GET_EDID command Add an optional request for reading Extended Display Identification Data (EDID) structure which allows better configuration of the display connectors over the configuration set in XenStore. With this change connectors may have multiple resolutions defined with respect to detailed timing definitions and additional properties normally provided by displays. If this request is not supported by the backend then visible area is defined by the relevant XenStore's "resolution" property. If backend provides extended display identification data (EDID) with XENDISPL_OP_GET_EDID request then EDID values must take precedence over the resolutions defined in XenStore. 4. Bump protocol version to 2. Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20200813062113.11030-5-andr2000@gmail.com Signed-off-by: Juergen Gross --- include/xen/interface/io/displif.h | 91 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h index fdc279dc4a88..d43ca0361f86 100644 --- a/include/xen/interface/io/displif.h +++ b/include/xen/interface/io/displif.h @@ -38,7 +38,8 @@ * Protocol version ****************************************************************************** */ -#define XENDISPL_PROTOCOL_VERSION "1" +#define XENDISPL_PROTOCOL_VERSION "2" +#define XENDISPL_PROTOCOL_VERSION_INT 2 /* ****************************************************************************** @@ -202,6 +203,9 @@ * Width and height of the connector in pixels separated by * XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the * display. + * If backend provides extended display identification data (EDID) with + * XENDISPL_OP_GET_EDID request then EDID values must take precedence + * over the resolutions defined here. * *------------------ Connector Request Transport Parameters ------------------- * @@ -349,6 +353,8 @@ #define XENDISPL_OP_FB_DETACH 0x13 #define XENDISPL_OP_SET_CONFIG 0x14 #define XENDISPL_OP_PG_FLIP 0x15 +/* The below command is available in protocol version 2 and above. */ +#define XENDISPL_OP_GET_EDID 0x16 /* ****************************************************************************** @@ -377,6 +383,10 @@ #define XENDISPL_FIELD_BE_ALLOC "be-alloc" #define XENDISPL_FIELD_UNIQUE_ID "unique-id" +#define XENDISPL_EDID_BLOCK_SIZE 128 +#define XENDISPL_EDID_BLOCK_COUNT 256 +#define XENDISPL_EDID_MAX_SIZE (XENDISPL_EDID_BLOCK_SIZE * XENDISPL_EDID_BLOCK_COUNT) + /* ****************************************************************************** * STATUS RETURN CODES @@ -451,7 +461,9 @@ * +----------------+----------------+----------------+----------------+ * | gref_directory | 40 * +----------------+----------------+----------------+----------------+ - * | reserved | 44 + * | data_ofs | 44 + * +----------------+----------------+----------------+----------------+ + * | reserved | 48 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ @@ -494,6 +506,7 @@ * buffer size (buffer_sz) exceeds what can be addressed by this single page, * then reference to the next page must be supplied (see gref_dir_next_page * below) + * data_ofs - uint32_t, offset of the data in the buffer, octets */ #define XENDISPL_DBUF_FLG_REQ_ALLOC (1 << 0) @@ -506,6 +519,7 @@ struct xendispl_dbuf_create_req { uint32_t buffer_sz; uint32_t flags; grant_ref_t gref_directory; + uint32_t data_ofs; }; /* @@ -731,6 +745,44 @@ struct xendispl_page_flip_req { uint64_t fb_cookie; }; +/* + * Request EDID - request EDID describing current connector: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_GET_EDID | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | buffer_sz | 8 + * +----------------+----------------+----------------+----------------+ + * | gref_directory | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Notes: + * - This command is not available in protocol version 1 and should be + * ignored. + * - This request is optional and if not supported then visible area + * is defined by the relevant XenStore's "resolution" property. + * - Shared buffer, allocated for EDID storage, must not be less then + * XENDISPL_EDID_MAX_SIZE octets. + * + * buffer_sz - uint32_t, buffer size to be allocated, octets + * gref_directory - grant_ref_t, a reference to the first shared page + * describing EDID buffer references. See XENDISPL_OP_DBUF_CREATE for + * grant page directory structure (struct xendispl_page_directory). + * + * See response format for this request. + */ + +struct xendispl_get_edid_req { + uint32_t buffer_sz; + grant_ref_t gref_directory; +}; + /* *---------------------------------- Responses -------------------------------- * @@ -753,6 +805,35 @@ struct xendispl_page_flip_req { * id - uint16_t, private guest value, echoed from request * status - int32_t, response status, zero on success and -XEN_EXX on failure * + * + * Get EDID response - response for XENDISPL_OP_GET_EDID: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | edid_sz | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Notes: + * - This response is not available in protocol version 1 and should be + * ignored. + * + * edid_sz - uint32_t, size of the EDID, octets + */ + +struct xendispl_get_edid_resp { + uint32_t edid_sz; +}; + +/* *----------------------------------- Events ---------------------------------- * * Events are sent via a shared page allocated by the front and propagated by @@ -804,6 +885,7 @@ struct xendispl_req { struct xendispl_fb_detach_req fb_detach; struct xendispl_set_config_req set_config; struct xendispl_page_flip_req pg_flip; + struct xendispl_get_edid_req get_edid; uint8_t reserved[56]; } op; }; @@ -813,7 +895,10 @@ struct xendispl_resp { uint8_t operation; uint8_t reserved; int32_t status; - uint8_t reserved1[56]; + union { + struct xendispl_get_edid_resp get_edid; + uint8_t reserved1[56]; + } op; }; struct xendispl_evt { -- cgit From a6d996cbd38b42341ad3fce74506b9fdc280e395 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Aug 2020 12:50:26 +0200 Subject: x86/alternatives: Acquire pte lock with interrupts enabled pte lock is never acquired in-IRQ context so it does not require interrupts to be disabled. The lock is a regular spinlock which cannot be acquired with interrupts disabled on RT. RT complains about pte_lock() in __text_poke() because it's invoked after disabling interrupts. __text_poke() has to disable interrupts as use_temporary_mm() expects interrupts to be off because it invokes switch_mm_irqs_off() and uses per-CPU (current active mm) data. Move the PTE lock handling outside the interrupt disabled region. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by; Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20200813105026.bvugytmsso6muljw@linutronix.de --- arch/x86/kernel/alternative.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c826cddae157..34a1b8562c31 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -874,8 +874,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1])); - local_irq_save(flags); - /* * Map the page without the global bit, as TLB flushing is done with * flush_tlb_mm_range(), which is intended for non-global PTEs. @@ -892,6 +890,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ VM_BUG_ON(!ptep); + local_irq_save(flags); + pte = mk_pte(pages[0], pgprot); set_pte_at(poking_mm, poking_addr, ptep, pte); @@ -941,8 +941,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(memcmp(addr, opcode, len)); - pte_unmap_unlock(ptep, ptl); local_irq_restore(flags); + pte_unmap_unlock(ptep, ptl); return addr; } -- cgit From d566a9c2d482640abf5e602957b44e7b3bbe858d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 7 Aug 2020 10:48:44 -0600 Subject: perf sched: Prefer sched_waking event when it exists Commit fbd705a0c618 ("sched: Introduce the 'trace_sched_waking' tracepoint") added sched_waking tracepoint which should be preferred over sched_wakeup when analyzing scheduling delays. Update 'perf sched record' to collect sched_waking events if it exists and fallback to sched_wakeup if it does not. Similarly, update timehist command to skip sched_wakeup events if the session includes sched_waking (ie., sched_waking is preferred over sched_wakeup). Signed-off-by: David Ahern Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200807164844.44870-1-dsahern@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 459e4229945e..0c7d599fa555 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2398,6 +2398,15 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, printf("\n"); } +static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evsel *evsel __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + static int timehist_sched_wakeup_event(struct perf_tool *tool, union perf_event *event __maybe_unused, struct evsel *evsel, @@ -2958,9 +2967,10 @@ static int timehist_check_attr(struct perf_sched *sched, static int perf_sched__timehist(struct perf_sched *sched) { - const struct evsel_str_handler handlers[] = { + struct evsel_str_handler handlers[] = { { "sched:sched_switch", timehist_sched_switch_event, }, { "sched:sched_wakeup", timehist_sched_wakeup_event, }, + { "sched:sched_waking", timehist_sched_wakeup_event, }, { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, }; const struct evsel_str_handler migrate_handlers[] = { @@ -3018,6 +3028,11 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); + /* prefer sched_waking if it is captured */ + if (perf_evlist__find_tracepoint_by_name(session->evlist, + "sched:sched_waking")) + handlers[1].handler = timehist_sched_wakeup_ignore; + /* setup per-evsel handlers */ if (perf_session__set_tracepoints_handlers(session, handlers)) goto out; @@ -3330,12 +3345,16 @@ static int __cmd_record(int argc, const char **argv) "-e", "sched:sched_stat_iowait", "-e", "sched:sched_stat_runtime", "-e", "sched:sched_process_fork", - "-e", "sched:sched_wakeup", "-e", "sched:sched_wakeup_new", "-e", "sched:sched_migrate_task", }; + struct tep_event *waking_event; - rec_argc = ARRAY_SIZE(record_args) + argc - 1; + /* + * +2 for either "-e", "sched:sched_wakeup" or + * "-e", "sched:sched_waking" + */ + rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -3344,6 +3363,13 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + rec_argv[i++] = "-e"; + waking_event = trace_event__tp_format("sched", "sched_waking"); + if (!IS_ERR(waking_event)) + rec_argv[i++] = strdup("sched:sched_waking"); + else + rec_argv[i++] = strdup("sched:sched_wakeup"); + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; -- cgit From 1beaef29c34154ccdcb3f1ae557f6883eda18840 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 10 Aug 2020 15:34:04 +0200 Subject: perf bench mem: Always memset source before memcpy For memcpy, the source pages are memset to zero only when --cycles is used. This leads to wildly different results with or without --cycles, since all sources pages are likely to be mapped to the same zero page without explicit writes. Before this fix: $ export cmd="./perf stat -e LLC-loads -- ./perf bench \ mem memcpy -s 1024MB -l 100 -f default" $ $cmd 2,935,826 LLC-loads 3.821677452 seconds time elapsed $ $cmd --cycles 217,533,436 LLC-loads 8.616725985 seconds time elapsed After this fix: $ $cmd 214,459,686 LLC-loads 8.674301124 seconds time elapsed $ $cmd --cycles 214,758,651 LLC-loads 8.644480006 seconds time elapsed Fixes: 47b5757bac03c338 ("perf bench mem: Move boilerplate memory allocation to the infrastructure") Signed-off-by: Vincent Whitchurch Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel@axis.com Link: http://lore.kernel.org/lkml/20200810133404.30829-1-vincent.whitchurch@axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 9235b76501be..19d45c377ac1 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 0; } -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memcpy_t fn = r->fn.memcpy; - int i; - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(src, 0, size); @@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo * to not measure page fault overhead: */ fn(dst, src, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_prefault(fn, size, src, dst); cycle_start = get_cycles(); for (i = 0; i < nr_loops; ++i) @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void memcpy_t fn = r->fn.memcpy; int i; - /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, src, size); + memcpy_prefault(fn, size, src, dst); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < nr_loops; ++i) -- cgit From 194cb6b50f57676130491341f321394bb16ee624 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Wed, 22 Jul 2020 15:58:45 +0200 Subject: perf test: Allow multiple probes in record+script_probe_vfs_getname.sh Sometimes when adding a kprobe by perf, it results in multiple probe points, such as the following: # ./perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_2 (on getname_flags:73@fs/namei.c with pathname) # cat /sys/kernel/debug/tracing/kprobe_events p:probe/vfs_getname _text+5501804 pathname=+0(+0(%gpr31)):string p:probe/vfs_getname_1 _text+5505388 pathname=+0(+0(%gpr31)):string p:probe/vfs_getname_2 _text+5508396 pathname=+0(+0(%gpr31)):string In this test, we need to record all of them and expect any of them in the perf-script output, since it's not clear which one will be used for the desired syscall: # perf stat -e probe:vfs_getname\* -- touch /tmp/nic Performance counter stats for 'touch /tmp/nic': 31 probe:vfs_getname_2 0 probe:vfs_getname_1 1 probe:vfs_getname 0.001421826 seconds time elapsed 0.001506000 seconds user 0.000000000 seconds sys If the test relies only on probe:vfs_getname, it might easily miss the relevant data. Signed-off-by: Michael Petlan Cc: Jiri Olsa LPU-Reference: 20200722135845.29958-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 54030c18bfc2..bf9e729b3ecf 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -20,13 +20,13 @@ file=$(mktemp /tmp/temporary_file.XXXXX) record_open_file() { echo "Recording open file:" - perf record -o ${perfdata} -e probe:vfs_getname touch $file + perf record -o ${perfdata} -e probe:vfs_getname\* touch $file } perf_script_filenames() { echo "Looking at perf.data file for vfs_getname records for the file we touched:" perf script -i ${perfdata} | \ - egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\"" + egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\"" } add_probe_vfs_getname || skip_if_no_debuginfo -- cgit From ce746d43a17493108c6754ce9450bd7317b93f7c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 7 Aug 2020 13:32:41 -0600 Subject: libperf: Fix man page typos Fix various typos and inconsistent capitalization of CPU in the libperf man pages. Signed-off-by: Rob Herring Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200807193241.3904545-1-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf-counting.txt | 14 ++++++++------ tools/lib/perf/Documentation/libperf-sampling.txt | 13 +++++++------ tools/lib/perf/Documentation/libperf.txt | 4 ++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt index cae9757f49c1..8b75efcd67ce 100644 --- a/tools/lib/perf/Documentation/libperf-counting.txt +++ b/tools/lib/perf/Documentation/libperf-counting.txt @@ -7,13 +7,13 @@ libperf-counting - counting interface DESCRIPTION ----------- -The counting interface provides API to meassure and get count for specific perf events. +The counting interface provides API to measure and get count for specific perf events. The following test tries to explain count on `counting.c` example. It is by no means complete guide to counting, but shows libperf basic API for counting. -The `counting.c` comes with libbperf package and can be compiled and run like: +The `counting.c` comes with libperf package and can be compiled and run like: [source,bash] -- @@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242 It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event, which is available only for root. -The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it: +The `counting.c` example monitors two events on the current process and displays +their count, in a nutshell it: * creates events * adds them to the event list @@ -152,7 +153,7 @@ Configure event list with the thread map and open events: -- Both events are created as disabled (note the `disabled = 1` assignment above), -so we need to enable the whole list explicitely (both events). +so we need to enable the whole list explicitly (both events). From this moment events are counting and we can do our workload. @@ -167,7 +168,8 @@ When we are done we disable the events list. 79 perf_evlist__disable(evlist); -- -Now we need to get the counts from events, following code iterates throught the events list and read counts: +Now we need to get the counts from events, following code iterates through the +events list and read counts: [source,c] -- @@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the 85 } -- -And finaly cleanup. +And finally cleanup. We close the whole events list (both events) and remove it together with the threads map: diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d71a7b4fcf5f..d6ca24f6ef78 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -8,13 +8,13 @@ libperf-sampling - sampling interface DESCRIPTION ----------- -The sampling interface provides API to meassure and get count for specific perf events. +The sampling interface provides API to measure and get count for specific perf events. The following test tries to explain count on `sampling.c` example. It is by no means complete guide to sampling, but shows libperf basic API for sampling. -The `sampling.c` comes with libbperf package and can be compiled and run like: +The `sampling.c` comes with libperf package and can be compiled and run like: [source,bash] -- @@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176 It requires root access, because it uses hardware cycles event. -The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it: +The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a +nutshell it: - creates events - adds them to the event list @@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p 36 }; -- -Next step is to prepare cpus map. +Next step is to prepare CPUs map. In this case we will monitor all the available CPUs: @@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers: -- The event is created as disabled (note the `disabled = 1` assignment above), -so we need to enable the events list explicitely. +so we need to enable the events list explicitly. From this moment the cycles event is sampling. @@ -212,7 +213,7 @@ Each sample needs to get parsed: 106 cpu, pid, tid, ip, period); -- -And finaly cleanup. +And finally cleanup. We close the whole events list (both events) and remove it together with the threads map: diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 5a6bb512789d..0c74c30ed23a 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -29,7 +29,7 @@ SYNOPSIS void libperf_init(libperf_print_fn_t fn); -- -*API to handle cpu maps:* +*API to handle CPU maps:* [source,c] -- @@ -217,7 +217,7 @@ Following objects are key to the libperf interface: [horizontal] -struct perf_cpu_map:: Provides a cpu list abstraction. +struct perf_cpu_map:: Provides a CPU list abstraction. struct perf_thread_map:: Provides a thread list abstraction. -- cgit From ceafdd664cae27ffd2d0a4e4e9eef7896c304046 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 7 Aug 2020 13:32:25 -0600 Subject: MAINTAINERS: Add missing tools/lib/perf/ path to perf maintainers Commit 3ce311afb558 ("libperf: Move to tools/lib/perf") moved libperf out of tools/perf/, but failed to update MAINTAINERS. Signed-off-by: Rob Herring Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200807193225.3904108-1-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e627ed60d75a..b1b6801805c9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13554,6 +13554,7 @@ F: arch/*/kernel/perf_event*.c F: include/linux/perf_event.h F: include/uapi/linux/perf_event.h F: kernel/events/* +F: tools/lib/perf/ F: tools/perf/ PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS -- cgit From 1c695c88a1092b4013e3fffbe0ca685149165403 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 8 Aug 2020 14:21:54 +0200 Subject: perf tools: Rename 'enum dso_kernel_type' to 'enum dso_space_type' Rename enum dso_kernel_type to enum dso_space_type, which seems like better fit. Committer notes: This is used with 'struct dso'->kernel, which once was a boolean, so DSO_SPACE__USER is zero, !zero means some sort of kernel space, be it the host kernel space or a guest kernel space. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 2 +- tools/perf/util/dso.h | 10 +++++----- tools/perf/util/header.c | 12 ++++++------ tools/perf/util/machine.c | 16 ++++++++-------- tools/perf/util/map.c | 4 ++-- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 24 ++++++++++++------------ 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index be991cbbe9f8..5a3b4755f0b3 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1265,7 +1265,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->has_build_id = 0; dso->has_srcline = 1; dso->a2l_fails = 1; - dso->kernel = DSO_TYPE_USER; + dso->kernel = DSO_SPACE__USER; dso->needs_swap = DSO_SWAP__UNSET; dso->comp = COMP_ID__NONE; RB_CLEAR_NODE(&dso->rb_node); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 31c3a9244938..8ad17f395a19 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -46,10 +46,10 @@ enum dso_binary_type { DSO_BINARY_TYPE__NOT_FOUND, }; -enum dso_kernel_type { - DSO_TYPE_USER = 0, - DSO_TYPE_KERNEL, - DSO_TYPE_GUEST_KERNEL +enum dso_space_type { + DSO_SPACE__USER = 0, + DSO_SPACE__KERNEL, + DSO_SPACE__KERNEL_GUEST }; enum dso_swap_type { @@ -160,7 +160,7 @@ struct dso { void *a2l; char *symsrc_filename; unsigned int a2l_fails; - enum dso_kernel_type kernel; + enum dso_space_type kernel; enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; enum dso_binary_type binary_type; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 251faa9a5789..e1db72c73e89 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2056,7 +2056,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, struct machine *machine; u16 cpumode; struct dso *dso; - enum dso_kernel_type dso_type; + enum dso_space_type dso_space; machine = perf_session__findnew_machine(session, bev->pid); if (!machine) @@ -2066,14 +2066,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, switch (cpumode) { case PERF_RECORD_MISC_KERNEL: - dso_type = DSO_TYPE_KERNEL; + dso_space = DSO_SPACE__KERNEL; break; case PERF_RECORD_MISC_GUEST_KERNEL: - dso_type = DSO_TYPE_GUEST_KERNEL; + dso_space = DSO_SPACE__KERNEL_GUEST; break; case PERF_RECORD_MISC_USER: case PERF_RECORD_MISC_GUEST_USER: - dso_type = DSO_TYPE_USER; + dso_space = DSO_SPACE__USER; break; default: goto out; @@ -2085,13 +2085,13 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, dso__set_build_id(dso, &bev->build_id); - if (dso_type != DSO_TYPE_USER) { + if (dso_space != DSO_SPACE__USER) { struct kmod_path m = { .name = NULL, }; if (!kmod_path__parse_name(&m, filename) && m.kmod) dso__set_module_info(dso, &m, machine); else - dso->kernel = dso_type; + dso->kernel = dso_space; free(m.name); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 96af544eac8f..208b813e00ea 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -703,7 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); - dso->kernel = DSO_TYPE_KERNEL; + dso->kernel = DSO_SPACE__KERNEL; } dso__get(dso); @@ -753,7 +753,7 @@ static int machine__process_ksymbol_register(struct machine *machine, struct dso *dso = dso__new(event->ksymbol.name); if (dso) { - dso->kernel = DSO_TYPE_KERNEL; + dso->kernel = DSO_SPACE__KERNEL; map = map__new2(0, dso); } @@ -971,14 +971,14 @@ static struct dso *machine__get_kernel(struct machine *machine) vmlinux_name = symbol_conf.vmlinux_name; kernel = machine__findnew_kernel(machine, vmlinux_name, - "[kernel]", DSO_TYPE_KERNEL); + "[kernel]", DSO_SPACE__KERNEL); } else { if (symbol_conf.default_guest_vmlinux_name) vmlinux_name = symbol_conf.default_guest_vmlinux_name; kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", - DSO_TYPE_GUEST_KERNEL); + DSO_SPACE__KERNEL_GUEST); } if (kernel != NULL && (!kernel->has_build_id)) @@ -1606,7 +1606,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - enum dso_kernel_type kernel_type; + enum dso_space_type dso_space; bool is_kernel_mmap; /* If we have maps from kcore then we do not need or want any others */ @@ -1614,9 +1614,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, return 0; if (machine__is_host(machine)) - kernel_type = DSO_TYPE_KERNEL; + dso_space = DSO_SPACE__KERNEL; else - kernel_type = DSO_TYPE_GUEST_KERNEL; + dso_space = DSO_SPACE__KERNEL_GUEST; is_kernel_mmap = memcmp(event->mmap.filename, machine->mmap_name, @@ -1676,7 +1676,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (kernel == NULL) goto out_problem; - kernel->kernel = kernel_type; + kernel->kernel = dso_space; if (__machine__create_kernel_maps(machine, kernel) < 0) { dso__put(kernel); goto out_problem; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f9dc8c5493ea..1d7210804639 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -486,7 +486,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) * kernel modules also have DSO_TYPE_USER in dso->kernel, * but all kernel modules are ET_REL, so won't get here. */ - if (map->dso->kernel == DSO_TYPE_USER) + if (map->dso->kernel == DSO_SPACE__USER) return rip + map->dso->text_offset; return map->unmap_ip(map, rip) - map->reloc; @@ -516,7 +516,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) * kernel modules also have DSO_TYPE_USER in dso->kernel, * but all kernel modules are ET_REL, so won't get here. */ - if (map->dso->kernel == DSO_TYPE_USER) + if (map->dso->kernel == DSO_SPACE__USER) return map->unmap_ip(map, ip - map->dso->text_offset); return ip + map->reloc; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5e43054bffea..48716d216445 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -789,7 +789,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (ss->opdshdr.sh_type != SHT_PROGBITS) ss->opdsec = NULL; - if (dso->kernel == DSO_TYPE_USER) + if (dso->kernel == DSO_SPACE__USER) ss->adjust_symbols = true; else ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 053468ffcb8a..1f5fcb828a21 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -808,7 +808,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, if (strcmp(curr_map->dso->short_name, module)) { if (curr_map != initial_map && - dso->kernel == DSO_TYPE_GUEST_KERNEL && + dso->kernel == DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(machine)) { /* * We assume all symbols of a module are @@ -865,7 +865,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, goto add_symbol; } - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", kernel_range++); @@ -909,7 +909,7 @@ discard_symbol: } if (curr_map != initial_map && - dso->kernel == DSO_TYPE_GUEST_KERNEL && + dso->kernel == DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(kmaps->machine)) { dso__set_loaded(curr_map->dso); } @@ -1387,7 +1387,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, * Set the data type and long name so that kcore can be read via * dso__data_read_addr(). */ - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE; else dso->binary_type = DSO_BINARY_TYPE__KCORE; @@ -1451,7 +1451,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, symbols__fixup_end(&dso->symbols); symbols__fixup_duplicate(&dso->symbols); - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; else dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; @@ -1537,17 +1537,17 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: - return !kmod && dso->kernel == DSO_TYPE_USER; + return !kmod && dso->kernel == DSO_SPACE__USER; case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__KCORE: - return dso->kernel == DSO_TYPE_KERNEL; + return dso->kernel == DSO_SPACE__KERNEL; case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__GUEST_KCORE: - return dso->kernel == DSO_TYPE_GUEST_KERNEL; + return dso->kernel == DSO_SPACE__KERNEL_GUEST; case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -1650,9 +1650,9 @@ int dso__load(struct dso *dso, struct map *map) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; if (dso->kernel && !kmod) { - if (dso->kernel == DSO_TYPE_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL) ret = dso__load_kernel_sym(dso, map); - else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + else if (dso->kernel == DSO_SPACE__KERNEL_GUEST) ret = dso__load_guest_kernel_sym(dso, map); machine = map__kmaps(map)->machine; @@ -1882,7 +1882,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else symbol__join_symfs(symfs_vmlinux, vmlinux); - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else symtab_type = DSO_BINARY_TYPE__VMLINUX; @@ -1894,7 +1894,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, symsrc__destroy(&ss); if (err > 0) { - if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else dso->binary_type = DSO_BINARY_TYPE__VMLINUX; -- cgit From b2fe96a350deb93d080ee7136b1e9fcf6cda83e2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 8 Aug 2020 14:26:56 +0200 Subject: perf tools: Fix module symbol processing The 'dso->kernel' condition is true also for kernel modules now, and there are several places that were omited by the initial change: - we need to identify modules separately in dso__process_kernel_symbol - we need to set 'dso->kernel' for module from buildid table - there's no need to use 'dso->kernel || kmodule' in one condition Committer testing: Before: # perf test -v object Objdump command is: objdump -z -d --start-address=0xffffffff813e682f --stop-address=0xffffffff813e68af /usr/lib/debug/lib/modules/5.7.14-200.fc32.x86_64/vmlinux Bytes read match those read by objdump Reading object code for memory address: 0xffffffffc02dc257 File is: /lib/modules/5.7.14-200.fc32.x86_64/kernel/arch/x86/crypto/crc32c-intel.ko.xz On file address is: 0xffffffffc02dc2e7 dso__data_read_offset failed test child finished with -1 ---- end ---- Object code reading: FAILED! # After: # perf test object 26: Object code reading : Ok # perf test object 26: Object code reading : Ok # perf test object 26: Object code reading : Ok # perf test object 26: Object code reading : Ok # perf test object 26: Object code reading : Ok # Fixes: 02213cec64bb ("perf maps: Mark module DSOs with kernel type") Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Namhyung Kim Signed-off-by: Jiri Olsa --- tools/perf/util/header.c | 3 +-- tools/perf/util/symbol-elf.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e1db72c73e89..9cf4efdcbbbd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2090,9 +2090,8 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, if (!kmod_path__parse_name(&m, filename) && m.kmod) dso__set_module_info(dso, &m, machine); - else - dso->kernel = dso_space; + dso->kernel = dso_space; free(m.name); } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 48716d216445..8cc4b0059fb0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -872,7 +872,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * kallsyms and identity maps. Overwrite it to * map to the kernel dso. */ - if (*remap_kernel && dso->kernel) { + if (*remap_kernel && dso->kernel && !kmodule) { *remap_kernel = false; map->start = shdr->sh_addr + ref_reloc(kmap); map->end = map->start + shdr->sh_size; @@ -1068,7 +1068,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * Initial kernel and module mappings do not map to the dso. * Flag the fixups. */ - if (dso->kernel || kmodule) { + if (dso->kernel) { remap_kernel = true; adjust_kernel_syms = dso->adjust_symbols; } @@ -1130,7 +1130,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, (sym.st_value & 1)) --sym.st_value; - if (dso->kernel || kmodule) { + if (dso->kernel) { if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map, section_name, adjust_kernel_syms, kmodule, &remap_kernel)) goto out_elf_end; -- cgit From fa5c893181ed2ca2f96552f50073786d2cfce6c0 Mon Sep 17 00:00:00 2001 From: Daniel Díaz Date: Wed, 12 Aug 2020 17:15:17 -0500 Subject: tools build feature: Quote CC and CXX for their arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using a cross-compilation environment, such as OpenEmbedded, the CC an CXX variables are set to something more than just a command: there are arguments (such as --sysroot) that need to be passed on to the compiler so that the right set of headers and libraries are used. For the particular case that our systems detected, CC is set to the following: export CC="aarch64-linaro-linux-gcc --sysroot=/oe/build/tmp/work/machine/perf/1.0-r9/recipe-sysroot" Without quotes, detection is as follows: Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libbfd: [ OFF ] ... libcap: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] ... libaio: [ OFF ] ... libzstd: [ OFF ] ... disassembler-four-args: [ OFF ] Makefile.config:414: *** No gnu/libc-version.h found, please install glibc-dev[el]. Stop. Makefile.perf:230: recipe for target 'sub-make' failed make[1]: *** [sub-make] Error 2 Makefile:69: recipe for target 'all' failed make: *** [all] Error 2 With CC and CXX quoted, some of those features are now detected. Fixes: e3232c2f39ac ("tools build feature: Use CC and CXX from parent") Signed-off-by: Daniel Díaz Reviewed-by: Thomas Hebb Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20200812221518.2869003-1-daniel.diaz@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 774f0b0ca28a..e7818b44b48e 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -8,7 +8,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) -- cgit From 509f68e327d0c87e9bc93cb138e445c506ae9ce9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 13 Aug 2020 13:30:42 +0200 Subject: perf bench numa: Fix cpumask memory leak in node_has_cpus() Couple numa_allocate_cpumask() and numa_free_cpumask() functions Signed-off-by: Alexander Gordeev Reviewed-by: Srikar Dronamraju Cc: Alexander Shishkin Cc: Balamuruhan S Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lore.kernel.org/lkml/20200813113041.GA1685@oc3871087118.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 31e2601d39c8..9066511aed47 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -248,16 +248,21 @@ static int is_node_present(int node) static bool node_has_cpus(int node) { struct bitmask *cpu = numa_allocate_cpumask(); + bool ret = false; /* fall back to nocpus */ unsigned int i; - if (cpu && !numa_node_to_cpus(node, cpu)) { + BUG_ON(!cpu); + if (!numa_node_to_cpus(node, cpu)) { for (i = 0; i < cpu->size; i++) { - if (numa_bitmask_isbitset(cpu, i)) - return true; + if (numa_bitmask_isbitset(cpu, i)) { + ret = true; + break; + } } } + numa_free_cpumask(cpu); - return false; /* lets fall back to nocpus safely */ + return ret; } static cpu_set_t bind_to_cpu(int target_cpu) -- cgit From 2db13a9b30f7e438777eb1a462c4b055ba948b89 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 13 Aug 2020 13:32:48 +0200 Subject: perf bench numa: Use numa_node_to_cpus() to bind tasks to nodes It is currently assumed that each node contains at most nr_cpus/nr_nodes CPUs and nodes' CPU ranges do not overlap. That assumption is generally incorrect as there are archs where a CPU number does not depend on to its node number. This update removes the described assumption by simply calling numa_node_to_cpus() interface and using the returned mask for binding CPUs to nodes. Also, variable types and names made consistent in functions using cpumask. Signed-off-by: Alexander Gordeev Reviewed-by: Srikar Dronamraju Cc: Alexander Shishkin Cc: Balamuruhan S Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lore.kernel.org/lkml/20200813113247.GA2014@oc3871087118.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 9066511aed47..6d5c890478cb 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -247,20 +247,20 @@ static int is_node_present(int node) */ static bool node_has_cpus(int node) { - struct bitmask *cpu = numa_allocate_cpumask(); + struct bitmask *cpumask = numa_allocate_cpumask(); bool ret = false; /* fall back to nocpus */ - unsigned int i; + int cpu; - BUG_ON(!cpu); - if (!numa_node_to_cpus(node, cpu)) { - for (i = 0; i < cpu->size; i++) { - if (numa_bitmask_isbitset(cpu, i)) { + BUG_ON(!cpumask); + if (!numa_node_to_cpus(node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) { ret = true; break; } } } - numa_free_cpumask(cpu); + numa_free_cpumask(cpumask); return ret; } @@ -293,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu) static cpu_set_t bind_to_node(int target_node) { - int cpus_per_node = g->p.nr_cpus / nr_numa_nodes(); cpu_set_t orig_mask, mask; int cpu; int ret; - BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus); - BUG_ON(!cpus_per_node); - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); BUG_ON(ret); @@ -310,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node) for (cpu = 0; cpu < g->p.nr_cpus; cpu++) CPU_SET(cpu, &mask); } else { - int cpu_start = (target_node + 0) * cpus_per_node; - int cpu_stop = (target_node + 1) * cpus_per_node; - - BUG_ON(cpu_stop > g->p.nr_cpus); + struct bitmask *cpumask = numa_allocate_cpumask(); - for (cpu = cpu_start; cpu < cpu_stop; cpu++) - CPU_SET(cpu, &mask); + BUG_ON(!cpumask); + if (!numa_node_to_cpus(target_node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) + CPU_SET(cpu, &mask); + } + } + numa_free_cpumask(cpumask); } ret = sched_setaffinity(0, sizeof(mask), &mask); -- cgit From ebf0d100df0731901c16632f78d78d35f4123bc4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Aug 2020 09:01:38 -0600 Subject: task_work: only grab task signal lock when needed If JOBCTL_TASK_WORK is already set on the targeted task, then we need not go through {lock,unlock}_task_sighand() to set it again and queue a signal wakeup. This is safe as we're checking it _after_ adding the new task_work with cmpxchg(). The ordering is as follows: task_work_add() get_signal() -------------------------------------------------------------- STORE(task->task_works, new_work); STORE(task->jobctl); mb(); mb(); LOAD(task->jobctl); LOAD(task->task_works); This speeds up TWA_SIGNAL handling quite a bit, which is important now that io_uring is relying on it for all task_work deliveries. Cc: Peter Zijlstra Cc: Jann Horn Acked-by: Oleg Nesterov Signed-off-by: Jens Axboe --- kernel/signal.c | 16 +++++++++++++++- kernel/task_work.c | 8 +++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 6f16f7c5d375..42b67d2cea37 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2541,7 +2541,21 @@ bool get_signal(struct ksignal *ksig) relock: spin_lock_irq(&sighand->siglock); - current->jobctl &= ~JOBCTL_TASK_WORK; + /* + * Make sure we can safely read ->jobctl() in task_work add. As Oleg + * states: + * + * It pairs with mb (implied by cmpxchg) before READ_ONCE. So we + * roughly have + * + * task_work_add: get_signal: + * STORE(task->task_works, new_work); STORE(task->jobctl); + * mb(); mb(); + * LOAD(task->jobctl); LOAD(task->task_works); + * + * and we can rely on STORE-MB-LOAD [ in task_work_add]. + */ + smp_store_mb(current->jobctl, current->jobctl & ~JOBCTL_TASK_WORK); if (unlikely(current->task_works)) { spin_unlock_irq(&sighand->siglock); task_work_run(); diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c0848ca1287..613b2d634af8 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -42,7 +42,13 @@ task_work_add(struct task_struct *task, struct callback_head *work, int notify) set_notify_resume(task); break; case TWA_SIGNAL: - if (lock_task_sighand(task, &flags)) { + /* + * Only grab the sighand lock if we don't already have some + * task_work pending. This pairs with the smp_store_mb() + * in get_signal(), see comment there. + */ + if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && + lock_task_sighand(task, &flags)) { task->jobctl |= JOBCTL_TASK_WORK; signal_wake_up(task, 0); unlock_task_sighand(task, &flags); -- cgit From 585c6ed738a5ed2a6fd7662fa1d82f25acfa85de Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 13 Aug 2020 14:00:38 +0200 Subject: drm/xen-front: Pass dumb buffer data offset to the backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While importing a dmabuf it is possible that the data of the buffer is put with offset which is indicated by the SGT offset. Respect the offset value and forward it to the backend. Signed-off-by: Oleksandr Andrushchenko Acked-by: Noralf Trønnes Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front.c | 6 ++++-- drivers/gpu/drm/xen/xen_drm_front.h | 2 +- drivers/gpu/drm/xen/xen_drm_front_gem.c | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 51818e76facd..3dd56794593a 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -157,7 +157,8 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u32 width, u32 height, - u32 bpp, u64 size, struct page **pages) + u32 bpp, u64 size, u32 offset, + struct page **pages) { struct xen_drm_front_evtchnl *evtchnl; struct xen_drm_front_dbuf *dbuf; @@ -194,6 +195,7 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, req->op.dbuf_create.gref_directory = xen_front_pgdir_shbuf_get_dir_start(&dbuf->shbuf); req->op.dbuf_create.buffer_sz = size; + req->op.dbuf_create.data_ofs = offset; req->op.dbuf_create.dbuf_cookie = dbuf_cookie; req->op.dbuf_create.width = width; req->op.dbuf_create.height = height; @@ -408,7 +410,7 @@ static int xen_drm_drv_dumb_create(struct drm_file *filp, ret = xen_drm_front_dbuf_create(drm_info->front_info, xen_drm_front_dbuf_to_cookie(obj), args->width, args->height, args->bpp, - args->size, + args->size, 0, xen_drm_front_gem_get_pages(obj)); if (ret) goto fail_backend; diff --git a/drivers/gpu/drm/xen/xen_drm_front.h b/drivers/gpu/drm/xen/xen_drm_front.h index f92c258350ca..54486d89650e 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.h +++ b/drivers/gpu/drm/xen/xen_drm_front.h @@ -145,7 +145,7 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u32 width, u32 height, - u32 bpp, u64 size, struct page **pages); + u32 bpp, u64 size, u32 offset, struct page **pages); int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u64 fb_cookie, u32 width, diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 4ec8a49241e1..39ff95b75357 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -210,7 +210,8 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev, ret = xen_drm_front_dbuf_create(drm_info->front_info, xen_drm_front_dbuf_to_cookie(&xen_obj->base), - 0, 0, 0, size, xen_obj->pages); + 0, 0, 0, size, sgt->sgl->offset, + xen_obj->pages); if (ret < 0) return ERR_PTR(ret); -- cgit From cb75c95c5262328bd4da3dd334f6826a3a34a979 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Thu, 13 Aug 2020 14:02:20 +0800 Subject: Documentation/locking/locktypes: Fix a typo We have three categories locks, not two. Signed-off-by: Huang Shijie Signed-off-by: Ingo Molnar Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200813060220.18199-1-sjhuang@iluvatar.ai --- Documentation/locking/locktypes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 1b577a8bf982..4cefed8048ca 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -10,7 +10,7 @@ Introduction ============ The kernel provides a variety of locking primitives which can be divided -into two categories: +into three categories: - Sleeping locks - CPU local locks -- cgit From 405fa8ac89e7aaa87282df659e525992f2639e76 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 13 Aug 2020 08:21:17 -0400 Subject: futex: Convert to use the preferred 'fallthrough' macro Signed-off-by: Miaohe Lin Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200813122117.51173-1-linmiaohe@huawei.com --- kernel/futex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 61e8153e6c76..a5876694a60e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3744,12 +3744,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; - /* fall through */ + fallthrough; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; - /* fall through */ + fallthrough; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: -- cgit From 9f457179244a1c0316546b1760f8993d0d718861 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Aug 2020 10:40:54 -0400 Subject: mm: memcontrol: fix warning when allocating the root cgroup Commit 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent cgroup") adds memory tracking to the memcg kernel structures themselves to make cgroups liable for the memory they are consuming through the allocation of child groups (which can be significant). This code is a bit awkward as it's spread out through several functions: The outermost function does memalloc_use_memcg(parent) to set up current->active_memcg, which designates which cgroup to charge, and the inner functions pass GFP_ACCOUNT to request charging for specific allocations. To make sure this dependency is satisfied at all times - to make sure we don't randomly charge whoever is calling the functions - the inner functions warn on !current->active_memcg. However, this triggers a false warning when the root memcg itself is allocated. No parent exists in this case, and so current->active_memcg is rightfully NULL. It's a false positive, not indicative of a bug. Delete the warnings for now, we can revisit this later. Fixes: 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent cgroup") Signed-off-by: Johannes Weiner Reported-by: Stephen Rothwell Acked-by: Roman Gushchin Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d59fd9af6e63..9d87082e64aa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5137,9 +5137,6 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - /* We charge the parent cgroup, never the current task */ - WARN_ON_ONCE(!current->active_memcg); - pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { @@ -5222,9 +5219,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - /* We charge the parent cgroup, never the current task */ - WARN_ON_ONCE(!current->active_memcg); - memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) -- cgit From 1edcd4675e44dc0e6e3b34e8e29000d8a05f998c Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Thu, 13 Aug 2020 14:02:20 +0800 Subject: Documentation/locking/locktypes: fix the typo We have three categories locks, not two. Signed-off-by: Huang Shijie Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200813060220.18199-1-sjhuang@iluvatar.ai Signed-off-by: Jonathan Corbet --- Documentation/locking/locktypes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 1b577a8bf982..4cefed8048ca 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -10,7 +10,7 @@ Introduction ============ The kernel provides a variety of locking primitives which can be divided -into two categories: +into three categories: - Sleeping locks - CPU local locks -- cgit From ff6165b2d7f66fccb7095a60ccc7a80813858665 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Aug 2020 09:47:43 -0600 Subject: io_uring: retain iov_iter state over io_read/io_write calls Instead of maintaining (and setting/remembering) iov_iter size and segment counts, just put the iov_iter in the async part of the IO structure. This is mostly a preparation patch for doing appropriate internal retries for short reads, but it also cleans up the state handling nicely and simplifies it quite a bit. Signed-off-by: Jens Axboe --- fs/io_uring.c | 136 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 70 insertions(+), 66 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1ec25ee71372..584f86178671 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -508,9 +508,8 @@ struct io_async_msghdr { struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; - struct iovec *iov; - ssize_t nr_segs; - ssize_t size; + const struct iovec *free_iovec; + struct iov_iter iter; struct wait_page_queue wpq; }; @@ -915,8 +914,8 @@ static void io_file_put_work(struct work_struct *work); static ssize_t io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, struct iov_iter *iter, bool needs_lock); -static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, +static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, + const struct iovec *fast_iov, struct iov_iter *iter); static struct kmem_cache *req_cachep; @@ -2299,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) ret = io_import_iovec(rw, req, &iovec, &iter, false); if (ret < 0) goto end_req; - ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, &iter); if (!ret) return true; kfree(iovec); @@ -2820,6 +2819,13 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; u8 opcode; + if (req->io) { + struct io_async_rw *iorw = &req->io->rw; + + *iovec = NULL; + return iov_iter_count(&iorw->iter); + } + opcode = req->opcode; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { *iovec = NULL; @@ -2845,14 +2851,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, return ret < 0 ? ret : sqe_len; } - if (req->io) { - struct io_async_rw *iorw = &req->io->rw; - - iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size); - *iovec = NULL; - return iorw->size; - } - if (req->flags & REQ_F_BUFFER_SELECT) { ret = io_iov_buffer_select(req, *iovec, needs_lock); if (!ret) { @@ -2930,21 +2928,29 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb, return ret; } -static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, - struct iov_iter *iter) +static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, + const struct iovec *fast_iov, struct iov_iter *iter) { struct io_async_rw *rw = &req->io->rw; - rw->nr_segs = iter->nr_segs; - rw->size = io_size; + memcpy(&rw->iter, iter, sizeof(*iter)); + rw->free_iovec = NULL; + /* can only be fixed buffers, no need to do anything */ + if (iter->type == ITER_BVEC) + return; if (!iovec) { - rw->iov = rw->fast_iov; - if (rw->iov != fast_iov) - memcpy(rw->iov, fast_iov, + unsigned iov_off = 0; + + rw->iter.iov = rw->fast_iov; + if (iter->iov != fast_iov) { + iov_off = iter->iov - fast_iov; + rw->iter.iov += iov_off; + } + if (rw->fast_iov != fast_iov) + memcpy(rw->fast_iov + iov_off, fast_iov + iov_off, sizeof(struct iovec) * iter->nr_segs); } else { - rw->iov = iovec; + rw->free_iovec = iovec; req->flags |= REQ_F_NEED_CLEANUP; } } @@ -2963,8 +2969,8 @@ static int io_alloc_async_ctx(struct io_kiocb *req) return __io_alloc_async_ctx(req); } -static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, +static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, + const struct iovec *fast_iov, struct iov_iter *iter) { if (!io_op_defs[req->opcode].async_ctx) @@ -2973,7 +2979,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, if (__io_alloc_async_ctx(req)) return -ENOMEM; - io_req_map_rw(req, io_size, iovec, fast_iov, iter); + io_req_map_rw(req, iovec, fast_iov, iter); } return 0; } @@ -2981,18 +2987,19 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, static inline int io_rw_prep_async(struct io_kiocb *req, int rw, bool force_nonblock) { - struct io_async_ctx *io = req->io; - struct iov_iter iter; + struct io_async_rw *iorw = &req->io->rw; ssize_t ret; - io->rw.iov = io->rw.fast_iov; + iorw->iter.iov = iorw->fast_iov; + /* reset ->io around the iovec import, we don't want to use it */ req->io = NULL; - ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock); - req->io = io; + ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, + &iorw->iter, !force_nonblock); + req->io = container_of(iorw, struct io_async_ctx, rw); if (unlikely(ret < 0)) return ret; - io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter); + io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter); return 0; } @@ -3090,7 +3097,8 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, * succeed, or in rare cases where it fails, we then fall back to using the * async worker threads for a blocking retry. */ -static bool io_rw_should_retry(struct io_kiocb *req) +static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, + struct iovec *fast_iov, struct iov_iter *iter) { struct kiocb *kiocb = &req->rw.kiocb; int ret; @@ -3113,8 +3121,11 @@ static bool io_rw_should_retry(struct io_kiocb *req) * If request type doesn't require req->io to defer in general, * we need to allocate it here */ - if (!req->io && __io_alloc_async_ctx(req)) - return false; + if (!req->io) { + if (__io_alloc_async_ctx(req)) + return false; + io_req_map_rw(req, iovec, fast_iov, iter); + } ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, io_async_buf_func, req); @@ -3141,12 +3152,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; - struct iov_iter iter; + struct iov_iter __iter, *iter = &__iter; size_t iov_count; - ssize_t io_size, ret, ret2; - unsigned long nr_segs; + ssize_t io_size, ret, ret2 = 0; + + if (req->io) + iter = &req->io->rw.iter; - ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock); + ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; io_size = ret; @@ -3160,30 +3173,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (force_nonblock && !io_file_supports_async(req->file, READ)) goto copy_iov; - iov_count = iov_iter_count(&iter); - nr_segs = iter.nr_segs; + iov_count = iov_iter_count(iter); ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count); if (unlikely(ret)) goto out_free; - ret2 = io_iter_do_read(req, &iter); + ret2 = io_iter_do_read(req, iter); /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) { kiocb_done(kiocb, ret2, cs); } else { - iter.count = iov_count; - iter.nr_segs = nr_segs; copy_iov: - ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, - &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter); if (ret) goto out_free; /* it's copied and will be cleaned with ->io */ iovec = NULL; /* if we can retry, do so with the callbacks armed */ - if (io_rw_should_retry(req)) { - ret2 = io_iter_do_read(req, &iter); + if (io_rw_should_retry(req, iovec, inline_vecs, iter)) { + ret2 = io_iter_do_read(req, iter); if (ret2 == -EIOCBQUEUED) { goto out_free; } else if (ret2 != -EAGAIN) { @@ -3223,12 +3232,14 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; - struct iov_iter iter; + struct iov_iter __iter, *iter = &__iter; size_t iov_count; ssize_t ret, ret2, io_size; - unsigned long nr_segs; - ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock); + if (req->io) + iter = &req->io->rw.iter; + + ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; io_size = ret; @@ -3247,8 +3258,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, (req->flags & REQ_F_ISREG)) goto copy_iov; - iov_count = iov_iter_count(&iter); - nr_segs = iter.nr_segs; + iov_count = iov_iter_count(iter); ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count); if (unlikely(ret)) goto out_free; @@ -3269,9 +3279,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, kiocb->ki_flags |= IOCB_WRITE; if (req->file->f_op->write_iter) - ret2 = call_write_iter(req->file, kiocb, &iter); + ret2 = call_write_iter(req->file, kiocb, iter); else if (req->file->f_op->write) - ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); + ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter); else ret2 = -EINVAL; @@ -3284,16 +3294,10 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, if (!force_nonblock || ret2 != -EAGAIN) { kiocb_done(kiocb, ret2, cs); } else { - iter.count = iov_count; - iter.nr_segs = nr_segs; copy_iov: - ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, - &iter); - if (ret) - goto out_free; - /* it's copied and will be cleaned with ->io */ - iovec = NULL; - return -EAGAIN; + ret = io_setup_async_rw(req, iovec, inline_vecs, iter); + if (!ret) + return -EAGAIN; } out_free: if (iovec) @@ -5583,8 +5587,8 @@ static void __io_clean_op(struct io_kiocb *req) case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - if (io->rw.iov != io->rw.fast_iov) - kfree(io->rw.iov); + if (io->rw.free_iovec) + kfree(io->rw.free_iovec); break; case IORING_OP_RECVMSG: case IORING_OP_SENDMSG: -- cgit From 94c7eb54c4b8e81618ec79f414fe1ca5767f9720 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2020 10:06:43 -0700 Subject: random32: add a tracepoint for prandom_u32() There has been some heat around prandom_u32() lately, and some people were wondering if there was a simple way to determine how often it was used, before considering making it maybe 10 times more expensive. This tracepoint exports the generated pseudo random value. Tested: perf list | grep prandom_u32 random:prandom_u32 [Tracepoint event] perf record -a [-g] [-C1] -e random:prandom_u32 sleep 1 [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 259.748 MB perf.data (924087 samples) ] perf report --nochildren ... 97.67% ksoftirqd/1 [kernel.vmlinux] [k] prandom_u32 | ---prandom_u32 prandom_u32 | |--48.86%--tcp_v4_syn_recv_sock | tcp_check_req | tcp_v4_rcv | ... --48.81%--tcp_conn_request tcp_v4_conn_request tcp_rcv_state_process ... perf script Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: David S. Miller --- include/trace/events/random.h | 17 +++++++++++++++++ lib/random32.c | 2 ++ 2 files changed, 19 insertions(+) diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 32c10a515e2d..9570a10cb949 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -307,6 +307,23 @@ TRACE_EVENT(urandom_read, __entry->pool_left, __entry->input_left) ); +TRACE_EVENT(prandom_u32, + + TP_PROTO(unsigned int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( unsigned int, ret) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret=%u" , __entry->ret) +); + #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ diff --git a/lib/random32.c b/lib/random32.c index 3d749abb9e80..932345323af0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -82,6 +83,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); + trace_prandom_u32(res); put_cpu_var(net_rand_state); return res; -- cgit From 88fd1cb80daa20af063bce81e1fad14e945a8dc4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 13 Aug 2020 21:45:25 +0206 Subject: af_packet: TPACKET_V3: fix fill status rwlock imbalance After @blk_fill_in_prog_lock is acquired there is an early out vnet situation that can occur. In that case, the rwlock needs to be released. Also, since @blk_fill_in_prog_lock is only acquired when @tp_version is exactly TPACKET_V3, only release it on that exact condition as well. And finally, add sparse annotation so that it is clearer that prb_fill_curr_block() and prb_clear_blk_fill_status() are acquiring and releasing @blk_fill_in_prog_lock, respectively. sparse is still unable to understand the balance, but the warnings are now on a higher level that make more sense. Fixes: 632ca50f2cbd ("af_packet: TPACKET_V3: replace busy-wait loop") Signed-off-by: John Ogness Reported-by: kernel test robot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0b8160d1a6e0..479c257ded73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -941,6 +941,7 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) + __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); @@ -989,6 +990,7 @@ static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) + __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; @@ -2286,8 +2288,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) + vio_le(), true, 0)) { + if (po->tp_version == TPACKET_V3) + prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; + } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); @@ -2393,7 +2398,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); - } else { + } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } -- cgit From 1f3a090b9033f69de380c03db3ea1a1015c850cf Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 12 Aug 2020 17:56:39 +0800 Subject: net: openvswitch: introduce common code for flushing flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid some issues, for example RCU usage warning and double free, we should flush the flows under ovs_lock. This patch refactors table_instance_destroy and introduces table_instance_flow_flush which can be invoked by __dp_destroy or ovs_flow_tbl_flush. Fixes: 50b0e61b32ee ("net: openvswitch: fix possible memleak on destroy flow-table") Reported-by: Johan Knöös Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-August/050489.html Signed-off-by: Tonghao Zhang Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 10 +++++++++- net/openvswitch/flow_table.c | 35 +++++++++++++++-------------------- net/openvswitch/flow_table.h | 3 +++ 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 42f8cc70bb2c..6e47ef7ef036 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1756,6 +1756,7 @@ err: /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { + struct flow_table *table = &dp->table; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { @@ -1774,7 +1775,14 @@ static void __dp_destroy(struct datapath *dp) */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); - /* RCU destroy the flow table */ + /* Flush sw_flow in the tables. RCU cb only releases resource + * such as dp, ports and tables. That may avoid some issues + * such as RCU usage warning. + */ + table_instance_flow_flush(table, ovsl_dereference(table->ti), + ovsl_dereference(table->ufid_ti)); + + /* RCU destroy the ports, meters and flow tables. */ call_rcu(&dp->rcu, destroy_dp_rcu); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8c12675cbb67..e2235849a57e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -473,19 +473,15 @@ static void table_instance_flow_free(struct flow_table *table, flow_mask_remove(table, flow->mask); } -static void table_instance_destroy(struct flow_table *table, - struct table_instance *ti, - struct table_instance *ufid_ti, - bool deferred) +/* Must be called with OVS mutex held. */ +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti) { int i; - if (!ti) - return; - - BUG_ON(!ufid_ti); if (ti->keep_flows) - goto skip_flows; + return; for (i = 0; i < ti->n_buckets; i++) { struct sw_flow *flow; @@ -497,18 +493,16 @@ static void table_instance_destroy(struct flow_table *table, table_instance_flow_free(table, ti, ufid_ti, flow, false); - ovs_flow_free(flow, deferred); + ovs_flow_free(flow, true); } } +} -skip_flows: - if (deferred) { - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); - } else { - __table_instance_destroy(ti); - __table_instance_destroy(ufid_ti); - } +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti) +{ + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); } /* No need for locking this function is called from RCU callback or @@ -523,7 +517,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); - table_instance_destroy(table, ti, ufid_ti, false); + table_instance_destroy(ti, ufid_ti); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -641,7 +635,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(flow_table, old_ti, old_ufid_ti, true); + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti); + table_instance_destroy(old_ti, old_ufid_ti); return 0; err_free_ti: diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 74ce48fecba9..6e7d4ac59353 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -105,5 +105,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); void ovs_flow_masks_rebalance(struct flow_table *table); +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti); #endif /* flow_table.h */ -- cgit From 227c0c9673d86732995474d277f84e08ee763e46 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Aug 2020 11:51:40 -0600 Subject: io_uring: internally retry short reads We've had a few application cases of not handling short reads properly, and it is understandable as short reads aren't really expected if the application isn't doing non-blocking IO. Now that we retain the iov_iter over retries, we can implement internal retry pretty trivially. This ensures that we don't return a short read, even for buffered reads on page cache conflicts. Cleanup the deep nesting and hard to read nature of io_read() as well, it's much more straight forward now to read and understand. Added a few comments explaining the logic as well. Signed-off-by: Jens Axboe --- fs/io_uring.c | 109 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 584f86178671..7dd6df15bc49 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -510,6 +510,7 @@ struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; const struct iovec *free_iovec; struct iov_iter iter; + size_t bytes_done; struct wait_page_queue wpq; }; @@ -916,7 +917,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, bool needs_lock); static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, - struct iov_iter *iter); + struct iov_iter *iter, bool force); static struct kmem_cache *req_cachep; @@ -2298,7 +2299,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) ret = io_import_iovec(rw, req, &iovec, &iter, false); if (ret < 0) goto end_req; - ret = io_setup_async_rw(req, iovec, inline_vecs, &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false); if (!ret) return true; kfree(iovec); @@ -2588,6 +2589,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); + /* add previously done IO, if any */ + if (req->io && req->io->rw.bytes_done > 0) { + if (ret < 0) + ret = req->io->rw.bytes_done; + else + ret += req->io->rw.bytes_done; + } + if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; if (ret >= 0 && kiocb->ki_complete == io_complete_rw) @@ -2935,6 +2944,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, memcpy(&rw->iter, iter, sizeof(*iter)); rw->free_iovec = NULL; + rw->bytes_done = 0; /* can only be fixed buffers, no need to do anything */ if (iter->type == ITER_BVEC) return; @@ -2971,9 +2981,9 @@ static int io_alloc_async_ctx(struct io_kiocb *req) static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, - struct iov_iter *iter) + struct iov_iter *iter, bool force) { - if (!io_op_defs[req->opcode].async_ctx) + if (!force && !io_op_defs[req->opcode].async_ctx) return 0; if (!req->io) { if (__io_alloc_async_ctx(req)) @@ -3097,8 +3107,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, * succeed, or in rare cases where it fails, we then fall back to using the * async worker threads for a blocking retry. */ -static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, - struct iovec *fast_iov, struct iov_iter *iter) +static bool io_rw_should_retry(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; int ret; @@ -3107,8 +3116,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, if (req->flags & REQ_F_NOWAIT) return false; - /* already tried, or we're doing O_DIRECT */ - if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ)) + /* Only for buffered IO */ + if (kiocb->ki_flags & IOCB_DIRECT) return false; /* * just use poll if we can, and don't attempt if the fs doesn't @@ -3117,16 +3126,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) return false; - /* - * If request type doesn't require req->io to defer in general, - * we need to allocate it here - */ - if (!req->io) { - if (__io_alloc_async_ctx(req)) - return false; - io_req_map_rw(req, iovec, fast_iov, iter); - } - ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, io_async_buf_func, req); if (!ret) { @@ -3153,8 +3152,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; + ssize_t io_size, ret, ret2; size_t iov_count; - ssize_t io_size, ret, ret2 = 0; if (req->io) iter = &req->io->rw.iter; @@ -3164,6 +3163,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, return ret; io_size = ret; req->result = io_size; + ret = 0; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3178,31 +3178,62 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (unlikely(ret)) goto out_free; - ret2 = io_iter_do_read(req, iter); + ret = io_iter_do_read(req, iter); - /* Catch -EAGAIN return for forced non-blocking submission */ - if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) { - kiocb_done(kiocb, ret2, cs); - } else { -copy_iov: - ret = io_setup_async_rw(req, iovec, inline_vecs, iter); + if (!ret) { + goto done; + } else if (ret == -EIOCBQUEUED) { + ret = 0; + goto out_free; + } else if (ret == -EAGAIN) { + ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (ret) goto out_free; - /* it's copied and will be cleaned with ->io */ - iovec = NULL; - /* if we can retry, do so with the callbacks armed */ - if (io_rw_should_retry(req, iovec, inline_vecs, iter)) { - ret2 = io_iter_do_read(req, iter); - if (ret2 == -EIOCBQUEUED) { - goto out_free; - } else if (ret2 != -EAGAIN) { - kiocb_done(kiocb, ret2, cs); - goto out_free; - } - } + return -EAGAIN; + } else if (ret < 0) { + goto out_free; + } + + /* read it all, or we did blocking attempt. no retry. */ + if (!iov_iter_count(iter) || !force_nonblock) + goto done; + + io_size -= ret; +copy_iov: + ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); + if (ret2) { + ret = ret2; + goto out_free; + } + /* it's copied and will be cleaned with ->io */ + iovec = NULL; + /* now use our persistent iterator, if we aren't already */ + iter = &req->io->rw.iter; +retry: + req->io->rw.bytes_done += ret; + /* if we can retry, do so with the callbacks armed */ + if (!io_rw_should_retry(req)) { kiocb->ki_flags &= ~IOCB_WAITQ; return -EAGAIN; } + + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If we + * get -EIOCBQUEUED, then we'll get a notification when the desired + * page gets unlocked. We can also get a partial read here, and if we + * do, then just retry at the new offset. + */ + ret = io_iter_do_read(req, iter); + if (ret == -EIOCBQUEUED) { + ret = 0; + goto out_free; + } else if (ret > 0 && ret < io_size) { + /* we got some bytes, but not all. retry. */ + goto retry; + } +done: + kiocb_done(kiocb, ret, cs); + ret = 0; out_free: if (iovec) kfree(iovec); @@ -3295,7 +3326,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, kiocb_done(kiocb, ret2, cs); } else { copy_iov: - ret = io_setup_async_rw(req, iovec, inline_vecs, iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (!ret) return -EAGAIN; } -- cgit From c8c412f976124d85b8ded85c6ac3f760c12b63a3 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 13 Aug 2020 12:38:08 -0500 Subject: SMB3: Fix mkdir when idsfromsid configured on mount mkdir uses a compounded create operation which was not setting the security descriptor on create of a directory. Fix so mkdir now sets the mode and owner info properly when idsfromsid and modefromsid are configured on the mount. Signed-off-by: Steve French CC: Stable # v5.8 Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index b9db73687eaa..eba01d0908dd 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -115,6 +115,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, vars->oparms.fid = &fid; vars->oparms.reconnect = false; vars->oparms.mode = mode; + vars->oparms.cifs_sb = cifs_sb; rqst[num_rqst].rq_iov = &vars->open_iov[0]; rqst[num_rqst].rq_nvec = SMB2_CREATE_IOV_SIZE; -- cgit From f70f74d15ca80d73eca6d5a731257627fb6370c5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 17 Jun 2020 12:02:19 +0900 Subject: kconfig: remove '---help---' support The conversion is done. No more user of '---help---'. Cc: Ulf Magnusson Signed-off-by: Masahiro Yamada --- scripts/checkkconfigsymbols.py | 2 +- scripts/kconfig/lexer.l | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index 00a10a293f4f..1548f9ce4682 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -34,7 +34,7 @@ REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL) REGEX_KCONFIG_DEF = re.compile(DEF) REGEX_KCONFIG_EXPR = re.compile(EXPR) REGEX_KCONFIG_STMT = re.compile(STMT) -REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$") +REGEX_KCONFIG_HELP = re.compile(r"^\s+help\s*$") REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$") REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") REGEX_QUOTES = re.compile("(\"(.*?)\")") diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index 6354c905b006..4b7339ff4c8b 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -105,7 +105,7 @@ n [A-Za-z0-9_-] "endchoice" return T_ENDCHOICE; "endif" return T_ENDIF; "endmenu" return T_ENDMENU; -"help"|"---help---" return T_HELP; +"help" return T_HELP; "hex" return T_HEX; "if" return T_IF; "imply" return T_IMPLY; -- cgit From 22a4ac026c15eba961883ed8466cb341e0447de1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 17 Jun 2020 12:02:20 +0900 Subject: Revert "checkpatch: kconfig: prefer 'help' over '---help---'" This reverts commit 84af7a6194e493fae312a2b7fa5a3b51f76d9282. The conversion is done. Cc: Ulf Magnusson Signed-off-by: Masahiro Yamada --- scripts/checkpatch.pl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4c820607540b..1351e052eb99 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3042,11 +3042,7 @@ sub process { if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) { $is_start = 1; - } elsif ($lines[$ln - 1] =~ /^\+\s*(?:help|---help---)\s*$/) { - if ($lines[$ln - 1] =~ "---help---") { - WARN("CONFIG_DESCRIPTION", - "prefer 'help' over '---help---' for new help texts\n" . $herecurr); - } + } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) { $length = -1; } -- cgit From 8410e6559412c95484b6e6d28c76cea57f753821 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 18 Jul 2020 15:38:47 +0900 Subject: kconfig: constify XPM data Constify arrays as well as strings. Signed-off-by: Masahiro Yamada --- scripts/kconfig/images.c | 30 +++++++++++++++--------------- scripts/kconfig/images.h | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/scripts/kconfig/images.c b/scripts/kconfig/images.c index b4fa0e4a63a5..2f9afffa5d79 100644 --- a/scripts/kconfig/images.c +++ b/scripts/kconfig/images.c @@ -5,7 +5,7 @@ #include "images.h" -const char *xpm_load[] = { +const char * const xpm_load[] = { "22 22 5 1", ". c None", "# c #000000", @@ -35,7 +35,7 @@ const char *xpm_load[] = { "###############.......", "......................"}; -const char *xpm_save[] = { +const char * const xpm_save[] = { "22 22 5 1", ". c None", "# c #000000", @@ -65,7 +65,7 @@ const char *xpm_save[] = { "..##################..", "......................"}; -const char *xpm_back[] = { +const char * const xpm_back[] = { "22 22 3 1", ". c None", "# c #000083", @@ -93,7 +93,7 @@ const char *xpm_back[] = { "......................", "......................"}; -const char *xpm_tree_view[] = { +const char * const xpm_tree_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -120,7 +120,7 @@ const char *xpm_tree_view[] = { "......................", "......................"}; -const char *xpm_single_view[] = { +const char * const xpm_single_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -147,7 +147,7 @@ const char *xpm_single_view[] = { "......................", "......................"}; -const char *xpm_split_view[] = { +const char * const xpm_split_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -174,7 +174,7 @@ const char *xpm_split_view[] = { "......................", "......................"}; -const char *xpm_symbol_no[] = { +const char * const xpm_symbol_no[] = { "12 12 2 1", " c white", ". c black", @@ -191,7 +191,7 @@ const char *xpm_symbol_no[] = { " .......... ", " "}; -const char *xpm_symbol_mod[] = { +const char * const xpm_symbol_mod[] = { "12 12 2 1", " c white", ". c black", @@ -208,7 +208,7 @@ const char *xpm_symbol_mod[] = { " .......... ", " "}; -const char *xpm_symbol_yes[] = { +const char * const xpm_symbol_yes[] = { "12 12 2 1", " c white", ". c black", @@ -225,7 +225,7 @@ const char *xpm_symbol_yes[] = { " .......... ", " "}; -const char *xpm_choice_no[] = { +const char * const xpm_choice_no[] = { "12 12 2 1", " c white", ". c black", @@ -242,7 +242,7 @@ const char *xpm_choice_no[] = { " .... ", " "}; -const char *xpm_choice_yes[] = { +const char * const xpm_choice_yes[] = { "12 12 2 1", " c white", ". c black", @@ -259,7 +259,7 @@ const char *xpm_choice_yes[] = { " .... ", " "}; -const char *xpm_menu[] = { +const char * const xpm_menu[] = { "12 12 2 1", " c white", ". c black", @@ -276,7 +276,7 @@ const char *xpm_menu[] = { " .......... ", " "}; -const char *xpm_menu_inv[] = { +const char * const xpm_menu_inv[] = { "12 12 2 1", " c white", ". c black", @@ -293,7 +293,7 @@ const char *xpm_menu_inv[] = { " .......... ", " "}; -const char *xpm_menuback[] = { +const char * const xpm_menuback[] = { "12 12 2 1", " c white", ". c black", @@ -310,7 +310,7 @@ const char *xpm_menuback[] = { " .......... ", " "}; -const char *xpm_void[] = { +const char * const xpm_void[] = { "12 12 2 1", " c white", ". c black", diff --git a/scripts/kconfig/images.h b/scripts/kconfig/images.h index d8ff614bd087..7212dec2006c 100644 --- a/scripts/kconfig/images.h +++ b/scripts/kconfig/images.h @@ -10,21 +10,21 @@ extern "C" { #endif -extern const char *xpm_load[]; -extern const char *xpm_save[]; -extern const char *xpm_back[]; -extern const char *xpm_tree_view[]; -extern const char *xpm_single_view[]; -extern const char *xpm_split_view[]; -extern const char *xpm_symbol_no[]; -extern const char *xpm_symbol_mod[]; -extern const char *xpm_symbol_yes[]; -extern const char *xpm_choice_no[]; -extern const char *xpm_choice_yes[]; -extern const char *xpm_menu[]; -extern const char *xpm_menu_inv[]; -extern const char *xpm_menuback[]; -extern const char *xpm_void[]; +extern const char * const xpm_load[]; +extern const char * const xpm_save[]; +extern const char * const xpm_back[]; +extern const char * const xpm_tree_view[]; +extern const char * const xpm_single_view[]; +extern const char * const xpm_split_view[]; +extern const char * const xpm_symbol_no[]; +extern const char * const xpm_symbol_mod[]; +extern const char * const xpm_symbol_yes[]; +extern const char * const xpm_choice_no[]; +extern const char * const xpm_choice_yes[]; +extern const char * const xpm_menu[]; +extern const char * const xpm_menu_inv[]; +extern const char * const xpm_menuback[]; +extern const char * const xpm_void[]; #ifdef __cplusplus } -- cgit From 7eb7c106f1538e29b5bab588b072faa7ce65fe7b Mon Sep 17 00:00:00 2001 From: Maxime Chretien Date: Wed, 8 Jul 2020 15:32:15 +0200 Subject: kconfig: qconf: Fix mouse events in search view On menu properties mouse events didn't do anything in search view (listMode). As there are no menus in listMode we can add an exception in tests to always change the value on mouse events if we are in listMode. Signed-off-by: Maxime Chretien Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 23d1cb01a41a..fdef05c62237 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -785,7 +785,8 @@ void ConfigList::mouseReleaseEvent(QMouseEvent* e) break; ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; if (ptype == P_MENU && rootEntry != menu && - mode != fullMode && mode != menuMode) + mode != fullMode && mode != menuMode && + mode != listMode) emit menuSelected(menu); else changeValue(item); @@ -835,7 +836,7 @@ void ConfigList::mouseDoubleClickEvent(QMouseEvent* e) if (!menu) goto skip; ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; - if (ptype == P_MENU) { + if (ptype == P_MENU && mode != listMode) { if (mode == singleMode) emit itemSelected(menu); else if (mode == symbolMode) -- cgit From d41809ff7af91b3f8ed4abe1cb36e69723af00d8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 29 Jul 2020 12:18:37 +0900 Subject: kconfig: add 'static' to some file-local data Fix some warnings from sparce like follows: warning: symbol '...' was not declared. Should it be static? Signed-off-by: Masahiro Yamada --- scripts/kconfig/lexer.l | 2 +- scripts/kconfig/symbol.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l index 4b7339ff4c8b..240109f965ae 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -36,7 +36,7 @@ struct buffer { YY_BUFFER_STATE state; }; -struct buffer *current_buf; +static struct buffer *current_buf; static int last_ts, first_ts; diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 9363e37b8870..ffa3ec65cc90 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -15,15 +15,21 @@ struct symbol symbol_yes = { .name = "y", .curr = { "y", yes }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_mod = { +}; + +struct symbol symbol_mod = { .name = "m", .curr = { "m", mod }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_no = { +}; + +struct symbol symbol_no = { .name = "n", .curr = { "n", no }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_empty = { +}; + +static struct symbol symbol_empty = { .name = "", .curr = { "", no }, .flags = SYMBOL_VALID, @@ -31,7 +37,7 @@ struct symbol symbol_yes = { struct symbol *sym_defconfig_list; struct symbol *modules_sym; -tristate modules_val; +static tristate modules_val; enum symbol_type sym_get_type(struct symbol *sym) { -- cgit From 614bf0a89ae215beafe15d827a9cd983277a9b90 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:51 +0900 Subject: kconfig: qconf: remove ->addSeparator() to menuBar I do not understand the purpose of this ->addSeparator(). It does not make any difference. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index fdef05c62237..cf2e06ebc4a8 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1541,7 +1541,6 @@ ConfigMainWindow::ConfigMainWindow(void) optionMenu->addAction(showDebugAction); // create help menu - menu->addSeparator(); QMenu* helpMenu = menu->addMenu("&Help"); helpMenu->addAction(showIntroAction); helpMenu->addAction(showAboutAction); -- cgit From d4b1cea7de0c7472c6f3eb1740fc53af6ee9ded5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:52 +0900 Subject: kconfig: qconf: do not use 'menu' variable for (QMenuBar *) I think it is a bit confusing to use 'menu' to hold a QMenuBar pointer. I want to use 'menu' for a QMenu pointer. You do not need to use a local variable here. Use menuBar() directly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index cf2e06ebc4a8..79275c44d7be 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1382,7 +1382,6 @@ void ConfigSearchWindow::search(void) ConfigMainWindow::ConfigMainWindow(void) : searchWindow(0) { - QMenuBar* menu; bool ok = true; QVariant x, y; int width, height; @@ -1433,7 +1432,6 @@ ConfigMainWindow::ConfigMainWindow(void) setTabOrder(configList, helpText); configList->setFocus(); - menu = menuBar(); toolBar = new QToolBar("Tools", this); addToolBar(toolBar); @@ -1519,7 +1517,7 @@ ConfigMainWindow::ConfigMainWindow(void) toolBar->addAction(fullViewAction); // create config menu - QMenu* config = menu->addMenu("&File"); + QMenu* config = menuBar()->addMenu("&File"); config->addAction(loadAction); config->addAction(saveAction); config->addAction(saveAsAction); @@ -1527,12 +1525,13 @@ ConfigMainWindow::ConfigMainWindow(void) config->addAction(quitAction); // create edit menu - QMenu* editMenu = menu->addMenu("&Edit"); + QMenu* editMenu = menuBar()->addMenu("&Edit"); editMenu->addAction(searchAction); // create options menu - QMenu* optionMenu = menu->addMenu("&Option"); + QMenu* optionMenu = menuBar()->addMenu("&Option"); optionMenu->addAction(showNameAction); + optionMenu->addAction(showRangeAction); optionMenu->addAction(showDataAction); optionMenu->addSeparator(); @@ -1541,7 +1540,7 @@ ConfigMainWindow::ConfigMainWindow(void) optionMenu->addAction(showDebugAction); // create help menu - QMenu* helpMenu = menu->addMenu("&Help"); + QMenu* helpMenu = menuBar()->addMenu("&Help"); helpMenu->addAction(showIntroAction); helpMenu->addAction(showAboutAction); -- cgit From 93ebaacdc59bebbe99c79b69f16bfe5408d30f23 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:53 +0900 Subject: kconfig: qconf: use 'menu' variable for (QMenu *) The variable 'config' for the file menu is inconsistent. You do not need to use different variables. Use 'menu' for every menu. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 79275c44d7be..e14b2b974199 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1516,33 +1516,32 @@ ConfigMainWindow::ConfigMainWindow(void) toolBar->addAction(splitViewAction); toolBar->addAction(fullViewAction); - // create config menu - QMenu* config = menuBar()->addMenu("&File"); - config->addAction(loadAction); - config->addAction(saveAction); - config->addAction(saveAsAction); - config->addSeparator(); - config->addAction(quitAction); + // create file menu + QMenu *menu = menuBar()->addMenu("&File"); + menu->addAction(loadAction); + menu->addAction(saveAction); + menu->addAction(saveAsAction); + menu->addSeparator(); + menu->addAction(quitAction); // create edit menu - QMenu* editMenu = menuBar()->addMenu("&Edit"); - editMenu->addAction(searchAction); + menu = menuBar()->addMenu("&Edit"); + menu->addAction(searchAction); // create options menu - QMenu* optionMenu = menuBar()->addMenu("&Option"); - optionMenu->addAction(showNameAction); - - optionMenu->addAction(showRangeAction); - optionMenu->addAction(showDataAction); - optionMenu->addSeparator(); - optionMenu->addActions(optGroup->actions()); - optionMenu->addSeparator(); - optionMenu->addAction(showDebugAction); + menu = menuBar()->addMenu("&Option"); + menu->addAction(showNameAction); + menu->addAction(showRangeAction); + menu->addAction(showDataAction); + menu->addSeparator(); + menu->addActions(optGroup->actions()); + menu->addSeparator(); + menu->addAction(showDebugAction); // create help menu - QMenu* helpMenu = menuBar()->addMenu("&Help"); - helpMenu->addAction(showIntroAction); - helpMenu->addAction(showAboutAction); + menu = menuBar()->addMenu("&Help"); + menu->addAction(showIntroAction); + menu->addAction(showAboutAction); connect (helpText, SIGNAL (anchorClicked (const QUrl &)), helpText, SLOT (clicked (const QUrl &)) ); -- cgit From 2afb3e26828a2f97e98537f86d3b747a28847ddd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:54 +0900 Subject: kconfig: qconf: remove toolBar from ConfigMainWindow members This pointer is only used in the ConfigMainWindow constructor. Drop it from the private members. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 2 +- scripts/kconfig/qconf.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index e14b2b974199..f2f5875979fe 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1432,7 +1432,7 @@ ConfigMainWindow::ConfigMainWindow(void) setTabOrder(configList, helpText); configList->setFocus(); - toolBar = new QToolBar("Tools", this); + QToolBar *toolBar = new QToolBar("Tools", this); addToolBar(toolBar); backAction = new QAction(QPixmap(xpm_back), "Back", this); diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 5eeab4a8bb43..b80683998635 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -326,7 +326,6 @@ protected: ConfigView *configView; ConfigList *configList; ConfigInfoView *helpText; - QToolBar *toolBar; QAction *backAction; QAction *singleViewAction; QAction *splitViewAction; -- cgit From 860ec3fbcaeba7582f3b65e2f2e805187bc78015 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:55 +0900 Subject: kconfig: qconf: overload addToolBar() to create and insert toolbar Use the overloaded function, addToolBar(const QString &title) to create a QToolBar object, setting its window title, and inserts it into the toolbar area. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index f2f5875979fe..b44058913b97 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1432,9 +1432,6 @@ ConfigMainWindow::ConfigMainWindow(void) setTabOrder(configList, helpText); configList->setFocus(); - QToolBar *toolBar = new QToolBar("Tools", this); - addToolBar(toolBar); - backAction = new QAction(QPixmap(xpm_back), "Back", this); connect(backAction, SIGNAL(triggered(bool)), SLOT(goBack())); @@ -1507,6 +1504,7 @@ ConfigMainWindow::ConfigMainWindow(void) connect(showAboutAction, SIGNAL(triggered(bool)), SLOT(showAbout())); // init tool bar + QToolBar *toolBar = addToolBar("Tools"); toolBar->addAction(backAction); toolBar->addSeparator(); toolBar->addAction(loadAction); -- cgit From 1b4263e7507f5b9fd2f1f675f712f95c1394a18f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:56 +0900 Subject: kconfig: qconf: remove unused ConfigList::listView() I do not know how this function can be useful. In fact, it is unsed. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index b80683998635..6e6bb0a96348 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -88,10 +88,6 @@ public: updateList(NULL); updateAll = false; } - ConfigList* listView() - { - return this; - } void addColumn(colIdx idx) { showColumn(idx); -- cgit From 740fdef8534f17184f0d57390be83a8a37e897f9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:57 +0900 Subject: kconfig: qconf: remove name from ConfigSearchWindow constructor This constructor is only called with "search" as the second argument. Hard-code the name in the constructor, and drop it from the function argument. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 44 +++++++++++++++++++++----------------------- scripts/kconfig/qconf.h | 2 +- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index b44058913b97..7412d5f14a4c 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1288,10 +1288,10 @@ void ConfigInfoView::contextMenuEvent(QContextMenuEvent *e) Parent::contextMenuEvent(e); } -ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *name) +ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) : Parent(parent), result(NULL) { - setObjectName(name); + setObjectName("search"); setWindowTitle("Search Config"); QVBoxLayout* layout1 = new QVBoxLayout(this); @@ -1312,9 +1312,9 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *nam split = new QSplitter(this); split->setOrientation(Qt::Vertical); - list = new ConfigView(split, name); + list = new ConfigView(split, "search"); list->list->mode = listMode; - info = new ConfigInfoView(split, name); + info = new ConfigInfoView(split, "search"); connect(list->list, SIGNAL(menuChanged(struct menu *)), info, SLOT(setInfo(struct menu *))); connect(list->list, SIGNAL(menuChanged(struct menu *)), @@ -1322,25 +1322,23 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *nam layout1->addWidget(split); - if (name) { - QVariant x, y; - int width, height; - bool ok; + QVariant x, y; + int width, height; + bool ok; - configSettings->beginGroup(name); - width = configSettings->value("/window width", parent->width() / 2).toInt(); - height = configSettings->value("/window height", parent->height() / 2).toInt(); - resize(width, height); - x = configSettings->value("/window x"); - y = configSettings->value("/window y"); - if ((x.isValid())&&(y.isValid())) - move(x.toInt(), y.toInt()); - QList sizes = configSettings->readSizes("/split", &ok); - if (ok) - split->setSizes(sizes); - configSettings->endGroup(); - connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); - } + configSettings->beginGroup("search"); + width = configSettings->value("/window width", parent->width() / 2).toInt(); + height = configSettings->value("/window height", parent->height() / 2).toInt(); + resize(width, height); + x = configSettings->value("/window x"); + y = configSettings->value("/window y"); + if (x.isValid() && y.isValid()) + move(x.toInt(), y.toInt()); + QList sizes = configSettings->readSizes("/split", &ok); + if (ok) + split->setSizes(sizes); + configSettings->endGroup(); + connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } void ConfigSearchWindow::saveSettings(void) @@ -1642,7 +1640,7 @@ void ConfigMainWindow::saveConfigAs(void) void ConfigMainWindow::searchConfig(void) { if (!searchWindow) - searchWindow = new ConfigSearchWindow(this, "search"); + searchWindow = new ConfigSearchWindow(this); searchWindow->show(); } diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 6e6bb0a96348..335f0776984f 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -272,7 +272,7 @@ class ConfigSearchWindow : public QDialog { Q_OBJECT typedef class QDialog Parent; public: - ConfigSearchWindow(ConfigMainWindow* parent, const char *name = 0); + ConfigSearchWindow(ConfigMainWindow *parent); public slots: void saveSettings(void); -- cgit From 92641154bf6f138c37bae31cf735359f703749cc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:58 +0900 Subject: kconfig: qconf: omit parent to QHBoxLayout() Instead of passing 0 (i.e. nullptr), leave it empty. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 7412d5f14a4c..5ffe06658c18 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1297,7 +1297,8 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) QVBoxLayout* layout1 = new QVBoxLayout(this); layout1->setContentsMargins(11, 11, 11, 11); layout1->setSpacing(6); - QHBoxLayout* layout2 = new QHBoxLayout(0); + + QHBoxLayout* layout2 = new QHBoxLayout(); layout2->setContentsMargins(0, 0, 0, 0); layout2->setSpacing(6); layout2->addWidget(new QLabel("Find:", this)); -- cgit From cb77043f061a9ec7824784d18a0c484fbfe97059 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:18:59 +0900 Subject: kconfig: qconf: remove unused argument from ConfigList::updateList() This function allocates 'item' before using it, so the argument 'item' is always shadowed. Remove the meaningless argument. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 6 +++--- scripts/kconfig/qconf.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 5ffe06658c18..1640746cc37a 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -404,15 +404,15 @@ void ConfigList::updateSelection(void) emit menuSelected(menu); } -void ConfigList::updateList(ConfigItem* item) +void ConfigList::updateList() { ConfigItem* last = 0; + ConfigItem *item; if (!rootEntry) { if (mode != listMode) goto update; QTreeWidgetItemIterator it(this); - ConfigItem* item; while (*it) { item = (ConfigItem*)(*it); @@ -990,7 +990,7 @@ void ConfigView::updateList(ConfigItem* item) ConfigView* v; for (v = viewList; v; v = v->nextView) - v->list->updateList(item); + v->list->updateList(); } void ConfigView::updateListAll(void) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 335f0776984f..4dc5d34a6bca 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -69,7 +69,7 @@ protected: public slots: void setRootMenu(struct menu *menu); - void updateList(ConfigItem *item); + void updateList(); void setValue(ConfigItem* item, tristate val); void changeValue(ConfigItem* item); void updateSelection(void); @@ -85,7 +85,7 @@ public: void updateListAll(void) { updateAll = true; - updateList(NULL); + updateList(); updateAll = false; } void addColumn(colIdx idx) -- cgit From 1031685c5ec86cc84ccaea5825566c4ed08c66b7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:00 +0900 Subject: kconfig: qconf: remove unused argument from ConfigView::updateList() Now that ConfigList::updateList() takes no argument, the 'item' argument ConfigView::updateList() is no longer used. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 8 ++++---- scripts/kconfig/qconf.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 1640746cc37a..347e89874fa1 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -280,7 +280,7 @@ void ConfigLineEdit::keyPressEvent(QKeyEvent* e) case Qt::Key_Return: case Qt::Key_Enter: sym_set_string_value(item->menu->sym, text().toLatin1()); - parent()->updateList(item); + parent()->updateList(); break; default: Parent::keyPressEvent(e); @@ -471,7 +471,7 @@ void ConfigList::setValue(ConfigItem* item, tristate val) return; if (oldval == no && item->menu->list) item->setExpanded(true); - parent()->updateList(item); + parent()->updateList(); break; } } @@ -505,7 +505,7 @@ void ConfigList::changeValue(ConfigItem* item) item->setExpanded(true); } if (oldexpr != newexpr) - parent()->updateList(item); + parent()->updateList(); break; case S_INT: case S_HEX: @@ -985,7 +985,7 @@ void ConfigList::setAllOpen(bool open) } } -void ConfigView::updateList(ConfigItem* item) +void ConfigView::updateList() { ConfigView* v; diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 4dc5d34a6bca..6d06ec399ff0 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -210,7 +210,7 @@ class ConfigView : public QWidget { public: ConfigView(QWidget* parent, const char *name = 0); ~ConfigView(void); - static void updateList(ConfigItem* item); + static void updateList(); static void updateListAll(void); bool showName(void) const { return list->showName; } -- cgit From 5b75a6c896bc858cd36eb7be5fbc23318cf4205c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:01 +0900 Subject: kconfig: qconf: remove 'parent' from ConfigList::updateMenuList() All the call-sites of this function pass 'this' to the first argument. So, 'parent' is always the 'this' pointer. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 18 +++++++++--------- scripts/kconfig/qconf.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 347e89874fa1..506b1639a780 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -446,7 +446,7 @@ void ConfigList::updateList() return; } update: - updateMenuList(this, rootEntry); + updateMenuList(rootEntry); update(); resizeColumnToContents(0); } @@ -524,7 +524,7 @@ void ConfigList::setRootMenu(struct menu *menu) type = menu && menu->prompt ? menu->prompt->type : P_UNKNOWN; if (type != P_MENU) return; - updateMenuList(this, 0); + updateMenuList(0); rootEntry = menu; updateListAll(); if (currentItem()) { @@ -628,7 +628,7 @@ hide: } } -void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) +void ConfigList::updateMenuList(struct menu *menu) { struct menu* child; ConfigItem* item; @@ -637,19 +637,19 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) enum prop_type type; if (!menu) { - while (parent->topLevelItemCount() > 0) + while (topLevelItemCount() > 0) { - delete parent->takeTopLevelItem(0); + delete takeTopLevelItem(0); } return; } - last = (ConfigItem*)parent->topLevelItem(0); + last = (ConfigItem *)topLevelItem(0); if (last && !last->goParent) last = 0; for (child = menu->list; child; child = child->next) { - item = last ? last->nextSibling() : (ConfigItem*)parent->topLevelItem(0); + item = last ? last->nextSibling() : (ConfigItem *)topLevelItem(0); type = child->prompt ? child->prompt->type : P_UNKNOWN; switch (mode) { @@ -670,7 +670,7 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) if (!child->sym && !child->list && !child->prompt) continue; if (!item || item->menu != child) - item = new ConfigItem(parent, last, child, visible); + item = new ConfigItem(this, last, child, visible); else item->testUpdateMenu(visible); @@ -683,7 +683,7 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) } hide: if (item && item->menu == child) { - last = (ConfigItem*)parent->topLevelItem(0); + last = (ConfigItem *)topLevelItem(0); if (last == item) last = 0; else while (last->nextSibling() != item) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 6d06ec399ff0..952bd98d7912 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -102,7 +102,7 @@ public: bool menuSkip(struct menu *); void updateMenuList(ConfigItem *parent, struct menu*); - void updateMenuList(ConfigList *parent, struct menu*); + void updateMenuList(struct menu *menu); bool updateAll; -- cgit From 3c73ff040ed54b7bda754f4b016d7de31cb85207 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:02 +0900 Subject: kconfig: qconf: drop more localization code This is a remnant of commit 694c49a7c01c ("kconfig: drop localization support"). Get it back to the code prior to commit 3b9fa0931dd8 ("[PATCH] Kconfig i18n support"). Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 506b1639a780..d969beed287a 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -31,11 +31,6 @@ static ConfigSettings *configSettings; QAction *ConfigMainWindow::saveAction; -static inline QString qgettext(const char* str) -{ - return QString::fromLocal8Bit(str); -} - ConfigSettings::ConfigSettings() : QSettings("kernel.org", "qconf") { @@ -109,7 +104,7 @@ void ConfigItem::updateMenu(void) sym = menu->sym; prop = menu->prompt; - prompt = qgettext(menu_get_prompt(menu)); + prompt = menu_get_prompt(menu); if (prop) switch (prop->type) { case P_MENU: @@ -135,7 +130,7 @@ void ConfigItem::updateMenu(void) if (!sym) goto set_prompt; - setText(nameColIdx, QString::fromLocal8Bit(sym->name)); + setText(nameColIdx, sym->name); type = sym_get_type(sym); switch (type) { @@ -265,7 +260,7 @@ void ConfigLineEdit::show(ConfigItem* i) { item = i; if (sym_get_string_value(item->menu->sym)) - setText(QString::fromLocal8Bit(sym_get_string_value(item->menu->sym))); + setText(sym_get_string_value(item->menu->sym)); else setText(QString()); Parent::show(); -- cgit From 711b875bcdbcd198cafbcf0bd87562f9c3725278 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:03 +0900 Subject: kconfig: qconf: remove ConfigItem::pixmap/setPixmap Use QTreeWidgetItem::icon/setIcon directly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 22 +++++++++++----------- scripts/kconfig/qconf.h | 8 -------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index d969beed287a..e58da0ab4309 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -97,7 +97,7 @@ void ConfigItem::updateMenu(void) list = listView(); if (goParent) { - setPixmap(promptColIdx, list->menuBackPix); + setIcon(promptColIdx, list->menuBackPix); prompt = ".."; goto set_prompt; } @@ -114,15 +114,15 @@ void ConfigItem::updateMenu(void) */ if (sym && list->rootEntry == menu) break; - setPixmap(promptColIdx, list->menuPix); + setIcon(promptColIdx, list->menuPix); } else { if (sym) break; - setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); } goto set_prompt; case P_COMMENT: - setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); goto set_prompt; default: ; @@ -139,7 +139,7 @@ void ConfigItem::updateMenu(void) char ch; if (!sym_is_changeable(sym) && list->optMode == normalOpt) { - setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); setText(noColIdx, QString()); setText(modColIdx, QString()); setText(yesColIdx, QString()); @@ -149,22 +149,22 @@ void ConfigItem::updateMenu(void) switch (expr) { case yes: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setPixmap(promptColIdx, list->choiceYesPix); + setIcon(promptColIdx, list->choiceYesPix); else - setPixmap(promptColIdx, list->symbolYesPix); + setIcon(promptColIdx, list->symbolYesPix); setText(yesColIdx, "Y"); ch = 'Y'; break; case mod: - setPixmap(promptColIdx, list->symbolModPix); + setIcon(promptColIdx, list->symbolModPix); setText(modColIdx, "M"); ch = 'M'; break; default: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setPixmap(promptColIdx, list->choiceNoPix); + setIcon(promptColIdx, list->choiceNoPix); else - setPixmap(promptColIdx, list->symbolNoPix); + setIcon(promptColIdx, list->symbolNoPix); setText(noColIdx, "N"); ch = 'N'; break; @@ -769,7 +769,7 @@ void ConfigList::mouseReleaseEvent(QMouseEvent* e) idx = header()->logicalIndexAt(x); switch (idx) { case promptColIdx: - icon = item->pixmap(promptColIdx); + icon = item->icon(promptColIdx); if (!icon.isNull()) { int off = header()->sectionPosition(0) + visualRect(indexAt(p)).x() + 4; // 4 is Hardcoded image offset. There might be a way to do it properly. if (x >= off && x < off + icon.availableSizes().first().width()) { diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 952bd98d7912..5cda89a51740 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -172,14 +172,6 @@ public: { return Parent::text(idx); } - void setPixmap(colIdx idx, const QIcon &icon) - { - Parent::setIcon(idx, icon); - } - const QIcon pixmap(colIdx idx) const - { - return icon(idx); - } // TODO: Implement paintCell ConfigItem* nextItem; -- cgit From abf741a9fa4f9cee63f6c7bebc36bee44c6a8801 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:04 +0900 Subject: kconfig: qconf: remove ConfigList::addColumn/removeColumn Use QTreeView::showColumn/hideColumn directly. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 22 +++++++++++----------- scripts/kconfig/qconf.h | 8 -------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index e58da0ab4309..4341792d472b 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -317,7 +317,7 @@ ConfigList::ConfigList(ConfigView* p, const char *name) connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } - addColumn(promptColIdx); + showColumn(promptColIdx); reinit(); } @@ -335,21 +335,21 @@ bool ConfigList::menuSkip(struct menu *menu) void ConfigList::reinit(void) { - removeColumn(dataColIdx); - removeColumn(yesColIdx); - removeColumn(modColIdx); - removeColumn(noColIdx); - removeColumn(nameColIdx); + hideColumn(dataColIdx); + hideColumn(yesColIdx); + hideColumn(modColIdx); + hideColumn(noColIdx); + hideColumn(nameColIdx); if (showName) - addColumn(nameColIdx); + showColumn(nameColIdx); if (showRange) { - addColumn(noColIdx); - addColumn(modColIdx); - addColumn(yesColIdx); + showColumn(noColIdx); + showColumn(modColIdx); + showColumn(yesColIdx); } if (showData) - addColumn(dataColIdx); + showColumn(dataColIdx); updateListAll(); } diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 5cda89a51740..daa180bdb9b4 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -88,14 +88,6 @@ public: updateList(); updateAll = false; } - void addColumn(colIdx idx) - { - showColumn(idx); - } - void removeColumn(colIdx idx) - { - hideColumn(idx); - } void setAllOpen(bool open); void setParentMenu(void); -- cgit From 5ca534cdf97a6ef760b94a658cafc1e40c6e3e29 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:05 +0900 Subject: kconfig: qconf: remove ConfigItem::text/setText Use QTreeWidgetItem::text/setText directly Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index daa180bdb9b4..012414dcdee5 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -156,14 +156,6 @@ public: return ret; } - void setText(colIdx idx, const QString& text) - { - Parent::setText(idx, text); - } - QString text(colIdx idx) const - { - return Parent::text(idx); - } // TODO: Implement paintCell ConfigItem* nextItem; -- cgit From 4fa91f528f344a86cdef23b17d611b479421b2c8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:06 +0900 Subject: kconfig: qconf: remove unused voidPix, menuInvPix These are initialized, but not used by anyone. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 2 +- scripts/kconfig/qconf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 4341792d472b..a2cc25f036da 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -291,7 +291,7 @@ ConfigList::ConfigList(ConfigView* p, const char *name) updateAll(false), symbolYesPix(xpm_symbol_yes), symbolModPix(xpm_symbol_mod), symbolNoPix(xpm_symbol_no), choiceYesPix(xpm_choice_yes), choiceNoPix(xpm_choice_no), - menuPix(xpm_menu), menuInvPix(xpm_menu_inv), menuBackPix(xpm_menuback), voidPix(xpm_void), + menuPix(xpm_menu), menuBackPix(xpm_menuback), showName(false), showRange(false), showData(false), mode(singleMode), optMode(normalOpt), rootEntry(0), headerPopup(0) { diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 012414dcdee5..c46a79a69001 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -100,7 +100,7 @@ public: QPixmap symbolYesPix, symbolModPix, symbolNoPix; QPixmap choiceYesPix, choiceNoPix; - QPixmap menuPix, menuInvPix, menuBackPix, voidPix; + QPixmap menuPix, menuBackPix; bool showName, showRange, showData; enum listMode mode; -- cgit From 5cb255ffa1e7337669616188e1b2f4bd48305d34 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:07 +0900 Subject: kconfig: qconf: refactor icon setups These icon data are used by ConfigItem, but stored in each instance of ConfigView. There is no point to keep the same data in each of 3 instances, "menu", "config", and "search". Move the icon data to the more relevant ConfigItem class, and make them static members. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 33 +++++++++++++++++++++++---------- scripts/kconfig/qconf.h | 8 ++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index a2cc25f036da..bf471dc7ad70 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -74,6 +74,13 @@ bool ConfigSettings::writeSizes(const QString& key, const QList& value) return true; } +QIcon ConfigItem::symbolYesIcon; +QIcon ConfigItem::symbolModIcon; +QIcon ConfigItem::symbolNoIcon; +QIcon ConfigItem::choiceYesIcon; +QIcon ConfigItem::choiceNoIcon; +QIcon ConfigItem::menuIcon; +QIcon ConfigItem::menubackIcon; /* * set the new data @@ -97,7 +104,7 @@ void ConfigItem::updateMenu(void) list = listView(); if (goParent) { - setIcon(promptColIdx, list->menuBackPix); + setIcon(promptColIdx, menubackIcon); prompt = ".."; goto set_prompt; } @@ -114,7 +121,7 @@ void ConfigItem::updateMenu(void) */ if (sym && list->rootEntry == menu) break; - setIcon(promptColIdx, list->menuPix); + setIcon(promptColIdx, menuIcon); } else { if (sym) break; @@ -149,22 +156,22 @@ void ConfigItem::updateMenu(void) switch (expr) { case yes: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setIcon(promptColIdx, list->choiceYesPix); + setIcon(promptColIdx, choiceYesIcon); else - setIcon(promptColIdx, list->symbolYesPix); + setIcon(promptColIdx, symbolYesIcon); setText(yesColIdx, "Y"); ch = 'Y'; break; case mod: - setIcon(promptColIdx, list->symbolModPix); + setIcon(promptColIdx, symbolModIcon); setText(modColIdx, "M"); ch = 'M'; break; default: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setIcon(promptColIdx, list->choiceNoPix); + setIcon(promptColIdx, choiceNoIcon); else - setIcon(promptColIdx, list->symbolNoPix); + setIcon(promptColIdx, symbolNoIcon); setText(noColIdx, "N"); ch = 'N'; break; @@ -289,9 +296,6 @@ void ConfigLineEdit::keyPressEvent(QKeyEvent* e) ConfigList::ConfigList(ConfigView* p, const char *name) : Parent(p), updateAll(false), - symbolYesPix(xpm_symbol_yes), symbolModPix(xpm_symbol_mod), symbolNoPix(xpm_symbol_no), - choiceYesPix(xpm_choice_yes), choiceNoPix(xpm_choice_no), - menuPix(xpm_menu), menuBackPix(xpm_menuback), showName(false), showRange(false), showData(false), mode(singleMode), optMode(normalOpt), rootEntry(0), headerPopup(0) { @@ -1396,6 +1400,15 @@ ConfigMainWindow::ConfigMainWindow(void) if ((x.isValid())&&(y.isValid())) move(x.toInt(), y.toInt()); + // set up icons + ConfigItem::symbolYesIcon = QIcon(QPixmap(xpm_symbol_yes)); + ConfigItem::symbolModIcon = QIcon(QPixmap(xpm_symbol_mod)); + ConfigItem::symbolNoIcon = QIcon(QPixmap(xpm_symbol_no)); + ConfigItem::choiceYesIcon = QIcon(QPixmap(xpm_choice_yes)); + ConfigItem::choiceNoIcon = QIcon(QPixmap(xpm_choice_no)); + ConfigItem::menuIcon = QIcon(QPixmap(xpm_menu)); + ConfigItem::menubackIcon = QIcon(QPixmap(xpm_menuback)); + QWidget *widget = new QWidget(this); QVBoxLayout *layout = new QVBoxLayout(widget); setCentralWidget(widget); diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index c46a79a69001..460b858b0faa 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -98,10 +98,6 @@ public: bool updateAll; - QPixmap symbolYesPix, symbolModPix, symbolNoPix; - QPixmap choiceYesPix, choiceNoPix; - QPixmap menuPix, menuBackPix; - bool showName, showRange, showData; enum listMode mode; enum optionMode optMode; @@ -162,6 +158,10 @@ public: struct menu *menu; bool visible; bool goParent; + + static QIcon symbolYesIcon, symbolModIcon, symbolNoIcon; + static QIcon choiceYesIcon, choiceNoIcon; + static QIcon menuIcon, menubackIcon; }; class ConfigLineEdit : public QLineEdit { -- cgit From fa8de0a3bf3c02e6f00b7746e7e934db522cdda9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:08 +0900 Subject: kconfig: qconf: do not limit the pop-up menu to the first row If you right-click the first row in the option tree, the pop-up menu shows up, but if you right-click the second row or below, the event is ignored due to the following check: if (e->y() <= header()->geometry().bottom()) { Perhaps, the intention was to show the pop-menu only when the tree header was right-clicked, but this handler is not called in that case. Since the origin of e->y() starts from the bottom of the header, this check is odd. Going forward, you can right-click anywhere in the tree to get the pop-up menu. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 68 ++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index bf471dc7ad70..4440072aa545 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -864,40 +864,40 @@ void ConfigList::focusInEvent(QFocusEvent *e) void ConfigList::contextMenuEvent(QContextMenuEvent *e) { - if (e->y() <= header()->geometry().bottom()) { - if (!headerPopup) { - QAction *action; - - headerPopup = new QMenu(this); - action = new QAction("Show Name", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowName(bool))); - connect(parent(), SIGNAL(showNameChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showName); - headerPopup->addAction(action); - action = new QAction("Show Range", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowRange(bool))); - connect(parent(), SIGNAL(showRangeChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showRange); - headerPopup->addAction(action); - action = new QAction("Show Data", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowData(bool))); - connect(parent(), SIGNAL(showDataChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showData); - headerPopup->addAction(action); - } - headerPopup->exec(e->globalPos()); - e->accept(); - } else - e->ignore(); + if (!headerPopup) { + QAction *action; + + headerPopup = new QMenu(this); + action = new QAction("Show Name", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowName(bool))); + connect(parent(), SIGNAL(showNameChanged(bool)), + action, SLOT(setOn(bool))); + action->setChecked(showName); + headerPopup->addAction(action); + + action = new QAction("Show Range", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowRange(bool))); + connect(parent(), SIGNAL(showRangeChanged(bool)), + action, SLOT(setOn(bool))); + action->setChecked(showRange); + headerPopup->addAction(action); + + action = new QAction("Show Data", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowData(bool))); + connect(parent(), SIGNAL(showDataChanged(bool)), + action, SLOT(setOn(bool))); + action->setChecked(showData); + headerPopup->addAction(action); + } + + headerPopup->exec(e->globalPos()); + e->accept(); } ConfigView*ConfigView::viewList; -- cgit From d4bbe8a1b55aeaadfa0fa982b468eaec9b799f1a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Aug 2020 18:19:09 +0900 Subject: kconfig: qconf: move setOptionMode() to ConfigList from ConfigView ConfigView::setOptionMode() only gets access to the 'list' member. Move it to the more relevant ConfigList class. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 46 +++++++++++++++++++++++----------------------- scripts/kconfig/qconf.h | 9 ++++----- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 4440072aa545..bc390df49f1f 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -358,6 +358,18 @@ void ConfigList::reinit(void) updateListAll(); } +void ConfigList::setOptionMode(QAction *action) +{ + if (action == showNormalAction) + optMode = normalOpt; + else if (action == showAllAction) + optMode = allOpt; + else + optMode = promptOpt; + + updateListAll(); +} + void ConfigList::saveSettings(void) { if (!objectName().isEmpty()) { @@ -901,9 +913,9 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) } ConfigView*ConfigView::viewList; -QAction *ConfigView::showNormalAction; -QAction *ConfigView::showAllAction; -QAction *ConfigView::showPromptAction; +QAction *ConfigList::showNormalAction; +QAction *ConfigList::showAllAction; +QAction *ConfigList::showPromptAction; ConfigView::ConfigView(QWidget* parent, const char *name) : Parent(parent) @@ -934,18 +946,6 @@ ConfigView::~ConfigView(void) } } -void ConfigView::setOptionMode(QAction *act) -{ - if (act == showNormalAction) - list->optMode = normalOpt; - else if (act == showAllAction) - list->optMode = allOpt; - else - list->optMode = promptOpt; - - list->updateListAll(); -} - void ConfigView::setShowName(bool b) { if (list->showName != b) { @@ -1488,17 +1488,17 @@ ConfigMainWindow::ConfigMainWindow(void) QActionGroup *optGroup = new QActionGroup(this); optGroup->setExclusive(true); - connect(optGroup, SIGNAL(triggered(QAction*)), configView, + connect(optGroup, SIGNAL(triggered(QAction*)), configList, SLOT(setOptionMode(QAction *))); - connect(optGroup, SIGNAL(triggered(QAction *)), menuView, + connect(optGroup, SIGNAL(triggered(QAction *)), menuList, SLOT(setOptionMode(QAction *))); - configView->showNormalAction = new QAction("Show Normal Options", optGroup); - configView->showAllAction = new QAction("Show All Options", optGroup); - configView->showPromptAction = new QAction("Show Prompt Options", optGroup); - configView->showNormalAction->setCheckable(true); - configView->showAllAction->setCheckable(true); - configView->showPromptAction->setCheckable(true); + ConfigList::showNormalAction = new QAction("Show Normal Options", optGroup); + ConfigList::showNormalAction->setCheckable(true); + ConfigList::showAllAction = new QAction("Show All Options", optGroup); + ConfigList::showAllAction->setCheckable(true); + ConfigList::showPromptAction = new QAction("Show Prompt Options", optGroup); + ConfigList::showPromptAction->setCheckable(true); QAction *showDebugAction = new QAction("Show Debug Info", this); showDebugAction->setCheckable(true); diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 460b858b0faa..461df6419f15 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -74,6 +74,8 @@ public slots: void changeValue(ConfigItem* item); void updateSelection(void); void saveSettings(void); + void setOptionMode(QAction *action); + signals: void menuChanged(struct menu *menu); void menuSelected(struct menu *menu); @@ -105,6 +107,8 @@ public: QPalette disabledColorGroup; QPalette inactivedColorGroup; QMenu* headerPopup; + + static QAction *showNormalAction, *showAllAction, *showPromptAction; }; class ConfigItem : public QTreeWidgetItem { @@ -196,7 +200,6 @@ public slots: void setShowName(bool); void setShowRange(bool); void setShowData(bool); - void setOptionMode(QAction *); signals: void showNameChanged(bool); void showRangeChanged(bool); @@ -207,10 +210,6 @@ public: static ConfigView* viewList; ConfigView* nextView; - - static QAction *showNormalAction; - static QAction *showAllAction; - static QAction *showPromptAction; }; class ConfigInfoView : public QTextBrowser { -- cgit From 4bb5fcb97a5df0bbc0a27e0252b1e7ce140a8431 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Aug 2020 23:31:47 +0800 Subject: perf/x86/rapl: Fix missing psys sysfs attributes This fixes a problem introduced by commit: 5fb5273a905c ("perf/x86/rapl: Use new MSR detection interface") that perf event sysfs attributes for psys RAPL domain are missing. Fixes: 5fb5273a905c ("perf/x86/rapl: Use new MSR detection interface") Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Reviewed-by: Len Brown Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20200811153149.12242-2-rui.zhang@intel.com --- arch/x86/events/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 68b38820b10e..e9723833551f 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -665,7 +665,7 @@ static const struct attribute_group *rapl_attr_update[] = { &rapl_events_pkg_group, &rapl_events_ram_group, &rapl_events_gpu_group, - &rapl_events_gpu_group, + &rapl_events_psys_group, NULL, }; -- cgit From 74f41adab0f4a61857833e1b6fa8e9ad12c251b6 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Aug 2020 23:31:48 +0800 Subject: perf/x86/rapl: Support multiple RAPL unit quirks There will be more platforms with different fixed energy units. Enhance the code to support different RAPL unit quirks for different platforms. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Reviewed-by: Len Brown Link: https://lore.kernel.org/r/20200811153149.12242-3-rui.zhang@intel.com --- arch/x86/events/rapl.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index e9723833551f..d0002eb971b7 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -130,11 +130,16 @@ struct rapl_pmus { struct rapl_pmu *pmus[]; }; +enum rapl_unit_quirk { + RAPL_UNIT_QUIRK_NONE, + RAPL_UNIT_QUIRK_INTEL_HSW, +}; + struct rapl_model { struct perf_msr *rapl_msrs; unsigned long events; unsigned int msr_power_unit; - bool apply_quirk; + enum rapl_unit_quirk unit_quirk; }; /* 1/2^hw_unit Joule */ @@ -612,14 +617,20 @@ static int rapl_check_hw_unit(struct rapl_model *rm) for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + switch (rm->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ - if (rm->apply_quirk) + case RAPL_UNIT_QUIRK_INTEL_HSW: rapl_hw_unit[PERF_RAPL_RAM] = 16; + break; + default: + break; + } + /* * Calculate the timer rate: @@ -698,7 +709,6 @@ static struct rapl_model model_snb = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_PP1), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -707,7 +717,6 @@ static struct rapl_model model_snbep = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -717,7 +726,6 @@ static struct rapl_model model_hsw = { BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -726,7 +734,7 @@ static struct rapl_model model_hsx = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = true, + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -734,7 +742,7 @@ static struct rapl_model model_hsx = { static struct rapl_model model_knl = { .events = BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = true, + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -745,14 +753,12 @@ static struct rapl_model model_skl = { BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1) | BIT(PERF_RAPL_PSYS), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_amd_fam17h = { .events = BIT(PERF_RAPL_PKG), - .apply_quirk = false, .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; -- cgit From bcfd218b66790243ef303c1b35ce59f786ded225 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Aug 2020 23:31:49 +0800 Subject: perf/x86/rapl: Add support for Intel SPR platform Intel SPR platform uses fixed 16 bit energy unit for DRAM RAPL domain, and fixed 0 bit energy unit for Psys RAPL domain. After this, on SPR platform the energy counters appear in perf list. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Acked-by: Len Brown Link: https://lore.kernel.org/r/20200811153149.12242-4-rui.zhang@intel.com --- arch/x86/events/rapl.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index d0002eb971b7..67b411f7e8c4 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -133,6 +133,7 @@ struct rapl_pmus { enum rapl_unit_quirk { RAPL_UNIT_QUIRK_NONE, RAPL_UNIT_QUIRK_INTEL_HSW, + RAPL_UNIT_QUIRK_INTEL_SPR, }; struct rapl_model { @@ -627,6 +628,14 @@ static int rapl_check_hw_unit(struct rapl_model *rm) case RAPL_UNIT_QUIRK_INTEL_HSW: rapl_hw_unit[PERF_RAPL_RAM] = 16; break; + /* + * SPR shares the same DRAM domain energy unit as HSW, plus it + * also has a fixed energy unit for Psys domain. + */ + case RAPL_UNIT_QUIRK_INTEL_SPR: + rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_hw_unit[PERF_RAPL_PSYS] = 0; + break; default: break; } @@ -757,6 +766,16 @@ static struct rapl_model model_skl = { .rapl_msrs = intel_rapl_msrs, }; +static struct rapl_model model_spr = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PSYS), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + static struct rapl_model model_amd_fam17h = { .events = BIT(PERF_RAPL_PKG), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, @@ -793,6 +812,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h), {}, -- cgit From cc172ff301d8079e941a6eb31758951a6d764084 Mon Sep 17 00:00:00 2001 From: Libing Zhou Date: Fri, 14 Aug 2020 11:02:36 +0800 Subject: sched/debug: Fix the alignment of the show-state debug output Current sysrq(t) output task fields name are not aligned with actual task fields value, e.g.: kernel: sysrq: Show State kernel: task PC stack pid father kernel: systemd S12456 1 0 0x00000000 kernel: Call Trace: kernel: ? __schedule+0x240/0x740 To make it more readable, print fields name together with task fields value in the same line, with fixed width: kernel: sysrq: Show State kernel: task:systemd state:S stack:12920 pid: 1 ppid: 0 flags:0x00000000 kernel: Call Trace: kernel: __schedule+0x282/0x620 Signed-off-by: Libing Zhou Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200814030236.37835-1-libing.zhou@nokia-sbell.com --- kernel/sched/core.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4a0e7b449b88..09fd62568ba9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6387,10 +6387,10 @@ void sched_show_task(struct task_struct *p) if (!try_get_task_stack(p)) return; - printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); + pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); if (p->state == TASK_RUNNING) - printk(KERN_CONT " running task "); + pr_cont(" running task "); #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif @@ -6399,8 +6399,8 @@ void sched_show_task(struct task_struct *p) if (pid_alive(p)) ppid = task_pid_nr(rcu_dereference(p->real_parent)); rcu_read_unlock(); - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, - task_pid_nr(p), ppid, + pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", + free, task_pid_nr(p), ppid, (unsigned long)task_thread_info(p)->flags); print_worker_info(KERN_INFO, p); @@ -6435,13 +6435,6 @@ void show_state_filter(unsigned long state_filter) { struct task_struct *g, *p; -#if BITS_PER_LONG == 32 - printk(KERN_INFO - " task PC stack pid father\n"); -#else - printk(KERN_INFO - " task PC stack pid father\n"); -#endif rcu_read_lock(); for_each_process_thread(g, p) { /* -- cgit From eb6d31ae22d7767789e3c079444157ea40114884 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:24 +0800 Subject: perf ftrace: Select function/function_graph tracer automatically The '-g/-G' options have already implied function_graph tracer should be used instead of function tracer. So we don't need extra option '--tracer' in this case. This patch changes the behavior as below: - If '-g' or '-G' option is on, then function_graph tracer is used. - If '-T' or '-N' option is on, then function tracer is used. - The function_graph has priority over function tracer. - The option '--tracer' only take effect if neither -g/-G nor -T/-N is specified. Here are some examples. This will start tracing all functions using default tracer: $ sudo perf ftrace This will trace all functions using function graph tracer: $ sudo perf ftrace -G '*' This will trace function vfs_read using function graph tracer: $ sudo perf ftrace -G vfs_read This will trace function vfs_read using function tracer: $ sudo perf ftrace -T vfs_read Committer notes: Using '-h -G' will tell what that option is about, so to further clarify the above examples: # perf ftrace -h -G -G, --graph-funcs Set graph filter on given functions # perf ftrace -h -g -g, --nograph-funcs Set nograph filter on given functions # perf ftrace -h -T -T, --trace-funcs trace given functions only # perf ftrace -h -N -N, --notrace-funcs do not trace given functions # Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-2-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 ++-- tools/perf/Documentation/perf-ftrace.txt | 41 +++++++++++++++++--------------- tools/perf/builtin-ftrace.c | 25 +++++++++++++++++-- 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c7d3df5798e2..76408d986aed 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -614,8 +614,9 @@ trace.*:: ftrace.*:: ftrace.tracer:: - Can be used to select the default tracer. Possible values are - 'function' and 'function_graph'. + Can be used to select the default tracer when neither -G nor + -F option is not specified. Possible values are 'function' and + 'function_graph'. llvm.*:: llvm.clang-path:: diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index b80c84307dc9..821d4d334a09 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -24,7 +24,8 @@ OPTIONS -t:: --tracer=:: - Tracer to use: function_graph or function. + Tracer to use when neither -G nor -F option is not + specified: function_graph or function. -v:: --verbose=:: @@ -50,33 +51,35 @@ OPTIONS -T:: --trace-funcs=:: - Only trace functions given by the argument. Multiple functions - can be given by using this option more than once. The function - argument also can be a glob pattern. It will be passed to - 'set_ftrace_filter' in tracefs. + Select function tracer and set function filter on the given + function (or a glob pattern). Multiple functions can be given + by using this option more than once. The function argument also + can be a glob pattern. It will be passed to 'set_ftrace_filter' + in tracefs. -N:: --notrace-funcs=:: - Do not trace functions given by the argument. Like -T option, - this can be used more than once to specify multiple functions - (or glob patterns). It will be passed to 'set_ftrace_notrace' - in tracefs. + Select function tracer and do not trace functions given by the + argument. Like -T option, this can be used more than once to + specify multiple functions (or glob patterns). It will be + passed to 'set_ftrace_notrace' in tracefs. -G:: --graph-funcs=:: - Set graph filter on the given function (or a glob pattern). - This is useful for the function_graph tracer only and enables - tracing for functions executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_function' in tracefs. + Select function_graph tracer and set graph filter on the given + function (or a glob pattern). This is useful to trace for + functions executed from the given function. This can be used more + than once to specify multiple functions. It will be passed to + 'set_graph_function' in tracefs. -g:: --nograph-funcs=:: - Set graph notrace filter on the given function (or a glob pattern). - Like -G option, this is useful for the function_graph tracer only - and disables tracing for function executed from the given function. - This can be used more than once to specify multiple functions. - It will be passed to 'set_graph_notrace' in tracefs. + Select function_graph tracer and set graph notrace filter on the + given function (or a glob pattern). Like -G option, this is useful + for the function_graph tracer only and disables tracing for function + executed from the given function. This can be used more than once to + specify multiple functions. It will be passed to 'set_graph_notrace' + in tracefs. -D:: --graph-depth=:: diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 2bfc1b0db536..048a11192b59 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -455,6 +455,23 @@ static void delete_filter_func(struct list_head *head) } } +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. */ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} + int cmd_ftrace(int argc, const char **argv) { int ret; @@ -479,11 +496,13 @@ int cmd_ftrace(int argc, const char **argv) OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", - "trace given functions only", parse_filter_func), + "trace given functions using function tracer", + parse_filter_func), OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", "do not trace given functions", parse_filter_func), OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", - "Set graph filter on given functions", parse_filter_func), + "trace given functions using function_graph tracer", + parse_filter_func), OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, @@ -505,6 +524,8 @@ int cmd_ftrace(int argc, const char **argv) if (!argc && target__none(&ftrace.target)) ftrace.target.system_wide = true; + select_tracer(&ftrace); + ret = target__validate(&ftrace.target); if (ret) { char errbuf[512]; -- cgit From d6d81bfe429ee7e3e59388d16741b354a2484f0f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:25 +0800 Subject: perf ftrace: Add option '-F/--funcs' to list available functions This adds an option '-F/--funcs' to list all available functions to trace, which is read from tracing file 'available_filter_functions'. $ sudo ./perf ftrace -F | head trace_initcall_finish_cb initcall_blacklisted do_one_initcall do_one_initcall trace_initcall_start_cb run_init_process try_to_run_init_process match_dev_by_label match_dev_by_uuid rootfs_init_fs_context $ Committer notes: This is the same command line option and for the same purpose as in 'perf probe'. Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-3-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 4 +++ tools/perf/builtin-ftrace.c | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 821d4d334a09..4f5628445a63 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -31,6 +31,10 @@ OPTIONS --verbose=:: Verbosity level. +-F:: +--funcs:: + List all available functions to trace. + -p:: --pid=:: Trace on existing process id (comma separated list). diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 048a11192b59..1ebf71ecc1a8 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -33,6 +33,7 @@ struct perf_ftrace { struct evlist *evlist; struct target target; const char *tracer; + bool list_avail_functions; struct list_head filters; struct list_head notrace; struct list_head graph_funcs; @@ -128,6 +129,46 @@ static int append_tracing_file(const char *name, const char *val) return __write_tracing_file(name, val, true); } +static int read_tracing_file_to_stdout(const char *name) +{ + char buf[4096]; + char *file; + int fd; + int ret = -1; + + file = get_tracing_file(name); + if (!file) { + pr_debug("cannot get tracing file: %s\n", name); + return -1; + } + + fd = open(file, O_RDONLY); + if (fd < 0) { + pr_debug("cannot open tracing file: %s: %s\n", + name, str_error_r(errno, buf, sizeof(buf))); + goto out; + } + + /* read contents to stdout */ + while (true) { + int n = read(fd, buf, sizeof(buf)); + if (n == 0) + break; + else if (n < 0) + goto out_close; + + if (fwrite(buf, n, 1, stdout) != 1) + goto out_close; + } + ret = 0; + +out_close: + close(fd); +out: + put_tracing_file(file); + return ret; +} + static int reset_tracing_cpu(void); static void reset_tracing_filters(void); @@ -302,6 +343,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); + if (ftrace->list_avail_functions) + return read_tracing_file_to_stdout("available_filter_functions"); + if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); goto out; @@ -487,6 +531,8 @@ int cmd_ftrace(int argc, const char **argv) const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "tracer to use: function_graph(default) or function"), + OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions, + "Show available functions to filter"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "trace on existing process id"), OPT_INCR('v', "verbose", &verbose, -- cgit From 68faab0f934d83d374b3d696d861d8502641fa19 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:26 +0800 Subject: perf ftrace: Factor out function write_tracing_file_int() We will reuse this function later. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-4-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 1ebf71ecc1a8..4b3fcee5725a 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -169,6 +169,17 @@ out: return ret; } +static int write_tracing_file_int(const char *name, int value) +{ + char buf[16]; + + snprintf(buf, sizeof(buf), "%d", value); + if (write_tracing_file(name, buf) < 0) + return -1; + + return 0; +} + static int reset_tracing_cpu(void); static void reset_tracing_filters(void); @@ -299,8 +310,6 @@ static void reset_tracing_filters(void) static int set_tracing_depth(struct perf_ftrace *ftrace) { - char buf[16]; - if (ftrace->graph_depth == 0) return 0; @@ -309,9 +318,7 @@ static int set_tracing_depth(struct perf_ftrace *ftrace) return -1; } - snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth); - - if (write_tracing_file("max_graph_depth", buf) < 0) + if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0) return -1; return 0; -- cgit From 846e1939801a0893ea71a6a56d8dbe2bf46682d1 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:27 +0800 Subject: perf ftrace: Add option '-m/--buffer-size' to set per-cpu buffer size This adds an option '-m/--buffer-size' to allow us set the size of per-cpu tracing buffer. Committer testing: Before running with this option: # find /sys/kernel/tracing/ -name buffer_size_kb | xargs cat 1408 1408 1408 1408 1408 1408 1408 1408 1408 # Then, run: # perf ftrace -m 2048K | head -10 2) | mutex_unlock() { 2) ==========> | 2) | smp_irq_work_interrupt() { 2) | irq_enter() { 2) 0.121 us | rcu_irq_enter(); 2) 0.128 us | irqtime_account_irq(); 2) 0.719 us | } 2) | __wake_up() { 2) | __wake_up_common_lock() { 2) 0.105 us | _raw_spin_lock_irqsave(); # Now look at those tracefs knobs: # find /sys/kernel/tracing/ -name buffer_size_kb | xargs cat 2048 2048 2048 2048 2048 2048 2048 2048 2048 # This should be similar to the -m option in the other perf tools, such as 'perf record', 'perf trace', etc. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-5-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 5 +++ tools/perf/builtin-ftrace.c | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 4f5628445a63..7a5d915f60b0 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -53,6 +53,11 @@ OPTIONS Ranges of CPUs are specified with -: 0-2. Default is to trace on all online CPUs. +-m:: +--buffer-size:: + Set the size of per-cpu tracing buffer, is expected to + be a number with appended unit character - B/K/M/G. + -T:: --trace-funcs=:: Select function tracer and set function filter on the given diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 4b3fcee5725a..a3a4f4be9dde 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -26,6 +26,7 @@ #include "thread_map.h" #include "util/cap.h" #include "util/config.h" +#include "util/units.h" #define DEFAULT_TRACER "function_graph" @@ -39,6 +40,7 @@ struct perf_ftrace { struct list_head graph_funcs; struct list_head nograph_funcs; int graph_depth; + unsigned long percpu_buffer_size; }; struct filter_entry { @@ -324,6 +326,21 @@ static int set_tracing_depth(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace) +{ + int ret; + + if (ftrace->percpu_buffer_size == 0) + return 0; + + ret = write_tracing_file_int("buffer_size_kb", + ftrace->percpu_buffer_size / 1024); + if (ret < 0) + return ret; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -388,6 +405,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_percpu_buffer_size(ftrace) < 0) { + pr_err("failed to set tracing per-cpu buffer size\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -506,6 +528,37 @@ static void delete_filter_func(struct list_head *head) } } +static int parse_buffer_size(const struct option *opt, + const char *str, int unset) +{ + unsigned long *s = (unsigned long *)opt->value; + static struct parse_tag tags_size[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + unsigned long val; + + if (unset) { + *s = 0; + return 0; + } + + val = parse_tag_value(str, tags_size); + if (val != (unsigned long) -1) { + if (val < 1024) { + pr_err("buffer size too small, must larger than 1KB."); + return -1; + } + *s = val; + return 0; + } + + return -1; +} + static void select_tracer(struct perf_ftrace *ftrace) { bool graph = !list_empty(&ftrace->graph_funcs) || @@ -560,6 +613,8 @@ int cmd_ftrace(int argc, const char **argv) "Set nograph filter on given functions", parse_filter_func), OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, "Max depth for function graph tracer"), + OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", + "size of per cpu buffer", parse_buffer_size), OPT_END() }; -- cgit From 81523c1e579bb95106e22b864fbef687d585f2b9 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:28 +0800 Subject: perf ftrace: Show trace column header This makes 'perf ftrace' display column header before printing trace. $ sudo perf ftrace # tracer: function # # entries-in-buffer/entries-written: 0/0 #P:8 # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | <...>-9246 [006] 10726.262760: mutex_unlock <-rb_simple_write <...>-9246 [006] 10726.262764: __fsnotify_parent <-vfs_write <...>-9246 [006] 10726.262765: fsnotify <-vfs_write <...>-9246 [006] 10726.262766: __sb_end_write <-vfs_write <...>-9246 [006] 10726.262767: fpregs_assert_state_consistent <-do_syscall_64 Signed-off-by: Changbin Du Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-6-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index a3a4f4be9dde..39c694be2b71 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -435,6 +435,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) fcntl(trace_fd, F_SETFL, O_NONBLOCK); pollfd.fd = trace_fd; + /* display column headers */ + read_tracing_file_to_stdout("trace"); + if (write_tracing_file("tracing_on", "1") < 0) { pr_err("can't enable tracing\n"); goto out_close_fd; -- cgit From 5b34747238686021a39495f7ae0bfadc27fe3e83 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:29 +0800 Subject: perf ftrace: Add option '--inherit' to trace children processes This adds an option '--inherit' to allow us trace children processes spawned by our target. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-7-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 3 +++ tools/perf/builtin-ftrace.c | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 7a5d915f60b0..c46d0a09b38c 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -58,6 +58,9 @@ OPTIONS Set the size of per-cpu tracing buffer, is expected to be a number with appended unit character - B/K/M/G. +--inherit:: + Trace children processes spawned by our target. + -T:: --trace-funcs=:: Select function tracer and set function filter on the given diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 39c694be2b71..07b81d0c1658 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -41,6 +41,7 @@ struct perf_ftrace { struct list_head nograph_funcs; int graph_depth; unsigned long percpu_buffer_size; + bool inherit; }; struct filter_entry { @@ -182,9 +183,27 @@ static int write_tracing_file_int(const char *name, int value) return 0; } +static int write_tracing_option_file(const char *name, const char *val) +{ + char *file; + int ret; + + if (asprintf(&file, "options/%s", name) < 0) + return -1; + + ret = __write_tracing_file(file, val, false); + free(file); + return ret; +} + static int reset_tracing_cpu(void); static void reset_tracing_filters(void); +static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) +{ + write_tracing_option_file("function-fork", "0"); +} + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -203,6 +222,7 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) return -1; reset_tracing_filters(); + reset_tracing_options(ftrace); return 0; } @@ -341,6 +361,17 @@ static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_trace_inherit(struct perf_ftrace *ftrace) +{ + if (!ftrace->inherit) + return 0; + + if (write_tracing_option_file("function-fork", "1") < 0) + return -1; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -410,6 +441,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_trace_inherit(ftrace) < 0) { + pr_err("failed to set tracing option function-fork\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -618,6 +654,8 @@ int cmd_ftrace(int argc, const char **argv) "Max depth for function graph tracer"), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), + OPT_BOOLEAN(0, "inherit", &ftrace.inherit, + "trace children processes"), OPT_END() }; -- cgit From a80abe2a9a8a420b4dd88713ba0ad3c163e7a79b Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:30 +0800 Subject: perf tools: Add general function to parse sublevel options This factors out a general function perf_parse_sublevel_options() to parse sublevel options. The 'sublevel' options is something like the '--debug' options which allow more sublevel options. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-8-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/debug.c | 61 ++++++++-------------------- tools/perf/util/parse-sublevel-options.c | 70 ++++++++++++++++++++++++++++++++ tools/perf/util/parse-sublevel-options.h | 11 +++++ 4 files changed, 99 insertions(+), 44 deletions(-) create mode 100644 tools/perf/util/parse-sublevel-options.c create mode 100644 tools/perf/util/parse-sublevel-options.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 494626e303f5..cd5e41960e64 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -117,6 +117,7 @@ endif perf-y += parse-branch-options.o perf-y += dump-insn.o perf-y += parse-regs-options.o +perf-y += parse-sublevel-options.o perf-y += term.o perf-y += help-unknown-cmd.o perf-y += mem-events.o diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index adb656745ecc..5cda5565777a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ #include "target.h" #include "ui/helpline.h" #include "ui/ui.h" +#include "util/parse-sublevel-options.h" #include @@ -173,65 +174,37 @@ void trace_event(union perf_event *event) trace_event_printer, event); } -static struct debug_variable { - const char *name; - int *ptr; -} debug_variables[] = { - { .name = "verbose", .ptr = &verbose }, - { .name = "ordered-events", .ptr = &debug_ordered_events}, - { .name = "stderr", .ptr = &redirect_to_stderr}, - { .name = "data-convert", .ptr = &debug_data_convert }, - { .name = "perf-event-open", .ptr = &debug_peo_args }, +static struct sublevel_option debug_opts[] = { + { .name = "verbose", .value_ptr = &verbose }, + { .name = "ordered-events", .value_ptr = &debug_ordered_events}, + { .name = "stderr", .value_ptr = &redirect_to_stderr}, + { .name = "data-convert", .value_ptr = &debug_data_convert }, + { .name = "perf-event-open", .value_ptr = &debug_peo_args }, { .name = NULL, } }; int perf_debug_option(const char *str) { - struct debug_variable *var = &debug_variables[0]; - char *vstr, *s = strdup(str); - int v = 1; - - vstr = strchr(s, '='); - if (vstr) - *vstr++ = 0; - - while (var->name) { - if (!strcmp(s, var->name)) - break; - var++; - } - - if (!var->name) { - pr_err("Unknown debug variable name '%s'\n", s); - free(s); - return -1; - } + int ret; - if (vstr) { - v = atoi(vstr); - /* - * Allow only values in range (0, 10), - * otherwise set 0. - */ - v = (v < 0) || (v > 10) ? 0 : v; - } + ret = perf_parse_sublevel_options(str, debug_opts); + if (ret) + return ret; - if (quiet) - v = -1; + /* Allow only verbose value in range (0, 10), otherwise set 0. */ + verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; - *var->ptr = v; - free(s); return 0; } int perf_quiet_option(void) { - struct debug_variable *var = &debug_variables[0]; + struct sublevel_option *opt = &debug_opts[0]; /* disable all debug messages */ - while (var->name) { - *var->ptr = -1; - var++; + while (opt->name) { + *opt->value_ptr = -1; + opt++; } return 0; diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c new file mode 100644 index 000000000000..a841d17ffd57 --- /dev/null +++ b/tools/perf/util/parse-sublevel-options.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +#include "util/debug.h" +#include "util/parse-sublevel-options.h" + +static int parse_one_sublevel_option(const char *str, + struct sublevel_option *opts) +{ + struct sublevel_option *opt = opts; + char *vstr, *s = strdup(str); + int v = 1; + + if (!s) { + pr_err("no memory\n"); + return -1; + } + + vstr = strchr(s, '='); + if (vstr) + *vstr++ = 0; + + while (opt->name) { + if (!strcmp(s, opt->name)) + break; + opt++; + } + + if (!opt->name) { + pr_err("Unknown option name '%s'\n", s); + free(s); + return -1; + } + + if (vstr) + v = atoi(vstr); + + *opt->value_ptr = v; + free(s); + return 0; +} + +/* parse options like --foo a=,b,c... */ +int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts) +{ + char *s = strdup(str); + char *p = NULL; + int ret; + + if (!s) { + pr_err("no memory\n"); + return -1; + } + + p = strtok(s, ","); + while (p) { + ret = parse_one_sublevel_option(p, opts); + if (ret) { + free(s); + return ret; + } + + p = strtok(NULL, ","); + } + + free(s); + return 0; +} diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h new file mode 100644 index 000000000000..9b9efcc2aaad --- /dev/null +++ b/tools/perf/util/parse-sublevel-options.h @@ -0,0 +1,11 @@ +#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H +#define _PERF_PARSE_SUBLEVEL_OPTIONS_H + +struct sublevel_option { + const char *name; + int *value_ptr; +}; + +int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts); + +#endif \ No newline at end of file -- cgit From b1d84af6f58022a0d75f5745367624549cd0bfbf Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:31 +0800 Subject: perf ftrace: Add support for tracing option 'func_stack_trace' This adds support to display call trace for function tracer. To do this, just specify a '--func-opts call-graph' option. Example: $ sudo perf ftrace -T vfs_read --func-opts call-graph iio-sensor-prox-855 [003] 6168.369657: vfs_read <-ksys_read iio-sensor-prox-855 [003] 6168.369677: => vfs_read => ksys_read => __x64_sys_read => do_syscall_64 => entry_SYSCALL_64_after_hwframe ... Signed-off-by: Changbin Du Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-9-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 4 +++ tools/perf/builtin-ftrace.c | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index c46d0a09b38c..8f08ad0992c2 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -76,6 +76,10 @@ OPTIONS specify multiple functions (or glob patterns). It will be passed to 'set_ftrace_notrace' in tracefs. +--func-opts:: + List of options allowed to set: + call-graph - Display kernel stack trace for function tracer. + -G:: --graph-funcs=:: Select function_graph tracer and set graph filter on the given diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 07b81d0c1658..469b89748c42 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -27,6 +27,7 @@ #include "util/cap.h" #include "util/config.h" #include "util/units.h" +#include "util/parse-sublevel-options.h" #define DEFAULT_TRACER "function_graph" @@ -42,6 +43,7 @@ struct perf_ftrace { int graph_depth; unsigned long percpu_buffer_size; bool inherit; + int func_stack_trace; }; struct filter_entry { @@ -202,6 +204,7 @@ static void reset_tracing_filters(void); static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) { write_tracing_option_file("function-fork", "0"); + write_tracing_option_file("func_stack_trace", "0"); } static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) @@ -278,6 +281,17 @@ static int set_tracing_cpu(struct perf_ftrace *ftrace) return set_tracing_cpumask(cpumap); } +static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace) +{ + if (!ftrace->func_stack_trace) + return 0; + + if (write_tracing_option_file("func_stack_trace", "1") < 0) + return -1; + + return 0; +} + static int reset_tracing_cpu(void) { struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); @@ -426,6 +440,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_func_stack_trace(ftrace) < 0) { + pr_err("failed to set tracing option func_stack_trace\n"); + goto out_reset; + } + if (set_tracing_filters(ftrace) < 0) { pr_err("failed to set tracing filters\n"); goto out_reset; @@ -598,6 +617,26 @@ static int parse_buffer_size(const struct option *opt, return -1; } +static int parse_func_tracer_opts(const struct option *opt, + const char *str, int unset) +{ + int ret; + struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; + struct sublevel_option func_tracer_opts[] = { + { .name = "call-graph", .value_ptr = &ftrace->func_stack_trace }, + { .name = NULL, } + }; + + if (unset) + return 0; + + ret = perf_parse_sublevel_options(str, func_tracer_opts); + if (ret) + return ret; + + return 0; +} + static void select_tracer(struct perf_ftrace *ftrace) { bool graph = !list_empty(&ftrace->graph_funcs) || @@ -645,6 +684,9 @@ int cmd_ftrace(int argc, const char **argv) parse_filter_func), OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", "do not trace given functions", parse_filter_func), + OPT_CALLBACK(0, "func-opts", &ftrace, "options", + "function tracer options, available options: call-graph", + parse_func_tracer_opts), OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", "trace given functions using function_graph tracer", parse_filter_func), -- cgit From 38988f2e7ed515784b8b19e2be265c9c6984ce6f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:32 +0800 Subject: perf ftrace: Add support for trace option sleep-time This adds an option '--graph-opts nosleep-time' which allow us only to measure on-CPU time. This option is function_graph tracer only. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-10-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 4 ++++ tools/perf/builtin-ftrace.c | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 8f08ad0992c2..3380a2e2c9ad 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -101,6 +101,10 @@ OPTIONS --graph-depth=:: Set max depth for function graph tracer to follow +--graph-opts:: + List of options allowed to set: + nosleep-time - Measure on-CPU time only for function_graph tracer. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-trace[1] diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 469b89748c42..47d63bba6a48 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -44,6 +44,7 @@ struct perf_ftrace { unsigned long percpu_buffer_size; bool inherit; int func_stack_trace; + int graph_nosleep_time; }; struct filter_entry { @@ -205,6 +206,7 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) { write_tracing_option_file("function-fork", "0"); write_tracing_option_file("func_stack_trace", "0"); + write_tracing_option_file("sleep-time", "1"); } static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) @@ -386,6 +388,17 @@ static int set_tracing_trace_inherit(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_sleep_time(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_nosleep_time) + return 0; + + if (write_tracing_option_file("sleep-time", "0") < 0) + return -1; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -465,6 +478,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_sleep_time(ftrace) < 0) { + pr_err("failed to set tracing option sleep-time\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -637,6 +655,26 @@ static int parse_func_tracer_opts(const struct option *opt, return 0; } +static int parse_graph_tracer_opts(const struct option *opt, + const char *str, int unset) +{ + int ret; + struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; + struct sublevel_option graph_tracer_opts[] = { + { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, + { .name = NULL, } + }; + + if (unset) + return 0; + + ret = perf_parse_sublevel_options(str, graph_tracer_opts); + if (ret) + return ret; + + return 0; +} + static void select_tracer(struct perf_ftrace *ftrace) { bool graph = !list_empty(&ftrace->graph_funcs) || @@ -694,6 +732,9 @@ int cmd_ftrace(int argc, const char **argv) "Set nograph filter on given functions", parse_filter_func), OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, "Max depth for function graph tracer"), + OPT_CALLBACK(0, "graph-opts", &ftrace, "options", + "graph tracer options, available options: nosleep-time", + parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), OPT_BOOLEAN(0, "inherit", &ftrace.inherit, -- cgit From d1bcf17cda8001e2826d409019866b33adb82219 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:33 +0800 Subject: perf ftrace: Add support for trace option funcgraph-irqs This adds an option '--graph-opts noirqs' to filter out functions executed in irq context. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-11-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 1 + tools/perf/builtin-ftrace.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 3380a2e2c9ad..fca55ac55ff3 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -104,6 +104,7 @@ OPTIONS --graph-opts:: List of options allowed to set: nosleep-time - Measure on-CPU time only for function_graph tracer. + noirqs - Ignore functions that happen inside interrupt. SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 47d63bba6a48..b4c821be4fb5 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -45,6 +45,7 @@ struct perf_ftrace { bool inherit; int func_stack_trace; int graph_nosleep_time; + int graph_noirqs; }; struct filter_entry { @@ -207,6 +208,7 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) write_tracing_option_file("function-fork", "0"); write_tracing_option_file("func_stack_trace", "0"); write_tracing_option_file("sleep-time", "1"); + write_tracing_option_file("funcgraph-irqs", "1"); } static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) @@ -399,6 +401,17 @@ static int set_tracing_sleep_time(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_noirqs) + return 0; + + if (write_tracing_option_file("funcgraph-irqs", "0") < 0) + return -1; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -483,6 +496,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_funcgraph_irqs(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-irqs\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -662,6 +680,7 @@ static int parse_graph_tracer_opts(const struct option *opt, struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; struct sublevel_option graph_tracer_opts[] = { { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, + { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, { .name = NULL, } }; @@ -733,7 +752,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, "Max depth for function graph tracer"), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "graph tracer options, available options: nosleep-time", + "graph tracer options, available options: nosleep-time,noirqs", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), -- cgit From c81fc34e318503f28c571250d3434d8a788ca747 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:34 +0800 Subject: perf ftrace: Add support for tracing option 'irq-info' This adds support to display irq context info for function tracer. To do this, just specify a '--func-opts irq-info' option. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-12-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 1 + tools/perf/builtin-ftrace.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index fca55ac55ff3..37ea3ea97922 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -79,6 +79,7 @@ OPTIONS --func-opts:: List of options allowed to set: call-graph - Display kernel stack trace for function tracer. + irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index b4c821be4fb5..d1241febe143 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -44,6 +44,7 @@ struct perf_ftrace { unsigned long percpu_buffer_size; bool inherit; int func_stack_trace; + int func_irq_info; int graph_nosleep_time; int graph_noirqs; }; @@ -209,6 +210,7 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) write_tracing_option_file("func_stack_trace", "0"); write_tracing_option_file("sleep-time", "1"); write_tracing_option_file("funcgraph-irqs", "1"); + write_tracing_option_file("irq-info", "0"); } static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) @@ -296,6 +298,17 @@ static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace) +{ + if (!ftrace->func_irq_info) + return 0; + + if (write_tracing_option_file("irq-info", "1") < 0) + return -1; + + return 0; +} + static int reset_tracing_cpu(void) { struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); @@ -471,6 +484,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_func_irqinfo(ftrace) < 0) { + pr_err("failed to set tracing option irq-info\n"); + goto out_reset; + } + if (set_tracing_filters(ftrace) < 0) { pr_err("failed to set tracing filters\n"); goto out_reset; @@ -660,6 +678,7 @@ static int parse_func_tracer_opts(const struct option *opt, struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value; struct sublevel_option func_tracer_opts[] = { { .name = "call-graph", .value_ptr = &ftrace->func_stack_trace }, + { .name = "irq-info", .value_ptr = &ftrace->func_irq_info }, { .name = NULL, } }; @@ -742,7 +761,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", "do not trace given functions", parse_filter_func), OPT_CALLBACK(0, "func-opts", &ftrace, "options", - "function tracer options, available options: call-graph", + "function tracer options, available options: call-graph,irq-info", parse_func_tracer_opts), OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", "trace given functions using function_graph tracer", -- cgit From 59486fb0c8bfda4ecd32b905d4cf2af92ec49f4c Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:35 +0800 Subject: perf ftrace: Add option 'verbose' to show more info for graph tracer Sometimes we want ftrace display more and longer information about the trace. $ sudo perf ftrace -G '*' 2) 0.979 us | mutex_unlock(); 2) 1.540 us | __fsnotify_parent(); 2) 0.433 us | fsnotify(); $ sudo perf ftrace -G '*' --graph-opts verbose 14160.770883 | 0) <...>-47814 | .... | 1.289 us | mutex_unlock(); 14160.770886 | 0) <...>-47814 | .... | 1.624 us | __fsnotify_parent(); 14160.770887 | 0) <...>-47814 | .... | 0.636 us | fsnotify(); 14160.770888 | 0) <...>-47814 | .... | 0.328 us | __sb_end_write(); 14160.770888 | 0) <...>-47814 | d... | 0.430 us | fpregs_assert_state_consistent(); 14160.770889 | 0) <...>-47814 | d... | | do_syscall_64() { 14160.770889 | 0) <...>-47814 | .... | | __x64_sys_close() { Signed-off-by: Changbin Du Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-13-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 1 + tools/perf/builtin-ftrace.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 37ea3ea97922..a2056aaf2ece 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -106,6 +106,7 @@ OPTIONS List of options allowed to set: nosleep-time - Measure on-CPU time only for function_graph tracer. noirqs - Ignore functions that happen inside interrupt. + verbose - Show process names, PIDs, timestamps, etc. SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d1241febe143..8ee5287bd84a 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -47,6 +47,7 @@ struct perf_ftrace { int func_irq_info; int graph_nosleep_time; int graph_noirqs; + int graph_verbose; }; struct filter_entry { @@ -210,6 +211,9 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused) write_tracing_option_file("func_stack_trace", "0"); write_tracing_option_file("sleep-time", "1"); write_tracing_option_file("funcgraph-irqs", "1"); + write_tracing_option_file("funcgraph-proc", "0"); + write_tracing_option_file("funcgraph-abstime", "0"); + write_tracing_option_file("latency-format", "0"); write_tracing_option_file("irq-info", "0"); } @@ -425,6 +429,23 @@ static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_verbose) + return 0; + + if (write_tracing_option_file("funcgraph-proc", "1") < 0) + return -1; + + if (write_tracing_option_file("funcgraph-abstime", "1") < 0) + return -1; + + if (write_tracing_option_file("latency-format", "1") < 0) + return -1; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -519,6 +540,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_funcgraph_verbose(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -700,6 +726,7 @@ static int parse_graph_tracer_opts(const struct option *opt, struct sublevel_option graph_tracer_opts[] = { { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, + { .name = "verbose", .value_ptr = &ftrace->graph_verbose }, { .name = NULL, } }; @@ -771,7 +798,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, "Max depth for function graph tracer"), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "graph tracer options, available options: nosleep-time,noirqs", + "graph tracer options, available options: nosleep-time,noirqs,verbose", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), -- cgit From 00c85d5f45b6b366f68c876cd89b8b790f50ea0f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:36 +0800 Subject: perf ftrace: Add support for trace option tracing_thresh This adds an option '--graph-opts thresh' to setup trace duration threshold for funcgraph tracer. $ sudo ./perf ftrace -G '*' --graph-opts thresh=100 3) ! 184.060 us | } /* schedule */ 3) ! 185.600 us | } /* exit_to_usermode_loop */ 2) ! 225.989 us | } /* schedule_idle */ 2) # 4140.051 us | } /* do_idle */ Signed-off-by: Changbin Du Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-14-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 1 + tools/perf/builtin-ftrace.c | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index a2056aaf2ece..6fa927e5971b 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -107,6 +107,7 @@ OPTIONS nosleep-time - Measure on-CPU time only for function_graph tracer. noirqs - Ignore functions that happen inside interrupt. verbose - Show process names, PIDs, timestamps, etc. + thresh= - Setup trace duration threshold in microseconds. SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 8ee5287bd84a..ca61f8b8bbf6 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -48,6 +48,7 @@ struct perf_ftrace { int graph_nosleep_time; int graph_noirqs; int graph_verbose; + int graph_thresh; }; struct filter_entry { @@ -234,6 +235,9 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (write_tracing_file("max_graph_depth", "0") < 0) return -1; + if (write_tracing_file("tracing_thresh", "0") < 0) + return -1; + reset_tracing_filters(); reset_tracing_options(ftrace); return 0; @@ -446,6 +450,20 @@ static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_thresh(struct perf_ftrace *ftrace) +{ + int ret; + + if (ftrace->graph_thresh == 0) + return 0; + + ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh); + if (ret < 0) + return ret; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -545,6 +563,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_thresh(ftrace) < 0) { + pr_err("failed to set tracing thresh\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -727,6 +750,7 @@ static int parse_graph_tracer_opts(const struct option *opt, { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time }, { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, { .name = "verbose", .value_ptr = &ftrace->graph_verbose }, + { .name = "thresh", .value_ptr = &ftrace->graph_thresh }, { .name = NULL, } }; @@ -798,7 +822,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, "Max depth for function graph tracer"), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "graph tracer options, available options: nosleep-time,noirqs,verbose", + "graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), -- cgit From a8f87a5cb466888bb70330b9ba55cdc0b9a1c20e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:37 +0800 Subject: perf: ftrace: Allow set graph depth by '--graph-opts' This is to have a consistent view of all graph tracer options. The original option '--graph-depth' is marked as deprecated. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-15-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 5 +---- tools/perf/builtin-ftrace.c | 5 ++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 6fa927e5971b..08216634d2bc 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -98,16 +98,13 @@ OPTIONS specify multiple functions. It will be passed to 'set_graph_notrace' in tracefs. --D:: ---graph-depth=:: - Set max depth for function graph tracer to follow - --graph-opts:: List of options allowed to set: nosleep-time - Measure on-CPU time only for function_graph tracer. noirqs - Ignore functions that happen inside interrupt. verbose - Show process names, PIDs, timestamps, etc. thresh= - Setup trace duration threshold in microseconds. + depth= - Set max depth for function graph tracer to follow. SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index ca61f8b8bbf6..bc3b35d18167 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -751,6 +751,7 @@ static int parse_graph_tracer_opts(const struct option *opt, { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs }, { .name = "verbose", .value_ptr = &ftrace->graph_verbose }, { .name = "thresh", .value_ptr = &ftrace->graph_thresh }, + { .name = "depth", .value_ptr = &ftrace->graph_depth }, { .name = NULL, } }; @@ -819,10 +820,8 @@ int cmd_ftrace(int argc, const char **argv) parse_filter_func), OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), - OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, - "Max depth for function graph tracer"), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=", + "graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=,depth=", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "size of per cpu buffer", parse_buffer_size), -- cgit From 6555c2f6db2196ef1b6d7149e7d342d0ba2ec57e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:38 +0800 Subject: perf ftrace: Add option -D/--delay to delay tracing This adds an option '-D/--delay' to allow us to start tracing some times later after workload is launched. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-16-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 4 ++++ tools/perf/builtin-ftrace.c | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 08216634d2bc..29c684b3b3fd 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -39,6 +39,10 @@ OPTIONS --pid=:: Trace on existing process id (comma separated list). +-D:: +--delay:: + Time (ms) to wait before starting tracing after program start. + -a:: --all-cpus:: Force system-wide collection. Scripts run without a diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index bc3b35d18167..5f9a9ebea0a2 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -49,6 +49,7 @@ struct perf_ftrace { int graph_noirqs; int graph_verbose; int graph_thresh; + unsigned int initial_delay; }; struct filter_entry { @@ -596,13 +597,23 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) /* display column headers */ read_tracing_file_to_stdout("trace"); - if (write_tracing_file("tracing_on", "1") < 0) { - pr_err("can't enable tracing\n"); - goto out_close_fd; + if (!ftrace->initial_delay) { + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } } perf_evlist__start_workload(ftrace->evlist); + if (ftrace->initial_delay) { + usleep(ftrace->initial_delay * 1000); + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } + } + while (!done) { if (poll(&pollfd, 1, -1) < 0) break; @@ -827,6 +838,8 @@ int cmd_ftrace(int argc, const char **argv) "size of per cpu buffer", parse_buffer_size), OPT_BOOLEAN(0, "inherit", &ftrace.inherit, "trace children processes"), + OPT_UINTEGER('D', "delay", &ftrace.initial_delay, + "ms to wait before starting tracing after program start"), OPT_END() }; -- cgit From 42145d71ddf369a536940a1b0d63b9aef6468516 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:39 +0800 Subject: perf ftrace: Add option --tid to filter by thread id This allows us to trace single thread instead of the whole process. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-17-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 3 +++ tools/perf/builtin-ftrace.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 29c684b3b3fd..78358af9a1c4 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -39,6 +39,9 @@ OPTIONS --pid=:: Trace on existing process id (comma separated list). +--tid=:: + Trace on existing thread id (comma separated list). + -D:: --delay:: Time (ms) to wait before starting tracing after program start. diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 5f9a9ebea0a2..13abb737fd12 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -812,6 +812,9 @@ int cmd_ftrace(int argc, const char **argv) "Show available functions to filter"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "trace on existing process id"), + /* TODO: Add short option -t after -t/--tracer can be removed. */ + OPT_STRING(0, "tid", &ftrace.target.tid, "tid", + "trace on existing thread id (exclusive to --pid)"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, -- cgit From 3c4dc21b75a74d4a8458624649a3e09af399a384 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:40 +0800 Subject: perf: ftrace: Add set_tracing_options() to set all trace options Now the __cmd_ftrace() becomes a bit long. This moves the trace option setting code to a separate function set_tracing_options(). Suggested-by: Namhyung Kim Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-18-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 118 +++++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 13abb737fd12..9da38dc8b542 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -465,110 +465,118 @@ static int set_tracing_thresh(struct perf_ftrace *ftrace) return 0; } -static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +static int set_tracing_options(struct perf_ftrace *ftrace) { - char *trace_file; - int trace_fd; - char buf[4096]; - struct pollfd pollfd = { - .events = POLLIN, - }; - - if (!(perf_cap__capable(CAP_PERFMON) || - perf_cap__capable(CAP_SYS_ADMIN))) { - pr_err("ftrace only works for %s!\n", -#ifdef HAVE_LIBCAP_SUPPORT - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" -#else - "root" -#endif - ); - return -1; - } - - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); - signal(SIGPIPE, sig_handler); - - if (ftrace->list_avail_functions) - return read_tracing_file_to_stdout("available_filter_functions"); - - if (reset_tracing_files(ftrace) < 0) { - pr_err("failed to reset ftrace\n"); - goto out; - } - - /* reset ftrace buffer */ - if (write_tracing_file("trace", "0") < 0) - goto out; - - if (argc && perf_evlist__prepare_workload(ftrace->evlist, - &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { - goto out; - } - if (set_tracing_pid(ftrace) < 0) { pr_err("failed to set ftrace pid\n"); - goto out_reset; + return -1; } if (set_tracing_cpu(ftrace) < 0) { pr_err("failed to set tracing cpumask\n"); - goto out_reset; + return -1; } if (set_tracing_func_stack_trace(ftrace) < 0) { pr_err("failed to set tracing option func_stack_trace\n"); - goto out_reset; + return -1; } if (set_tracing_func_irqinfo(ftrace) < 0) { pr_err("failed to set tracing option irq-info\n"); - goto out_reset; + return -1; } if (set_tracing_filters(ftrace) < 0) { pr_err("failed to set tracing filters\n"); - goto out_reset; + return -1; } if (set_tracing_depth(ftrace) < 0) { pr_err("failed to set graph depth\n"); - goto out_reset; + return -1; } if (set_tracing_percpu_buffer_size(ftrace) < 0) { pr_err("failed to set tracing per-cpu buffer size\n"); - goto out_reset; + return -1; } if (set_tracing_trace_inherit(ftrace) < 0) { pr_err("failed to set tracing option function-fork\n"); - goto out_reset; + return -1; } if (set_tracing_sleep_time(ftrace) < 0) { pr_err("failed to set tracing option sleep-time\n"); - goto out_reset; + return -1; } if (set_tracing_funcgraph_irqs(ftrace) < 0) { pr_err("failed to set tracing option funcgraph-irqs\n"); - goto out_reset; + return -1; } if (set_tracing_funcgraph_verbose(ftrace) < 0) { pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n"); - goto out_reset; + return -1; } if (set_tracing_thresh(ftrace) < 0) { pr_err("failed to set tracing thresh\n"); - goto out_reset; + return -1; + } + + return 0; +} + +static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +{ + char *trace_file; + int trace_fd; + char buf[4096]; + struct pollfd pollfd = { + .events = POLLIN, + }; + + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { + pr_err("ftrace only works for %s!\n", +#ifdef HAVE_LIBCAP_SUPPORT + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" +#else + "root" +#endif + ); + return -1; } + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); + + if (ftrace->list_avail_functions) + return read_tracing_file_to_stdout("available_filter_functions"); + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + goto out; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + goto out; + + if (argc && perf_evlist__prepare_workload(ftrace->evlist, + &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { + goto out; + } + + if (set_tracing_options(ftrace) < 0) + goto out_reset; + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; -- cgit From 0094024a186045ac1a48d839bdb56c42b8f8279d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 8 Aug 2020 10:31:41 +0800 Subject: perf ftrace: Add change log Add a change log after previous enhancements. Signed-off-by: Changbin Du Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200808023141.14227-19-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9da38dc8b542..167f1f57aba3 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -3,6 +3,7 @@ * builtin-ftrace.c * * Copyright (c) 2013 LG Electronics, Namhyung Kim + * Copyright (c) 2020 Changbin Du , significant enhancement. */ #include "builtin.h" -- cgit From 8989f5f076051f3ceb1da083181b7d69808fff0e Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Thu, 13 Aug 2020 17:21:55 -0500 Subject: perf stat: Update POWER9 metrics to utilize other metrics These changes take advantage of the new capability added in merge commit 00e4db51259a5f936fec1424b884f029479d3981 "Allow using computed metrics in calculating other metrics". The net is a simplification of the expressions for a handful of metrics, but no functional change. Signed-off-by: Paul Clarke Reviewed-by: Kajol Jain Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Madhavan Srinivasan Link: http://lore.kernel.org/lkml/20200813222155.268183-1-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index 80816d6402e9..f8784c608479 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -60,7 +60,7 @@ }, { "BriefDescription": "Stalls due to short latency decimal floating ops.", - "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "dfu_stall_cpi - dflong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dfu_other_stall_cpi" }, @@ -72,7 +72,7 @@ }, { "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_non_local_stall_cpi - dmiss_remote_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_distant_stall_cpi" }, @@ -90,7 +90,7 @@ }, { "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_l2l3_stall_cpi - dmiss_l2l3_conflict_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_l2l3_noconflict_stall_cpi" }, @@ -114,7 +114,7 @@ }, { "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory", - "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL", + "MetricExpr": "dmiss_l3miss_stall_cpi - dmiss_l21_l31_stall_cpi - dmiss_lmem_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dmiss_non_local_stall_cpi" }, @@ -126,7 +126,7 @@ }, { "BriefDescription": "Stalls due to short latency double precision ops.", - "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "dp_stall_cpi - dplong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "dp_other_stall_cpi" }, @@ -155,7 +155,7 @@ "MetricName": "emq_full_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL", + "MetricExpr": "erat_miss_stall_cpi + emq_full_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "emq_stall_cpi" }, @@ -173,7 +173,7 @@ }, { "BriefDescription": "Completion stall due to execution units for other reasons.", - "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricExpr": "exec_unit_stall_cpi - scalar_stall_cpi - vector_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "exec_unit_other_stall_cpi" }, @@ -197,7 +197,7 @@ }, { "BriefDescription": "Stalls due to short latency integer ops", - "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "fxu_stall_cpi - fxlong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "fxu_other_stall_cpi" }, @@ -221,7 +221,7 @@ }, { "BriefDescription": "Instruction Completion Table other stalls", - "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", + "MetricExpr": "nothing_dispatched_cpi - ict_noslot_ic_miss_cpi - ict_noslot_br_mpred_icmiss_cpi - ict_noslot_br_mpred_cpi - ict_noslot_disp_held_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_cyc_other_cpi" }, @@ -245,7 +245,7 @@ }, { "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", + "MetricExpr": "ict_noslot_disp_held_cpi - ict_noslot_disp_held_hb_full_cpi - ict_noslot_disp_held_sync_cpi - ict_noslot_disp_held_tbegin_cpi - ict_noslot_disp_held_issq_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_disp_held_other_cpi" }, @@ -263,7 +263,7 @@ }, { "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", + "MetricExpr": "ict_noslot_ic_miss_cpi - ict_noslot_ic_l3_cpi - ict_noslot_ic_l3miss_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "ict_noslot_ic_l2_cpi" }, @@ -286,7 +286,7 @@ "MetricName": "ict_noslot_ic_miss_cpi" }, { - "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", + "MetricExpr": "ntc_issue_held_darq_full_cpi + ntc_issue_held_arb_cpi + ntc_issue_held_other_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "issue_hold_cpi" }, @@ -327,7 +327,7 @@ "MetricName": "lrq_other_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL", + "MetricExpr": "lmq_full_stall_cpi + st_fwd_stall_cpi + lhs_stall_cpi + lsu_mfspr_stall_cpi + larx_stall_cpi + lrq_other_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lrq_stall_cpi" }, @@ -338,7 +338,7 @@ "MetricName": "lsaq_arb_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL", + "MetricExpr": "lrq_full_stall_cpi + srq_full_stall_cpi + lsaq_arb_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lsaq_stall_cpi" }, @@ -362,7 +362,7 @@ }, { "BriefDescription": "Completion LSU stall for other reasons", - "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL", + "MetricExpr": "lsu_stall_cpi - lsu_fin_stall_cpi - store_finish_stall_cpi - srq_stall_cpi - load_finish_stall_cpi + lsu_stall_dcache_miss_cpi - lrq_stall_cpi + emq_stall_cpi - lsaq_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "lsu_other_stall_cpi" }, @@ -434,13 +434,13 @@ }, { "BriefDescription": "Cycles unaccounted for.", - "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL", + "MetricExpr": "run_cpi - completion_cpi - thread_block_stall_cpi - stall_cpi - nothing_dispatched_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "other_cpi" }, { "BriefDescription": "Completion stall for other reasons", - "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", + "MetricExpr": "stall_cpi - ntc_disp_fin_stall_cpi - ntc_flush_stall_cpi - lsu_stall_cpi - exec_unit_stall_cpi - bru_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "other_stall_cpi" }, @@ -469,7 +469,7 @@ "MetricName": "run_cyc_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL", + "MetricExpr": "fxu_stall_cpi + dp_stall_cpi + dfu_stall_cpi + pm_stall_cpi + crypto_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "scalar_stall_cpi" }, @@ -492,7 +492,7 @@ "MetricName": "srq_full_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL", + "MetricExpr": "store_data_stall_cpi + eieio_stall_cpi + stcx_stall_cpi + slb_stall_cpi + tend_stall_cpi + paste_stall_cpi + tlbie_stall_cpi + store_pipe_arb_stall_cpi + store_fin_arb_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "srq_stall_cpi" }, @@ -558,7 +558,7 @@ }, { "BriefDescription": "Vector stalls due to small latency double precision ops", - "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "vdp_stall_cpi - vdplong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vdp_other_stall_cpi" }, @@ -575,7 +575,7 @@ "MetricName": "vdplong_stall_cpi" }, { - "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricExpr": "vfxu_stall_cpi + vdp_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vector_stall_cpi" }, @@ -587,7 +587,7 @@ }, { "BriefDescription": "Vector stalls due to small latency integer ops", - "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL", + "MetricExpr": "vfxu_stall_cpi - vfxlong_stall_cpi", "MetricGroup": "cpi_breakdown", "MetricName": "vfxu_other_stall_cpi" }, @@ -1844,7 +1844,7 @@ }, { "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", - "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricExpr": "dl1_reload_from_l31_mod_rate_percent + dl1_reload_from_l31_shr_rate_percent", "MetricName": "dl1_reload_from_l31_rate_percent" }, { @@ -1979,7 +1979,7 @@ }, { "BriefDescription": "Completion stall because a different thread was using the completion pipe", - "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL", + "MetricExpr": "thread_block_stall_cpi - exception_stall_cpi - any_sync_stall_cpi - sync_pmu_int_stall_cpi - spec_finish_stall_cpi - flush_any_thread_stall_cpi - lsu_flush_next_stall_cpi - nested_tbegin_stall_cpi - nested_tend_stall_cpi - mtfpscr_stall_cpi", "MetricName": "other_thread_cmpl_stall" }, { -- cgit From a508d061ef04b163a85f55ba8a748c026f89da45 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 14 Aug 2020 18:44:54 +0800 Subject: perf bench numa: Remove dead code in parse_nodes_opt() In the function parse_nodes_opt(), the statement "return 0;" is dead code, remove it. Signed-off-by: Peng Fan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1597401894-27549-1-git-send-email-fanpeng@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 6d5c890478cb..f85bceccc459 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -733,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused, return -1; return parse_node_list(arg); - - return 0; } #define BIT(x) (1ul << x) -- cgit From c7a14fdcb3fa77368273b3daab52ad8e86d358b6 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Thu, 13 Aug 2020 10:22:04 +0200 Subject: perf build-ids: Fall back to debuginfod query if debuginfo not found During a perf-record, use the -ldebuginfod API to query a debuginfod server, should the debug data not be found in the usual system locations. If successful, the usual $HOME/.debug dir is populated. Tested with: $ find . . ./ctags-debuginfo-5.8-26.fc31.x86_64.rpm ./usr ./usr/lib ./usr/lib/debug ./usr/lib/debug/.build-id ./usr/lib/debug/.build-id/ca ./usr/lib/debug/.build-id/ca/46f6ae6a0cee57d85abc1d461c49074248908d ./usr/lib/debug/.build-id/ca/46f6ae6a0cee57d85abc1d461c49074248908d.debug ./usr/lib/debug/usr ./usr/lib/debug/usr/bin ./usr/lib/debug/usr/bin/ctags-5.8-26.fc31.x86_64.debug $ debuginfod -F . ... $ rm -rf ~/.debug/ ; mkdir ~/.debug $ perf record make tags BUILD: Doing 'make -j8' parallel build GEN tags [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.107 MB perf.data (1483 samples) ] $ find ~/.debug | grep ctags /home/jolsa/.debug/usr/bin/ctags /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d/elf /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d/probes $ rm -rf ~/.debug/ ; mkdir ~/.debug $ DEBUGINFOD_URLS=http://localhost:8002 perf record make tags BUILD: Doing 'make -j8' parallel build GEN tags [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.108 MB perf.data (1531 samples) ] $ find ~/.debug | grep ctag /home/jolsa/.debug/usr/bin/ctags /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d/debug /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d/elf /home/jolsa/.debug/usr/bin/ctags/ca46f6ae6a0cee57d85abc1d461c49074248908d/probes Note the 'debug' file is created in the last run. Note that currently the debuginfo data are downloaded only on record path, we still need add this support to perf build-id/report.. and test ;-) Tested-by: Jiri Olsa Signed-off-by: Jiri Olsa Signed-off-by: Frank Ch. Eigler Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 3 ++- tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libdebuginfod.c | 8 ++++++++ tools/perf/Makefile.config | 8 ++++++++ tools/perf/Makefile.perf | 2 ++ tools/perf/util/build-id.c | 19 +++++++++++++++++++ 6 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libdebuginfod.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e7818b44b48e..c1daf4d57518 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \ llvm-version \ clang \ libbpf \ - libpfm4 + libpfm4 \ + libdebuginfod FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 846ee1341a5c..d220fe952747 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -26,6 +26,7 @@ FILES= \ test-libelf-gelf_getnote.bin \ test-libelf-getshdrstrndx.bin \ test-libelf-mmap.bin \ + test-libdebuginfod.bin \ test-libnuma.bin \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ @@ -157,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin: $(OUTPUT)test-libelf-getshdrstrndx.bin: $(BUILD) -lelf +$(OUTPUT)test-libdebuginfod.bin: + $(BUILD) -ldebuginfod + $(OUTPUT)test-libnuma.bin: $(BUILD) -lnuma diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c new file mode 100644 index 000000000000..da22548b8413 --- /dev/null +++ b/tools/build/feature/test-libdebuginfod.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + debuginfod_client* c = debuginfod_begin(); + return (long)c; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 513633809c81..190be4fa5c21 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -501,6 +501,14 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT endif + ifndef NO_LIBDEBUGINFOD + $(call feature_check,libdebuginfod) + ifeq ($(feature-libdebuginfod), 1) + CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT + EXTLIBS += -ldebuginfod + endif + endif + ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e339e225410c..6031167939ae 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -124,6 +124,8 @@ include ../scripts/utilities.mak # # Define LIBPFM4 to enable libpfm4 events extension. # +# Define NO_LIBDEBUGINFOD if you do not want support debuginfod +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index c076fc7fe025..31207b6e2066 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -31,6 +31,10 @@ #include "probe-file.h" #include "strlist.h" +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + #include #include @@ -636,6 +640,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id, if (realname && access(realname, R_OK)) zfree(&realname); nsinfo__mountns_exit(&nsc); + +#ifdef HAVE_DEBUGINFOD_SUPPORT + if (realname == NULL) { + debuginfod_client* c = debuginfod_begin(); + if (c != NULL) { + int fd = debuginfod_find_debuginfo(c, + (const unsigned char*)sbuild_id, 0, + &realname); + if (fd >= 0) + close(fd); /* retaining reference by realname */ + debuginfod_end(c); + } + } +#endif + out: free(debugfile); return realname; -- cgit From 492e4edba6e2fc0620a69266d33f29c4a1f9ac1e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Aug 2020 09:55:33 -0300 Subject: perf ftrace: Make option description initials all capital letters And improve a bit the -m description to state that a B/K/M/G suffix is needed. Cc: Changbin Du Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 167f1f57aba3..1d44bc2f63d8 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -816,42 +816,42 @@ int cmd_ftrace(int argc, const char **argv) }; const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "tracer to use: function_graph(default) or function"), + "Tracer to use: function_graph(default) or function"), OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions, "Show available functions to filter"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", - "trace on existing process id"), + "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ OPT_STRING(0, "tid", &ftrace.target.tid, "tid", - "trace on existing thread id (exclusive to --pid)"), + "Trace on existing thread id (exclusive to --pid)"), OPT_INCR('v', "verbose", &verbose, - "be more verbose"), + "Be more verbose"), OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, - "system-wide collection from all CPUs"), + "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", - "list of cpus to monitor"), + "List of cpus to monitor"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", - "trace given functions using function tracer", + "Trace given functions using function tracer", parse_filter_func), OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", - "do not trace given functions", parse_filter_func), + "Do not trace given functions", parse_filter_func), OPT_CALLBACK(0, "func-opts", &ftrace, "options", - "function tracer options, available options: call-graph,irq-info", + "Function tracer options, available options: call-graph,irq-info", parse_func_tracer_opts), OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", - "trace given functions using function_graph tracer", + "Trace given functions using function_graph tracer", parse_filter_func), OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), OPT_CALLBACK(0, "graph-opts", &ftrace, "options", - "graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=,depth=", + "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=,depth=", parse_graph_tracer_opts), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", - "size of per cpu buffer", parse_buffer_size), + "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size), OPT_BOOLEAN(0, "inherit", &ftrace.inherit, - "trace children processes"), + "Trace children processes"), OPT_UINTEGER('D', "delay", &ftrace.initial_delay, - "ms to wait before starting tracing after program start"), + "Number of milliseconds to wait before starting tracing after program start"), OPT_END() }; -- cgit From f516fb704d02fff292cb79a6ad85c56529f7c8cf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 20 Apr 2020 21:24:47 -0500 Subject: dt-bindings: Whitespace clean-ups in schema files Clean-up incorrect indentation, extra spaces, long lines, and missing EOF newline in schema files. Most of the clean-ups are for list indentation which should always be 2 spaces more than the preceding keyword. Found with yamllint (which I plan to integrate into the checks). Cc: linux-arm-kernel@lists.infradead.org Cc: linux-clk@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-spi@vger.kernel.org Cc: linux-gpio@vger.kernel.org Cc: linux-remoteproc@vger.kernel.org Cc: linux-hwmon@vger.kernel.org Cc: linux-i2c@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: linux-iio@vger.kernel.org Cc: linux-input@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-mmc@vger.kernel.org Cc: linux-mtd@lists.infradead.org Cc: netdev@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org Cc: linux-usb@vger.kernel.org Acked-by: Sam Ravnborg Signed-off-by: Rob Herring --- .../devicetree/bindings/arm/arm,integrator.yaml | 6 +- .../devicetree/bindings/arm/arm,realview.yaml | 66 +++++++-------- .../devicetree/bindings/arm/arm,vexpress-juno.yaml | 12 +-- .../devicetree/bindings/arm/bcm/brcm,bcm11351.yaml | 2 +- .../devicetree/bindings/arm/bcm/brcm,bcm21664.yaml | 2 +- .../devicetree/bindings/arm/bcm/brcm,bcm23550.yaml | 2 +- .../devicetree/bindings/arm/bcm/brcm,cygnus.yaml | 20 ++--- .../devicetree/bindings/arm/bcm/brcm,hr2.yaml | 2 +- .../devicetree/bindings/arm/bcm/brcm,ns2.yaml | 4 +- .../devicetree/bindings/arm/bcm/brcm,nsp.yaml | 14 ++-- .../devicetree/bindings/arm/bcm/brcm,stingray.yaml | 6 +- .../bindings/arm/bcm/brcm,vulcan-soc.yaml | 4 +- .../devicetree/bindings/arm/coresight-cti.yaml | 20 ++--- Documentation/devicetree/bindings/arm/cpus.yaml | 4 +- Documentation/devicetree/bindings/arm/fsl.yaml | 13 +-- .../devicetree/bindings/arm/intel,keembay.yaml | 2 +- .../bindings/arm/mediatek/mediatek,pericfg.yaml | 30 +++---- .../devicetree/bindings/bus/baikal,bt1-apb.yaml | 2 +- .../devicetree/bindings/bus/baikal,bt1-axi.yaml | 2 +- .../devicetree/bindings/clock/idt,versaclock5.yaml | 8 +- .../devicetree/bindings/clock/ingenic,cgu.yaml | 16 ++-- .../devicetree/bindings/clock/qcom,mmcc.yaml | 2 +- .../bindings/clock/renesas,cpg-clocks.yaml | 16 ++-- .../bindings/clock/sprd,sc9863a-clk.yaml | 2 +- .../bindings/display/bridge/nwl-dsi.yaml | 15 ++-- .../bindings/display/bridge/renesas,lvds.yaml | 18 ++-- .../bindings/display/bridge/simple-bridge.yaml | 18 ++-- .../bindings/display/dsi-controller.yaml | 10 +-- .../bindings/display/ilitek,ili9486.yaml | 4 +- .../devicetree/bindings/display/ingenic,ipu.yaml | 8 +- .../devicetree/bindings/display/ingenic,lcd.yaml | 10 +-- .../devicetree/bindings/display/msm/gmu.yaml | 38 ++++----- .../display/panel/asus,z00t-tm5p5-nt35596.yaml | 4 +- .../bindings/display/panel/boe,tv101wum-nl6.yaml | 12 +-- .../bindings/display/panel/elida,kd35t133.yaml | 4 +- .../display/panel/feixin,k101-im2ba02.yaml | 6 +- .../bindings/display/panel/ilitek,ili9322.yaml | 3 +- .../bindings/display/panel/ilitek,ili9881c.yaml | 3 +- .../display/panel/leadtek,ltk050h3146w.yaml | 4 +- .../display/panel/leadtek,ltk500hd1829.yaml | 4 +- .../bindings/display/panel/novatek,nt35510.yaml | 4 +- .../bindings/display/panel/panel-dsi-cm.yaml | 8 +- .../bindings/display/panel/panel-timing.yaml | 20 ++--- .../bindings/display/panel/raydium,rm68200.yaml | 4 +- .../display/panel/samsung,s6e88a0-ams452ef01.yaml | 4 +- .../bindings/display/panel/visionox,rm69299.yaml | 2 +- .../devicetree/bindings/display/st,stm32-dsi.yaml | 3 +- .../bindings/display/ti/ti,j721e-dss.yaml | 2 +- Documentation/devicetree/bindings/dsp/fsl,dsp.yaml | 4 +- .../devicetree/bindings/example-schema.yaml | 4 +- .../devicetree/bindings/fsi/ibm,fsi2spi.yaml | 2 +- .../bindings/gpio/brcm,xgs-iproc-gpio.yaml | 6 +- .../bindings/gpio/renesas,rcar-gpio.yaml | 58 ++++++------- .../devicetree/bindings/gpu/vivante,gc.yaml | 3 +- .../bindings/hwmon/adi,axi-fan-control.yaml | 2 +- Documentation/devicetree/bindings/i2c/i2c-imx.yaml | 32 ++++---- Documentation/devicetree/bindings/i2c/i2c-pxa.yaml | 4 +- .../devicetree/bindings/iio/adc/adi,ad7606.yaml | 8 +- .../devicetree/bindings/iio/adc/maxim,max1238.yaml | 2 +- .../bindings/iio/adc/qcom,spmi-vadc.yaml | 19 ++--- .../bindings/iio/adc/rockchip-saradc.yaml | 8 +- .../bindings/iio/amplifiers/adi,hmc425a.yaml | 4 +- .../bindings/iio/chemical/atlas,sensor.yaml | 4 +- .../devicetree/bindings/iio/dac/adi,ad5770r.yaml | 60 +++++++------- .../bindings/iio/light/vishay,vcnl4000.yaml | 22 ++--- .../iio/magnetometer/asahi-kasei,ak8975.yaml | 16 ++-- .../bindings/iio/proximity/vishay,vcnl3020.yaml | 4 +- .../bindings/iio/temperature/adi,ltc2983.yaml | 2 +- .../devicetree/bindings/input/imx-keypad.yaml | 26 +++--- .../input/touchscreen/cypress,cy8ctma140.yaml | 2 +- .../bindings/input/touchscreen/edt-ft5x06.yaml | 10 +-- .../bindings/input/touchscreen/goodix.yaml | 5 +- .../bindings/input/touchscreen/touchscreen.yaml | 12 +-- .../bindings/interconnect/fsl,imx8m-noc.yaml | 20 ++--- .../bindings/interconnect/qcom,sc7180.yaml | 2 +- .../bindings/interconnect/qcom,sdm845.yaml | 2 +- .../bindings/interrupt-controller/arm,gic.yaml | 4 +- .../interrupt-controller/ingenic,intc.yaml | 22 ++--- .../bindings/leds/backlight/qcom-wled.yaml | 3 +- .../devicetree/bindings/mailbox/fsl,mu.yaml | 12 +-- .../devicetree/bindings/mailbox/qcom-ipcc.yaml | 2 +- .../media/allwinner,sun8i-a83t-de2-rotate.yaml | 4 +- .../media/allwinner,sun8i-h3-deinterlace.yaml | 4 +- .../devicetree/bindings/media/i2c/adv7180.yaml | 43 +++++----- .../bindings/media/i2c/imi,rdacm2x-gmsl.yaml | 2 +- .../bindings/media/i2c/maxim,max9286.yaml | 2 +- .../devicetree/bindings/media/i2c/ov8856.yaml | 3 +- .../devicetree/bindings/media/renesas,csi2.yaml | 18 ++-- .../devicetree/bindings/media/rockchip-vpu.yaml | 4 +- .../bindings/media/xilinx/xlnx,csi2rxss.yaml | 15 ++-- .../bindings/memory-controllers/fsl/mmdc.yaml | 12 +-- .../bindings/memory-controllers/ingenic,nemc.yaml | 8 +- .../memory-controllers/renesas,rpc-if.yaml | 8 +- .../devicetree/bindings/mfd/cirrus,madera.yaml | 34 ++++---- .../devicetree/bindings/mfd/gateworks-gsc.yaml | 4 +- .../devicetree/bindings/mfd/st,stpmic1.yaml | 24 +++--- .../bindings/mfd/ti,j721e-system-controller.yaml | 11 ++- .../devicetree/bindings/mfd/wlf,arizona.yaml | 22 ++--- .../bindings/mmc/amlogic,meson-mx-sdhc.yaml | 6 +- .../devicetree/bindings/mmc/ingenic,mmc.yaml | 14 ++-- .../devicetree/bindings/mmc/renesas,sdhi.yaml | 6 +- .../bindings/mtd/arasan,nand-controller.yaml | 8 +- .../devicetree/bindings/mtd/gpmi-nand.yaml | 2 +- .../devicetree/bindings/mtd/mxc-nand.yaml | 2 +- .../bindings/mtd/st,stm32-fmc2-nand.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 4 +- .../devicetree/bindings/net/qcom,ipa.yaml | 12 +-- .../bindings/net/socionext,uniphier-ave4.yaml | 12 +-- .../devicetree/bindings/net/stm32-dwmac.yaml | 12 +-- .../devicetree/bindings/net/ti,cpsw-switch.yaml | 62 +++++++------- .../bindings/net/ti,k3-am654-cpsw-nuss.yaml | 66 +++++++-------- .../devicetree/bindings/nvmem/imx-ocotp.yaml | 24 +++--- .../devicetree/bindings/nvmem/qcom,qfprom.yaml | 4 +- .../bindings/phy/amlogic,meson-g12a-usb2-phy.yaml | 6 +- .../bindings/phy/phy-rockchip-inno-usb2.yaml | 6 +- .../devicetree/bindings/phy/qcom,qmp-phy.yaml | 13 ++- .../bindings/phy/qcom,qmp-usb3-dp-phy.yaml | 9 +- .../devicetree/bindings/phy/qcom,qusb2-phy.yaml | 30 +++---- .../bindings/phy/socionext,uniphier-pcie-phy.yaml | 8 +- .../phy/socionext,uniphier-usb3hs-phy.yaml | 10 +-- .../phy/socionext,uniphier-usb3ss-phy.yaml | 22 ++--- .../devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 3 +- .../bindings/pinctrl/aspeed,ast2400-pinctrl.yaml | 32 ++++---- .../bindings/pinctrl/aspeed,ast2500-pinctrl.yaml | 36 ++++---- .../bindings/pinctrl/aspeed,ast2600-pinctrl.yaml | 96 +++++++++++----------- .../bindings/pinctrl/ingenic,pinctrl.yaml | 24 +++--- .../bindings/pinctrl/qcom,ipq6018-pinctrl.yaml | 54 ++++++------ .../bindings/pinctrl/qcom,sm8250-pinctrl.yaml | 32 ++++---- .../bindings/pinctrl/st,stm32-pinctrl.yaml | 4 +- .../devicetree/bindings/power/power-domain.yaml | 14 ++-- .../bindings/power/supply/gpio-charger.yaml | 4 +- .../bindings/regulator/qcom,smd-rpm-regulator.yaml | 3 +- .../bindings/regulator/qcom-labibb-regulator.yaml | 4 +- .../bindings/remoteproc/ti,k3-dsp-rproc.yaml | 20 ++--- .../devicetree/bindings/reset/fsl,imx7-src.yaml | 6 +- .../devicetree/bindings/rtc/ingenic,rtc.yaml | 16 ++-- .../devicetree/bindings/serial/ingenic,uart.yaml | 20 ++--- .../soc/microchip/atmel,at91rm9200-tcb.yaml | 12 +-- .../devicetree/bindings/soc/qcom/qcom,geni-se.yaml | 30 +++---- .../devicetree/bindings/sound/amlogic,aiu.yaml | 11 ++- .../bindings/sound/amlogic,g12a-toacodec.yaml | 10 +-- .../devicetree/bindings/sound/cirrus,cs42l51.yaml | 2 +- .../devicetree/bindings/sound/ingenic,aic.yaml | 12 +-- .../devicetree/bindings/sound/maxim,max98390.yaml | 2 +- .../devicetree/bindings/sound/rockchip-i2s.yaml | 24 +++--- .../devicetree/bindings/sound/rockchip-spdif.yaml | 4 +- .../devicetree/bindings/sound/tas2770.yaml | 4 +- .../devicetree/bindings/sound/tlv320adcx140.yaml | 28 +++---- .../bindings/spi/allwinner,sun6i-a31-spi.yaml | 8 +- .../devicetree/bindings/spi/fsl-imx-cspi.yaml | 26 +++--- .../bindings/spi/mikrotik,rb4xx-spi.yaml | 2 +- Documentation/devicetree/bindings/spi/spi-mux.yaml | 74 ++++++++--------- .../devicetree/bindings/spi/spi-rockchip.yaml | 14 ++-- .../bindings/thermal/thermal-cooling-devices.yaml | 6 +- .../devicetree/bindings/thermal/thermal-idle.yaml | 45 +++++----- .../devicetree/bindings/timer/fsl,imxgpt.yaml | 14 ++-- .../devicetree/bindings/timer/ingenic,tcu.yaml | 50 +++++------ .../bindings/timer/snps,dw-apb-timer.yaml | 4 +- .../devicetree/bindings/trivial-devices.yaml | 2 +- Documentation/devicetree/bindings/usb/dwc2.yaml | 31 +++---- .../devicetree/bindings/usb/generic-ehci.yaml | 2 +- .../devicetree/bindings/usb/ingenic,musb.yaml | 8 +- .../devicetree/bindings/usb/nvidia,tegra-xudc.yaml | 10 +-- .../devicetree/bindings/usb/ti,j721e-usb.yaml | 6 +- .../devicetree/bindings/usb/ti,keystone-dwc3.yaml | 4 +- .../devicetree/bindings/vendor-prefixes.yaml | 3 +- 166 files changed, 1107 insertions(+), 1118 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/arm,integrator.yaml b/Documentation/devicetree/bindings/arm/arm,integrator.yaml index 192ded470e32..f0daf990e077 100644 --- a/Documentation/devicetree/bindings/arm/arm,integrator.yaml +++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml @@ -67,9 +67,9 @@ patternProperties: compatible: items: - enum: - - arm,integrator-ap-syscon - - arm,integrator-cp-syscon - - arm,integrator-sp-syscon + - arm,integrator-ap-syscon + - arm,integrator-cp-syscon + - arm,integrator-sp-syscon - const: syscon reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml index d6e85d198afe..1d0b4e2bc7d2 100644 --- a/Documentation/devicetree/bindings/arm/arm,realview.yaml +++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml @@ -55,20 +55,20 @@ properties: compatible: oneOf: - items: - - const: arm,realview-eb-soc - - const: simple-bus + - const: arm,realview-eb-soc + - const: simple-bus - items: - - const: arm,realview-pb1176-soc - - const: simple-bus + - const: arm,realview-pb1176-soc + - const: simple-bus - items: - - const: arm,realview-pb11mp-soc - - const: simple-bus + - const: arm,realview-pb11mp-soc + - const: simple-bus - items: - - const: arm,realview-pba8-soc - - const: simple-bus + - const: arm,realview-pba8-soc + - const: simple-bus - items: - - const: arm,realview-pbx-soc - - const: simple-bus + - const: arm,realview-pbx-soc + - const: simple-bus patternProperties: "^.*syscon@[0-9a-f]+$": @@ -79,35 +79,35 @@ properties: compatible: oneOf: - items: - - const: arm,realview-eb11mp-revb-syscon - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-eb11mp-revb-syscon + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-eb11mp-revc-syscon - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-eb11mp-revc-syscon + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-pb1176-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-pb1176-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-pb11mp-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-pb11mp-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-pba8-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-pba8-syscon + - const: syscon + - const: simple-mfd - items: - - const: arm,realview-pbx-syscon - - const: syscon - - const: simple-mfd + - const: arm,realview-pbx-syscon + - const: syscon + - const: simple-mfd required: - compatible diff --git a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml index a3420c81cf35..26829a803fda 100644 --- a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml +++ b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml @@ -165,10 +165,10 @@ patternProperties: compatible: oneOf: - items: - - enum: - - arm,vexpress,v2m-p1 - - arm,vexpress,v2p-p1 - - const: simple-bus + - enum: + - arm,vexpress,v2m-p1 + - arm,vexpress,v2p-p1 + - const: simple-bus - const: simple-bus motherboard: type: object @@ -186,8 +186,8 @@ patternProperties: compatible: items: - enum: - - arm,vexpress,v2m-p1 - - arm,vexpress,v2p-p1 + - arm,vexpress,v2m-p1 + - arm,vexpress,v2p-p1 - const: simple-bus arm,v2m-memory-map: description: This describes the memory map type. diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml index b5ef2666e6b2..497600a2ffb9 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml @@ -15,7 +15,7 @@ properties: compatible: items: - enum: - - brcm,bcm28155-ap + - brcm,bcm28155-ap - const: brcm,bcm11351 ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml index aafbd6a27708..e0ee931723dc 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml @@ -15,7 +15,7 @@ properties: compatible: items: - enum: - - brcm,bcm21664-garnet + - brcm,bcm21664-garnet - const: brcm,bcm21664 ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml index c4b4efd28a55..40d12ea56e54 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml @@ -15,7 +15,7 @@ properties: compatible: items: - enum: - - brcm,bcm23550-sparrow + - brcm,bcm23550-sparrow - const: brcm,bcm23550 ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml index fe111e72dac3..9ba7b16e1fc4 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml @@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Broadcom Cygnus device tree bindings maintainers: - - Ray Jui - - Scott Branden + - Ray Jui + - Scott Branden properties: $nodename: @@ -16,14 +16,14 @@ properties: compatible: items: - enum: - - brcm,bcm11300 - - brcm,bcm11320 - - brcm,bcm11350 - - brcm,bcm11360 - - brcm,bcm58300 - - brcm,bcm58302 - - brcm,bcm58303 - - brcm,bcm58305 + - brcm,bcm11300 + - brcm,bcm11320 + - brcm,bcm11350 + - brcm,bcm11360 + - brcm,bcm58300 + - brcm,bcm58302 + - brcm,bcm58303 + - brcm,bcm58305 - const: brcm,cygnus ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml index 1158f49b0b83..ae614b6722c2 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml @@ -21,7 +21,7 @@ properties: compatible: items: - enum: - - ubnt,unifi-switch8 + - ubnt,unifi-switch8 - const: brcm,bcm53342 - const: brcm,hr2 diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml index 2451704f87f0..0749adf94c28 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml @@ -16,8 +16,8 @@ properties: compatible: items: - enum: - - brcm,ns2-svk - - brcm,ns2-xmc + - brcm,ns2-svk + - brcm,ns2-xmc - const: brcm,ns2 ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml index fe364cebf57f..8c2cacb2bb4f 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml @@ -24,13 +24,13 @@ properties: compatible: items: - enum: - - brcm,bcm58522 - - brcm,bcm58525 - - brcm,bcm58535 - - brcm,bcm58622 - - brcm,bcm58623 - - brcm,bcm58625 - - brcm,bcm88312 + - brcm,bcm58522 + - brcm,bcm58525 + - brcm,bcm58535 + - brcm,bcm58622 + - brcm,bcm58623 + - brcm,bcm58625 + - brcm,bcm88312 - const: brcm,nsp ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml index 4ad2b2124ab4..c13cb96545a2 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml @@ -16,9 +16,9 @@ properties: compatible: items: - enum: - - brcm,bcm958742k - - brcm,bcm958742t - - brcm,bcm958802a802x + - brcm,bcm958742k + - brcm,bcm958742t + - brcm,bcm958802a802x - const: brcm,stingray ... diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml index c5b6f31c20b9..ccdf9f99cb2b 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml @@ -15,8 +15,8 @@ properties: compatible: items: - enum: - - brcm,vulcan-eval - - cavium,thunderx2-cn9900 + - brcm,vulcan-eval + - cavium,thunderx2-cn9900 - const: brcm,vulcan-soc ... diff --git a/Documentation/devicetree/bindings/arm/coresight-cti.yaml b/Documentation/devicetree/bindings/arm/coresight-cti.yaml index 17df5cd12d8d..e42ff69d8bfb 100644 --- a/Documentation/devicetree/bindings/arm/coresight-cti.yaml +++ b/Documentation/devicetree/bindings/arm/coresight-cti.yaml @@ -82,12 +82,12 @@ properties: compatible: oneOf: - items: - - const: arm,coresight-cti - - const: arm,primecell + - const: arm,coresight-cti + - const: arm,primecell - items: - - const: arm,coresight-cti-v8-arch - - const: arm,coresight-cti - - const: arm,primecell + - const: arm,coresight-cti-v8-arch + - const: arm,coresight-cti + - const: arm,primecell reg: maxItems: 1 @@ -191,16 +191,16 @@ patternProperties: anyOf: - required: - - arm,trig-in-sigs + - arm,trig-in-sigs - required: - - arm,trig-out-sigs + - arm,trig-out-sigs oneOf: - required: - - arm,trig-conn-name + - arm,trig-conn-name - required: - - cpu + - cpu - required: - - arm,cs-dev-assoc + - arm,cs-dev-assoc required: - reg diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 40f692c846f0..1222bf1831fa 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -330,8 +330,8 @@ if: - enable-method then: - required: - - secondary-boot-reg + required: + - secondary-boot-reg required: - device_type diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index f63895c8ce2d..6da9d734cdb7 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -273,8 +273,8 @@ properties: - fsl,imx6ull-14x14-evk # i.MX6 UltraLiteLite 14x14 EVK Board - kontron,imx6ull-n6411-som # Kontron N6411 SOM - myir,imx6ull-mys-6ulx-eval # MYiR Tech iMX6ULL Evaluation Board - - toradex,colibri-imx6ull-eval # Colibri iMX6ULL Module on Colibri Evaluation Board - - toradex,colibri-imx6ull-wifi-eval # Colibri iMX6ULL Wi-Fi / Bluetooth Module on Colibri Evaluation Board + - toradex,colibri-imx6ull-eval # Colibri iMX6ULL Module on Colibri Eval Board + - toradex,colibri-imx6ull-wifi-eval # Colibri iMX6ULL Wi-Fi / BT Module on Colibri Eval Board - const: fsl,imx6ull - description: Kontron N6411 S Board @@ -312,9 +312,12 @@ properties: - toradex,colibri-imx7d # Colibri iMX7 Dual Module - toradex,colibri-imx7d-aster # Colibri iMX7 Dual Module on Aster Carrier Board - toradex,colibri-imx7d-emmc # Colibri iMX7 Dual 1GB (eMMC) Module - - toradex,colibri-imx7d-emmc-aster # Colibri iMX7 Dual 1GB (eMMC) Module on Aster Carrier Board - - toradex,colibri-imx7d-emmc-eval-v3 # Colibri iMX7 Dual 1GB (eMMC) Module on Colibri Evaluation Board V3 - - toradex,colibri-imx7d-eval-v3 # Colibri iMX7 Dual Module on Colibri Evaluation Board V3 + - toradex,colibri-imx7d-emmc-aster # Colibri iMX7 Dual 1GB (eMMC) Module on + # Aster Carrier Board + - toradex,colibri-imx7d-emmc-eval-v3 # Colibri iMX7 Dual 1GB (eMMC) Module on + # Colibri Evaluation Board V3 + - toradex,colibri-imx7d-eval-v3 # Colibri iMX7 Dual Module on + # Colibri Evaluation Board V3 - tq,imx7d-mba7 # i.MX7D TQ MBa7 with TQMa7D SoM - zii,imx7d-rmu2 # ZII RMU2 Board - zii,imx7d-rpu2 # ZII RPU2 Board diff --git a/Documentation/devicetree/bindings/arm/intel,keembay.yaml b/Documentation/devicetree/bindings/arm/intel,keembay.yaml index 4d925785f504..06a7b05f435f 100644 --- a/Documentation/devicetree/bindings/arm/intel,keembay.yaml +++ b/Documentation/devicetree/bindings/arm/intel,keembay.yaml @@ -14,6 +14,6 @@ properties: compatible: items: - enum: - - intel,keembay-evm + - intel,keembay-evm - const: intel,keembay ... diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml index e271c4682ebc..1af30174b2d0 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml @@ -17,22 +17,22 @@ properties: compatible: oneOf: - items: - - enum: - - mediatek,mt2701-pericfg - - mediatek,mt2712-pericfg - - mediatek,mt6765-pericfg - - mediatek,mt7622-pericfg - - mediatek,mt7629-pericfg - - mediatek,mt8135-pericfg - - mediatek,mt8173-pericfg - - mediatek,mt8183-pericfg - - mediatek,mt8516-pericfg - - const: syscon + - enum: + - mediatek,mt2701-pericfg + - mediatek,mt2712-pericfg + - mediatek,mt6765-pericfg + - mediatek,mt7622-pericfg + - mediatek,mt7629-pericfg + - mediatek,mt8135-pericfg + - mediatek,mt8173-pericfg + - mediatek,mt8183-pericfg + - mediatek,mt8516-pericfg + - const: syscon - items: - # Special case for mt7623 for backward compatibility - - const: mediatek,mt7623-pericfg - - const: mediatek,mt2701-pericfg - - const: syscon + # Special case for mt7623 for backward compatibility + - const: mediatek,mt7623-pericfg + - const: mediatek,mt2701-pericfg + - const: syscon reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml index 68b0131a31d0..37ba3337f944 100644 --- a/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml +++ b/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml @@ -19,7 +19,7 @@ description: | reported to the APB terminator (APB Errors Handler Block). allOf: - - $ref: /schemas/simple-bus.yaml# + - $ref: /schemas/simple-bus.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml index 29e1aaea132b..0bee4694578a 100644 --- a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml +++ b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml @@ -23,7 +23,7 @@ description: | accessible by means of the Baikal-T1 System Controller. allOf: - - $ref: /schemas/simple-bus.yaml# + - $ref: /schemas/simple-bus.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml index 3d4e1685cc55..28c6461b9a9a 100644 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml @@ -95,10 +95,10 @@ allOf: # Devices without builtin crystal properties: clock-names: - minItems: 1 - maxItems: 2 - items: - enum: [ xin, clkin ] + minItems: 1 + maxItems: 2 + items: + enum: [ xin, clkin ] clocks: minItems: 1 maxItems: 2 diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml index a952d5811823..5dd7ea8a78e4 100644 --- a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml +++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml @@ -47,12 +47,12 @@ properties: compatible: items: - enum: - - ingenic,jz4740-cgu - - ingenic,jz4725b-cgu - - ingenic,jz4770-cgu - - ingenic,jz4780-cgu - - ingenic,x1000-cgu - - ingenic,x1830-cgu + - ingenic,jz4740-cgu + - ingenic,jz4725b-cgu + - ingenic,jz4770-cgu + - ingenic,jz4780-cgu + - ingenic,x1000-cgu + - ingenic,x1830-cgu - const: simple-mfd minItems: 1 @@ -68,8 +68,8 @@ properties: items: - const: ext - enum: - - rtc - - osc32k # Different name, same clock + - rtc + - osc32k # Different name, same clock assigned-clocks: minItems: 1 diff --git a/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml index 1b16a863b355..af32dee14fc6 100644 --- a/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml @@ -65,7 +65,7 @@ properties: protected-clocks: description: - Protected clock specifier list as per common clock binding + Protected clock specifier list as per common clock binding vdd-gfx-supply: description: diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml index b83f4138f2f8..9185d101737e 100644 --- a/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml @@ -19,15 +19,15 @@ description: properties: compatible: oneOf: - - const: renesas,r8a73a4-cpg-clocks # R-Mobile APE6 - - const: renesas,r8a7740-cpg-clocks # R-Mobile A1 - - const: renesas,r8a7778-cpg-clocks # R-Car M1 - - const: renesas,r8a7779-cpg-clocks # R-Car H1 + - const: renesas,r8a73a4-cpg-clocks # R-Mobile APE6 + - const: renesas,r8a7740-cpg-clocks # R-Mobile A1 + - const: renesas,r8a7778-cpg-clocks # R-Car M1 + - const: renesas,r8a7779-cpg-clocks # R-Car H1 - items: - - enum: - - renesas,r7s72100-cpg-clocks # RZ/A1H - - const: renesas,rz-cpg-clocks # RZ/A1 - - const: renesas,sh73a0-cpg-clocks # SH-Mobile AG5 + - enum: + - renesas,r7s72100-cpg-clocks # RZ/A1H + - const: renesas,rz-cpg-clocks # RZ/A1 + - const: renesas,sh73a0-cpg-clocks # SH-Mobile AG5 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml b/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml index 29813873cfbc..c6d091518650 100644 --- a/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml +++ b/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml @@ -16,7 +16,7 @@ properties: "#clock-cells": const: 1 - compatible : + compatible: enum: - sprd,sc9863a-ap-clk - sprd,sc9863a-aon-clk diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml index 2c4c34e3bc29..04099f5bea3f 100644 --- a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml @@ -162,14 +162,13 @@ required: additionalProperties: false examples: - - | + - | + #include + #include + #include + #include - #include - #include - #include - #include - - mipi_dsi: mipi_dsi@30a00000 { + mipi_dsi: mipi_dsi@30a00000 { #address-cells = <1>; #size-cells = <0>; compatible = "fsl,imx8mq-nwl-dsi"; @@ -224,4 +223,4 @@ examples: }; }; }; - }; + }; diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml index 98c7330a9485..baaf2a2a6fed 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml +++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml @@ -119,17 +119,17 @@ then: # The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks. # These clocks are optional. - enum: - - extal - - dclkin.0 - - dclkin.1 + - extal + - dclkin.0 + - dclkin.1 - enum: - - extal - - dclkin.0 - - dclkin.1 + - extal + - dclkin.0 + - dclkin.1 - enum: - - extal - - dclkin.0 - - dclkin.1 + - extal + - dclkin.0 + - dclkin.1 required: - clock-names diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml index 0880cbf217d5..3ddb35fcf0a2 100644 --- a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml +++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml @@ -18,16 +18,16 @@ properties: compatible: oneOf: - items: - - enum: - - ti,ths8134a - - ti,ths8134b - - const: ti,ths8134 + - enum: + - ti,ths8134a + - ti,ths8134b + - const: ti,ths8134 - enum: - - adi,adv7123 - - dumb-vga-dac - - ti,opa362 - - ti,ths8134 - - ti,ths8135 + - adi,adv7123 + - dumb-vga-dac + - ti,opa362 + - ti,ths8134 + - ti,ths8135 ports: type: object diff --git a/Documentation/devicetree/bindings/display/dsi-controller.yaml b/Documentation/devicetree/bindings/display/dsi-controller.yaml index 85b71b1fd28a..a02039e3aca0 100644 --- a/Documentation/devicetree/bindings/display/dsi-controller.yaml +++ b/Documentation/devicetree/bindings/display/dsi-controller.yaml @@ -55,11 +55,11 @@ patternProperties: clock-master: type: boolean description: - Should be enabled if the host is being used in conjunction with - another DSI host to drive the same peripheral. Hardware supporting - such a configuration generally requires the data on both the busses - to be driven by the same clock. Only the DSI host instance - controlling this clock should contain this property. + Should be enabled if the host is being used in conjunction with + another DSI host to drive the same peripheral. Hardware supporting + such a configuration generally requires the data on both the busses + to be driven by the same clock. Only the DSI host instance + controlling this clock should contain this property. enforce-video-mode: type: boolean diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml index 66e93e563653..aecff34f505d 100644 --- a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml +++ b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml @@ -21,9 +21,9 @@ properties: items: - enum: # Waveshare 3.5" 320x480 Color TFT LCD - - waveshare,rpi-lcd-35 + - waveshare,rpi-lcd-35 # Ozzmaker 3.5" 320x480 Color TFT LCD - - ozzmaker,piscreen + - ozzmaker,piscreen - const: ilitek,ili9486 spi-max-frequency: diff --git a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml index 5bfc33eb32c9..12064a8e7a92 100644 --- a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml +++ b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml @@ -13,11 +13,11 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4725b-ipu - - ingenic,jz4760-ipu + - ingenic,jz4725b-ipu + - ingenic,jz4760-ipu - items: - - const: ingenic,jz4770-ipu - - const: ingenic,jz4760-ipu + - const: ingenic,jz4770-ipu + - const: ingenic,jz4760-ipu reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml index d56db1802fad..768050f30dba 100644 --- a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml +++ b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml @@ -58,11 +58,11 @@ properties: - port@0 required: - - compatible - - reg - - interrupts - - clocks - - clock-names + - compatible + - reg + - interrupts + - clocks + - clock-names if: properties: diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml index 0b8736a9384e..53056dd02597 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml @@ -38,10 +38,10 @@ properties: clocks: items: - - description: GMU clock - - description: GPU CX clock - - description: GPU AXI clock - - description: GPU MEMNOC clock + - description: GMU clock + - description: GPU CX clock + - description: GPU AXI clock + - description: GPU MEMNOC clock clock-names: items: @@ -52,8 +52,8 @@ properties: interrupts: items: - - description: GMU HFI interrupt - - description: GMU interrupt + - description: GMU HFI interrupt + - description: GMU interrupt interrupt-names: @@ -62,14 +62,14 @@ properties: - const: gmu power-domains: - items: - - description: CX power domain - - description: GX power domain + items: + - description: CX power domain + - description: GX power domain power-domain-names: - items: - - const: cx - - const: gx + items: + - const: cx + - const: gx iommus: maxItems: 1 @@ -90,13 +90,13 @@ required: - operating-points-v2 examples: - - | - #include - #include - #include - #include + - | + #include + #include + #include + #include - gmu: gmu@506a000 { + gmu: gmu@506a000 { compatible="qcom,adreno-gmu-630.2", "qcom,adreno-gmu"; reg = <0x506a000 0x30000>, @@ -120,4 +120,4 @@ examples: iommus = <&adreno_smmu 5>; operating-points-v2 = <&gmu_opp_table>; - }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 083d2b9d0c69..75a09df68ba0 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -24,9 +24,9 @@ properties: reg: true reset-gpios: true vdd-supply: - description: core voltage supply + description: core voltage supply vddio-supply: - description: vddio supply + description: vddio supply required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml index 7f5df5851017..38bc1d1b511e 100644 --- a/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml +++ b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml @@ -48,12 +48,12 @@ properties: port: true required: - - compatible - - reg - - enable-gpios - - pp1800-supply - - avdd-supply - - avee-supply + - compatible + - reg + - enable-gpios + - pp1800-supply + - avdd-supply + - avee-supply additionalProperties: false diff --git a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml index aa761f697b7a..7adb83e2e8d9 100644 --- a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml +++ b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml @@ -19,9 +19,9 @@ properties: backlight: true reset-gpios: true iovcc-supply: - description: regulator that supplies the iovcc voltage + description: regulator that supplies the iovcc voltage vdd-supply: - description: regulator that supplies the vdd voltage + description: regulator that supplies the vdd voltage required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml b/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml index 927f1eea18d2..81adb82f061d 100644 --- a/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml +++ b/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml @@ -19,11 +19,11 @@ properties: backlight: true reset-gpios: true avdd-supply: - description: regulator that supplies the AVDD voltage + description: regulator that supplies the AVDD voltage dvdd-supply: - description: regulator that supplies the DVDD voltage + description: regulator that supplies the DVDD voltage cvdd-supply: - description: regulator that supplies the CVDD voltage + description: regulator that supplies the CVDD voltage required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml index 177d48c5bd97..e89c1ea62ffa 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml @@ -25,8 +25,7 @@ properties: compatible: items: - enum: - - dlink,dir-685-panel - + - dlink,dir-685-panel - const: ilitek,ili9322 reset-gpios: true diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml index a39332276bab..76a9068a85dd 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml @@ -13,8 +13,7 @@ properties: compatible: items: - enum: - - bananapi,lhr050h41 - + - bananapi,lhr050h41 - const: ilitek,ili9881c backlight: true diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml index a372bdc5bde1..3715882b63b6 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml @@ -21,9 +21,9 @@ properties: backlight: true reset-gpios: true iovcc-supply: - description: regulator that supplies the iovcc voltage + description: regulator that supplies the iovcc voltage vci-supply: - description: regulator that supplies the vci voltage + description: regulator that supplies the vci voltage required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml index b900973b5f7b..c5944b4d636c 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml @@ -19,9 +19,9 @@ properties: backlight: true reset-gpios: true iovcc-supply: - description: regulator that supplies the iovcc voltage + description: regulator that supplies the iovcc voltage vcc-supply: - description: regulator that supplies the vcc voltage + description: regulator that supplies the vcc voltage required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml index 73d2ff3baaff..bc92928c805b 100644 --- a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml +++ b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml @@ -25,9 +25,9 @@ properties: reg: true reset-gpios: true vdd-supply: - description: regulator that supplies the vdd voltage + description: regulator that supplies the vdd voltage vddi-supply: - description: regulator that supplies the vddi voltage + description: regulator that supplies the vddi voltage backlight: true required: diff --git a/Documentation/devicetree/bindings/display/panel/panel-dsi-cm.yaml b/Documentation/devicetree/bindings/display/panel/panel-dsi-cm.yaml index d766c949c622..4a36aa64c716 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-dsi-cm.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-dsi-cm.yaml @@ -27,10 +27,10 @@ properties: compatible: items: - enum: - - motorola,droid4-panel # Panel from Motorola Droid4 phone - - nokia,himalaya # Panel from Nokia N950 phone - - tpo,taal # Panel from OMAP4 SDP board - - const: panel-dsi-cm # Generic DSI command mode panel compatible fallback + - motorola,droid4-panel # Panel from Motorola Droid4 phone + - nokia,himalaya # Panel from Nokia N950 phone + - tpo,taal # Panel from OMAP4 SDP board + - const: panel-dsi-cm # Generic DSI command mode panel compatible fallback reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml index 182c19cb7fdd..9bf592dc3033 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml @@ -59,7 +59,7 @@ description: | properties: clock-frequency: - description: Panel clock in Hz + description: Panel clock in Hz hactive: $ref: /schemas/types.yaml#/definitions/uint32 @@ -200,15 +200,15 @@ properties: description: Enable double clock mode required: - - clock-frequency - - hactive - - vactive - - hfront-porch - - hback-porch - - hsync-len - - vfront-porch - - vback-porch - - vsync-len + - clock-frequency + - hactive + - vactive + - hfront-porch + - hback-porch + - hsync-len + - vfront-porch + - vback-porch + - vsync-len additionalProperties: false diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml index a35ba16fc000..39477793d289 100644 --- a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml +++ b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml @@ -10,8 +10,8 @@ maintainers: - Philippe CORNU description: | - The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD - panel connected using a MIPI-DSI video interface. + The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD + panel connected using a MIPI-DSI video interface. allOf: - $ref: panel-common.yaml# diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml index 7a685d0428b3..44ce98f68705 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml @@ -18,9 +18,9 @@ properties: reg: true reset-gpios: true vdd3-supply: - description: core voltage supply + description: core voltage supply vci-supply: - description: voltage supply for analog circuits + description: voltage supply for analog circuits required: - compatible diff --git a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml index b36f39f6b233..076b057b4af5 100644 --- a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml +++ b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Visionox model RM69299 Panels Device Tree Bindings. maintainers: - - Harigovindan P + - Harigovindan P description: | This binding is for display panels using a Visionox RM692999 panel. diff --git a/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml index 3be76d15bf6c..69cc7e8bf15a 100644 --- a/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml +++ b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml @@ -45,7 +45,7 @@ properties: phy-dsi-supply: description: - Phandle of the regulator that provides the supply voltage. + Phandle of the regulator that provides the supply voltage. ports: type: object @@ -147,4 +147,3 @@ examples: ... - diff --git a/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml index bbd76591c180..173730d56334 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml +++ b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml @@ -78,7 +78,7 @@ properties: - const: vp4 interrupts: - items: + items: - description: common_m DSS Master common - description: common_s0 DSS Shared common 0 - description: common_s1 DSS Shared common 1 diff --git a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml index 3bbe9521c0bc..4cc011230153 100644 --- a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml +++ b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml @@ -56,8 +56,8 @@ properties: memory-region: description: - phandle to a node describing reserved memory (System RAM memory) - used by DSP (see bindings/reserved-memory/reserved-memory.txt) + phandle to a node describing reserved memory (System RAM memory) + used by DSP (see bindings/reserved-memory/reserved-memory.txt) maxItems: 1 required: diff --git a/Documentation/devicetree/bindings/example-schema.yaml b/Documentation/devicetree/bindings/example-schema.yaml index c9534d2164a2..822975dbeafa 100644 --- a/Documentation/devicetree/bindings/example-schema.yaml +++ b/Documentation/devicetree/bindings/example-schema.yaml @@ -177,10 +177,10 @@ properties: dependencies: # 'vendor,bool-property' is only allowed when 'vendor,string-array-property' # is present - vendor,bool-property: [ vendor,string-array-property ] + vendor,bool-property: [ 'vendor,string-array-property' ] # Expressing 2 properties in both orders means all of the set of properties # must be present or none of them. - vendor,string-array-property: [ vendor,bool-property ] + vendor,string-array-property: [ 'vendor,bool-property' ] required: - compatible diff --git a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml index 893d81e54caa..b26d4b4be743 100644 --- a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml +++ b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: IBM FSI-attached SPI controllers maintainers: - - Eddie James + - Eddie James description: | This binding describes an FSI CFAM engine called the FSI2SPI. Therefore this diff --git a/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml b/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml index 4f2cbd8307a7..c213cb9ddb9f 100644 --- a/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml @@ -19,10 +19,8 @@ properties: reg: items: - - description: the I/O address containing the GPIO controller - registers. - - description: the I/O address containing the Chip Common A interrupt - registers. + - description: the I/O address containing the GPIO controller registers. + - description: the I/O address containing the Chip Common A interrupt registers. gpio-controller: true diff --git a/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml b/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml index 397d9383d15a..3ad229307bd5 100644 --- a/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml @@ -13,39 +13,39 @@ properties: compatible: oneOf: - items: - - enum: - - renesas,gpio-r8a7778 # R-Car M1 - - renesas,gpio-r8a7779 # R-Car H1 - - const: renesas,rcar-gen1-gpio # R-Car Gen1 + - enum: + - renesas,gpio-r8a7778 # R-Car M1 + - renesas,gpio-r8a7779 # R-Car H1 + - const: renesas,rcar-gen1-gpio # R-Car Gen1 - items: - - enum: - - renesas,gpio-r8a7742 # RZ/G1H - - renesas,gpio-r8a7743 # RZ/G1M - - renesas,gpio-r8a7744 # RZ/G1N - - renesas,gpio-r8a7745 # RZ/G1E - - renesas,gpio-r8a77470 # RZ/G1C - - renesas,gpio-r8a7790 # R-Car H2 - - renesas,gpio-r8a7791 # R-Car M2-W - - renesas,gpio-r8a7792 # R-Car V2H - - renesas,gpio-r8a7793 # R-Car M2-N - - renesas,gpio-r8a7794 # R-Car E2 - - const: renesas,rcar-gen2-gpio # R-Car Gen2 or RZ/G1 + - enum: + - renesas,gpio-r8a7742 # RZ/G1H + - renesas,gpio-r8a7743 # RZ/G1M + - renesas,gpio-r8a7744 # RZ/G1N + - renesas,gpio-r8a7745 # RZ/G1E + - renesas,gpio-r8a77470 # RZ/G1C + - renesas,gpio-r8a7790 # R-Car H2 + - renesas,gpio-r8a7791 # R-Car M2-W + - renesas,gpio-r8a7792 # R-Car V2H + - renesas,gpio-r8a7793 # R-Car M2-N + - renesas,gpio-r8a7794 # R-Car E2 + - const: renesas,rcar-gen2-gpio # R-Car Gen2 or RZ/G1 - items: - - enum: - - renesas,gpio-r8a774a1 # RZ/G2M - - renesas,gpio-r8a774b1 # RZ/G2N - - renesas,gpio-r8a774c0 # RZ/G2E - - renesas,gpio-r8a7795 # R-Car H3 - - renesas,gpio-r8a7796 # R-Car M3-W - - renesas,gpio-r8a77961 # R-Car M3-W+ - - renesas,gpio-r8a77965 # R-Car M3-N - - renesas,gpio-r8a77970 # R-Car V3M - - renesas,gpio-r8a77980 # R-Car V3H - - renesas,gpio-r8a77990 # R-Car E3 - - renesas,gpio-r8a77995 # R-Car D3 - - const: renesas,rcar-gen3-gpio # R-Car Gen3 or RZ/G2 + - enum: + - renesas,gpio-r8a774a1 # RZ/G2M + - renesas,gpio-r8a774b1 # RZ/G2N + - renesas,gpio-r8a774c0 # RZ/G2E + - renesas,gpio-r8a7795 # R-Car H3 + - renesas,gpio-r8a7796 # R-Car M3-W + - renesas,gpio-r8a77961 # R-Car M3-W+ + - renesas,gpio-r8a77965 # R-Car M3-N + - renesas,gpio-r8a77970 # R-Car V3M + - renesas,gpio-r8a77980 # R-Car V3H + - renesas,gpio-r8a77990 # R-Car E3 + - renesas,gpio-r8a77995 # R-Car D3 + - const: renesas,rcar-gen3-gpio # R-Car Gen3 or RZ/G2 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml index e1ac6ff5a230..4843df1ddbb6 100644 --- a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml +++ b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml @@ -26,7 +26,8 @@ properties: - description: AXI/master interface clock - description: GPU core clock - description: Shader clock (only required if GPU has feature PIPE_3D) - - description: AHB/slave interface clock (only required if GPU can gate slave interface independently) + - description: AHB/slave interface clock (only required if GPU can gate + slave interface independently) minItems: 1 maxItems: 4 diff --git a/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml b/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml index af35b77053df..7898b9dba5a5 100644 --- a/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml +++ b/Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml @@ -19,7 +19,7 @@ description: |+ properties: compatible: enum: - - adi,axi-fan-control-1.00.a + - adi,axi-fan-control-1.00.a reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index 869f2ae3d5b3..810536953177 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -20,22 +20,22 @@ properties: - const: fsl,imx1-i2c - items: - enum: - - fsl,imx25-i2c - - fsl,imx27-i2c - - fsl,imx31-i2c - - fsl,imx50-i2c - - fsl,imx51-i2c - - fsl,imx53-i2c - - fsl,imx6q-i2c - - fsl,imx6sl-i2c - - fsl,imx6sx-i2c - - fsl,imx6sll-i2c - - fsl,imx6ul-i2c - - fsl,imx7s-i2c - - fsl,imx8mq-i2c - - fsl,imx8mm-i2c - - fsl,imx8mn-i2c - - fsl,imx8mp-i2c + - fsl,imx25-i2c + - fsl,imx27-i2c + - fsl,imx31-i2c + - fsl,imx50-i2c + - fsl,imx51-i2c + - fsl,imx53-i2c + - fsl,imx6q-i2c + - fsl,imx6sl-i2c + - fsl,imx6sx-i2c + - fsl,imx6sll-i2c + - fsl,imx6ul-i2c + - fsl,imx7s-i2c + - fsl,imx8mq-i2c + - fsl,imx8mm-i2c + - fsl,imx8mn-i2c + - fsl,imx8mp-i2c - const: fsl,imx21-i2c reg: diff --git a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml index da6e8bdc4037..015885dd02d3 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml @@ -16,8 +16,8 @@ allOf: required: - mrvl,i2c-polling then: - required: - - interrupts + required: + - interrupts properties: compatible: diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index 5117ad68a584..cbb8819d7069 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -53,10 +53,10 @@ properties: standby-gpios: description: - Must be the device tree identifier of the STBY pin. This pin is used - to place the AD7606 into one of two power-down modes, Standby mode or - Shutdown mode. As the line is active low, it should be marked - GPIO_ACTIVE_LOW. + Must be the device tree identifier of the STBY pin. This pin is used + to place the AD7606 into one of two power-down modes, Standby mode or + Shutdown mode. As the line is active low, it should be marked + GPIO_ACTIVE_LOW. maxItems: 1 adi,first-data-gpios: diff --git a/Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml b/Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml index a0ebb4680140..cccd3033a55b 100644 --- a/Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml +++ b/Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml @@ -10,7 +10,7 @@ maintainers: - Jonathan Cameron description: | - Family of simple ADCs with i2c inteface and internal references. + Family of simple ADCs with i2c inteface and internal references. properties: compatible: diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml index e6263b617941..a6ac289d98da 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml @@ -24,11 +24,11 @@ properties: - const: qcom,spmi-adc-rev2 - items: - - enum: - - qcom,spmi-vadc - - qcom,spmi-adc5 - - qcom,spmi-adc-rev2 - - qcom,spmi-adc7 + - enum: + - qcom,spmi-vadc + - qcom,spmi-adc5 + - qcom,spmi-adc-rev2 + - qcom,spmi-adc7 reg: description: VADC base address in the SPMI PMIC register map @@ -101,12 +101,11 @@ patternProperties: - $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - items: - - const: 1 - - enum: [ 1, 3, 4, 6, 20, 8, 10 ] - + - const: 1 + - enum: [ 1, 3, 4, 6, 20, 8, 10 ] - items: - - const: 10 - - const: 81 + - const: 10 + - const: 81 qcom,ratiometric: description: | diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml index bcff82a423bc..1bb76197787b 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml @@ -17,10 +17,10 @@ properties: - const: rockchip,rk3399-saradc - items: - enum: - - rockchip,px30-saradc - - rockchip,rk3308-saradc - - rockchip,rk3328-saradc - - rockchip,rv1108-saradc + - rockchip,px30-saradc + - rockchip,rk3308-saradc + - rockchip,rk3328-saradc + - rockchip,rv1108-saradc - const: rockchip,rk3399-saradc reg: diff --git a/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml b/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml index 1c6d49685e9f..5342360e96b1 100644 --- a/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml +++ b/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml @@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: HMC425A 6-bit Digital Step Attenuator maintainers: -- Michael Hennerich -- Beniamin Bia + - Michael Hennerich + - Beniamin Bia description: | Digital Step Attenuator IIO device with gpio interface. diff --git a/Documentation/devicetree/bindings/iio/chemical/atlas,sensor.yaml b/Documentation/devicetree/bindings/iio/chemical/atlas,sensor.yaml index 69e8931e0ae8..9a89b34bdd8f 100644 --- a/Documentation/devicetree/bindings/iio/chemical/atlas,sensor.yaml +++ b/Documentation/devicetree/bindings/iio/chemical/atlas,sensor.yaml @@ -31,10 +31,10 @@ properties: - atlas,co2-ezo reg: - maxItems: 1 + maxItems: 1 interrupts: - maxItems: 1 + maxItems: 1 required: - compatible diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml index 58d81ca43460..82424e06be27 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml @@ -61,17 +61,17 @@ properties: const: 0 adi,range-microamp: - description: Output range of the channel. - oneOf: - - items: - - const: 0 - - const: 300000 - - items: - - const: -60000 - - const: 0 - - items: - - const: -60000 - - const: 300000 + description: Output range of the channel. + oneOf: + - items: + - const: 0 + - const: 300000 + - items: + - const: -60000 + - const: 0 + - items: + - const: -60000 + - const: 300000 channel@1: description: Represents an external channel which are @@ -84,10 +84,10 @@ properties: const: 1 adi,range-microamp: - description: Output range of the channel. - items: - - const: 0 - - enum: [ 140000, 250000 ] + description: Output range of the channel. + items: + - const: 0 + - enum: [140000, 250000] channel@2: description: Represents an external channel which are @@ -100,10 +100,10 @@ properties: const: 2 adi,range-microamp: - description: Output range of the channel. - items: - - const: 0 - - enum: [ 55000, 150000 ] + description: Output range of the channel. + items: + - const: 0 + - enum: [55000, 150000] patternProperties: "^channel@([3-5])$": @@ -116,19 +116,19 @@ patternProperties: maximum: 5 adi,range-microamp: - description: Output range of the channel. - items: - - const: 0 - - enum: [ 45000, 100000 ] + description: Output range of the channel. + items: + - const: 0 + - enum: [45000, 100000] required: -- reg -- channel@0 -- channel@1 -- channel@2 -- channel@3 -- channel@4 -- channel@5 + - reg + - channel@0 + - channel@1 + - channel@2 + - channel@3 + - channel@4 + - channel@5 examples: - | diff --git a/Documentation/devicetree/bindings/iio/light/vishay,vcnl4000.yaml b/Documentation/devicetree/bindings/iio/light/vishay,vcnl4000.yaml index da8f2e872535..58887a4f9c15 100644 --- a/Documentation/devicetree/bindings/iio/light/vishay,vcnl4000.yaml +++ b/Documentation/devicetree/bindings/iio/light/vishay,vcnl4000.yaml @@ -36,15 +36,15 @@ required: additionalProperties: false examples: -- | - i2c { - #address-cells = <1>; - #size-cells = <0>; - - light-sensor@51 { - compatible = "vishay,vcnl4200"; - reg = <0x51>; - proximity-near-level = <220>; - }; - }; + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + light-sensor@51 { + compatible = "vishay,vcnl4200"; + reg = <0x51>; + proximity-near-level = <220>; + }; + }; ... diff --git a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml index f4393bfbf355..f0b336ac39c9 100644 --- a/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml +++ b/Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml @@ -13,15 +13,15 @@ properties: compatible: oneOf: - enum: - - asahi-kasei,ak8975 - - asahi-kasei,ak8963 - - asahi-kasei,ak09911 - - asahi-kasei,ak09912 + - asahi-kasei,ak8975 + - asahi-kasei,ak8963 + - asahi-kasei,ak09911 + - asahi-kasei,ak09912 - enum: - - ak8975 - - ak8963 - - ak09911 - - ak09912 + - ak8975 + - ak8963 + - ak09911 + - ak09912 deprecated: true reg: diff --git a/Documentation/devicetree/bindings/iio/proximity/vishay,vcnl3020.yaml b/Documentation/devicetree/bindings/iio/proximity/vishay,vcnl3020.yaml index 4190253336ec..51dba64037f6 100644 --- a/Documentation/devicetree/bindings/iio/proximity/vishay,vcnl3020.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/vishay,vcnl3020.yaml @@ -39,8 +39,8 @@ properties: description: The driver current for the LED used in proximity sensing. enum: [0, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, - 100000, 110000, 120000, 130000, 140000, 150000, 160000, 170000, - 180000, 190000, 200000] + 100000, 110000, 120000, 130000, 140000, 150000, 160000, 170000, + 180000, 190000, 200000] default: 20000 required: diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml index 40ccbe7b5c13..0f79d9a01c49 100644 --- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml @@ -307,7 +307,7 @@ patternProperties: mode. $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000, - 500000, 1000000] + 500000, 1000000] adi,custom-thermistor: description: diff --git a/Documentation/devicetree/bindings/input/imx-keypad.yaml b/Documentation/devicetree/bindings/input/imx-keypad.yaml index 7432c6e8cf3a..f21db81206b4 100644 --- a/Documentation/devicetree/bindings/input/imx-keypad.yaml +++ b/Documentation/devicetree/bindings/input/imx-keypad.yaml @@ -24,19 +24,19 @@ properties: - const: fsl,imx21-kpp - items: - enum: - - fsl,imx25-kpp - - fsl,imx27-kpp - - fsl,imx31-kpp - - fsl,imx35-kpp - - fsl,imx51-kpp - - fsl,imx53-kpp - - fsl,imx50-kpp - - fsl,imx6q-kpp - - fsl,imx6sx-kpp - - fsl,imx6sl-kpp - - fsl,imx6sll-kpp - - fsl,imx6ul-kpp - - fsl,imx7d-kpp + - fsl,imx25-kpp + - fsl,imx27-kpp + - fsl,imx31-kpp + - fsl,imx35-kpp + - fsl,imx51-kpp + - fsl,imx53-kpp + - fsl,imx50-kpp + - fsl,imx6q-kpp + - fsl,imx6sx-kpp + - fsl,imx6sl-kpp + - fsl,imx6sll-kpp + - fsl,imx6ul-kpp + - fsl,imx7d-kpp - const: fsl,imx21-kpp reg: diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml index 8c73e5264312..3225c8d1fdaf 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml @@ -51,7 +51,7 @@ required: - touchscreen-max-pressure examples: -- | + - | #include i2c { #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 024b262a2ef7..4ce109476a0e 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -20,11 +20,11 @@ maintainers: allOf: - $ref: touchscreen.yaml# - if: - properties: - compatible: - contains: - enum: - - evervision,ev-ft5726 + properties: + compatible: + contains: + enum: + - evervision,ev-ft5726 then: properties: diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml index e81cfa56f25a..da5b0d87e16d 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml @@ -35,9 +35,8 @@ properties: maxItems: 1 irq-gpios: - description: GPIO pin used for IRQ. - The driver uses the interrupt gpio pin as - output to reset the device. + description: GPIO pin used for IRQ. The driver uses the interrupt gpio pin + as output to reset the device. maxItems: 1 reset-gpios: diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml index d7dac16a3960..36dc7b56a453 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml @@ -33,8 +33,8 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-min-pressure: - description: minimum pressure on the touchscreen to be achieved in order for the - touchscreen driver to report a touch event. + description: minimum pressure on the touchscreen to be achieved in order + for the touchscreen driver to report a touch event. $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-fuzz-x: @@ -46,13 +46,13 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-fuzz-pressure: - description: pressure noise value of the absolute input device (arbitrary range - dependent on the controller) + description: pressure noise value of the absolute input device (arbitrary + range dependent on the controller) $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-average-samples: - description: Number of data samples which are averaged for each read (valid values - dependent on the controller) + description: Number of data samples which are averaged for each read (valid + values dependent on the controller) $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-inverted-x: diff --git a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml index ff09550ad959..a8873739d61a 100644 --- a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml +++ b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml @@ -25,17 +25,17 @@ properties: compatible: oneOf: - items: - - enum: - - fsl,imx8mn-nic - - fsl,imx8mm-nic - - fsl,imx8mq-nic - - const: fsl,imx8m-nic + - enum: + - fsl,imx8mn-nic + - fsl,imx8mm-nic + - fsl,imx8mq-nic + - const: fsl,imx8m-nic - items: - - enum: - - fsl,imx8mn-noc - - fsl,imx8mm-noc - - fsl,imx8mq-noc - - const: fsl,imx8m-noc + - enum: + - fsl,imx8mn-noc + - fsl,imx8mm-noc + - fsl,imx8mq-noc + - const: fsl,imx8m-noc - const: fsl,imx8m-nic reg: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml index d01bac80d416..8659048f92a7 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interconnect/qcom,sc7180.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm SC7180 Network-On-Chip Interconnect +title: Qualcomm SC7180 Network-On-Chip Interconnect maintainers: - Odelu Kukatla diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml index 74536747b51d..dab17c0716ce 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interconnect/qcom,sdm845.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm SDM845 Network-On-Chip Interconnect +title: Qualcomm SDM845 Network-On-Chip Interconnect maintainers: - Georgi Djakov diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml index 96f8803ff4e6..06889963dfb7 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml @@ -42,8 +42,8 @@ properties: - items: - const: arm,gic-400 - enum: - - arm,cortex-a15-gic - - arm,cortex-a7-gic + - arm,cortex-a15-gic + - arm,cortex-a7-gic - items: - const: arm,arm1176jzf-devchip-gic diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml index 28b27e1a6e9d..02a3cf470518 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml @@ -16,20 +16,20 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-intc - - ingenic,jz4760-intc - - ingenic,jz4780-intc + - ingenic,jz4740-intc + - ingenic,jz4760-intc + - ingenic,jz4780-intc - items: - - enum: - - ingenic,jz4775-intc - - ingenic,jz4770-intc - - const: ingenic,jz4760-intc + - enum: + - ingenic,jz4775-intc + - ingenic,jz4770-intc + - const: ingenic,jz4760-intc - items: - - const: ingenic,x1000-intc - - const: ingenic,jz4780-intc + - const: ingenic,x1000-intc + - const: ingenic,jz4780-intc - items: - - const: ingenic,jz4725b-intc - - const: ingenic,jz4740-intc + - const: ingenic,jz4725b-intc + - const: ingenic,jz4740-intc "#interrupt-cells": const: 1 diff --git a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml index 32e0896c6bc1..47938e372987 100644 --- a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml @@ -79,7 +79,8 @@ properties: description: | kHz; switching frequency. $ref: /schemas/types.yaml#/definitions/uint32 - enum: [ 600, 640, 685, 738, 800, 872, 960, 1066, 1200, 1371, 1600, 1920, 2400, 3200, 4800, 9600 ] + enum: [ 600, 640, 685, 738, 800, 872, 960, 1066, 1200, 1371, 1600, 1920, + 2400, 3200, 4800, 9600 ] qcom,ovp: description: | diff --git a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml index 3b35eb5ac3f9..8a3470b64d06 100644 --- a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml +++ b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml @@ -29,12 +29,12 @@ properties: - const: fsl,imx8-mu-scu - items: - enum: - - fsl,imx7s-mu - - fsl,imx8mq-mu - - fsl,imx8mm-mu - - fsl,imx8mn-mu - - fsl,imx8mp-mu - - fsl,imx8qxp-mu + - fsl,imx7s-mu + - fsl,imx8mq-mu + - fsl,imx8mm-mu + - fsl,imx8mn-mu + - fsl,imx8mp-mu + - fsl,imx8qxp-mu - const: fsl,imx6sx-mu - description: To communicate with i.MX8 SCU with fast IPC items: diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index 4ac2123d9193..168beeb7e9f7 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -24,7 +24,7 @@ properties: compatible: items: - enum: - - qcom,sm8250-ipcc + - qcom,sm8250-ipcc - const: qcom,ipcc reg: diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml index 75196d11da58..a258832d520c 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml @@ -20,8 +20,8 @@ properties: oneOf: - const: allwinner,sun8i-a83t-de2-rotate - items: - - const: allwinner,sun50i-a64-de2-rotate - - const: allwinner,sun8i-a83t-de2-rotate + - const: allwinner,sun50i-a64-de2-rotate + - const: allwinner,sun8i-a83t-de2-rotate reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml index 8707df613f6c..6a56214c6cfd 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml @@ -20,8 +20,8 @@ properties: oneOf: - const: allwinner,sun8i-h3-deinterlace - items: - - const: allwinner,sun50i-a64-deinterlace - - const: allwinner,sun8i-h3-deinterlace + - const: allwinner,sun50i-a64-deinterlace + - const: allwinner,sun8i-h3-deinterlace reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/media/i2c/adv7180.yaml b/Documentation/devicetree/bindings/media/i2c/adv7180.yaml index e0084b272b25..d8c54f9d9506 100644 --- a/Documentation/devicetree/bindings/media/i2c/adv7180.yaml +++ b/Documentation/devicetree/bindings/media/i2c/adv7180.yaml @@ -17,17 +17,17 @@ properties: compatible: items: - enum: - - adi,adv7180 - - adi,adv7180cp - - adi,adv7180st - - adi,adv7182 - - adi,adv7280 - - adi,adv7280-m - - adi,adv7281 - - adi,adv7281-m - - adi,adv7281-ma - - adi,adv7282 - - adi,adv7282-m + - adi,adv7180 + - adi,adv7180cp + - adi,adv7180st + - adi,adv7182 + - adi,adv7280 + - adi,adv7280-m + - adi,adv7281 + - adi,adv7281-m + - adi,adv7281-ma + - adi,adv7282 + - adi,adv7282-m reg: maxItems: 1 @@ -58,17 +58,16 @@ allOf: - if: properties: compatible: - items: - - enum: - - adi,adv7180 - - adi,adv7182 - - adi,adv7280 - - adi,adv7280-m - - adi,adv7281 - - adi,adv7281-m - - adi,adv7281-ma - - adi,adv7282 - - adi,adv7282-m + enum: + - adi,adv7180 + - adi,adv7182 + - adi,adv7280 + - adi,adv7280-m + - adi,adv7281 + - adi,adv7281-m + - adi,adv7281-ma + - adi,adv7282 + - adi,adv7282-m then: required: - port diff --git a/Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml b/Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml index 5ad4b8c356cf..107c862a7fc7 100644 --- a/Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml +++ b/Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/i2c/imi,rdacm2x-gmsl.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: IMI D&D RDACM20 and RDACM21 Automotive Camera Platforms +title: IMI D&D RDACM20 and RDACM21 Automotive Camera Platforms maintainers: - Jacopo Mondi diff --git a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml index e7b543159d15..9ea827092fdd 100644 --- a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml +++ b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml @@ -49,7 +49,7 @@ properties: gpio-controller: true '#gpio-cells': - const: 2 + const: 2 ports: type: object diff --git a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml index 1956b2a32bf4..cde85553fd01 100644 --- a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml @@ -138,4 +138,5 @@ examples: }; }; }; -... \ No newline at end of file +... + diff --git a/Documentation/devicetree/bindings/media/renesas,csi2.yaml b/Documentation/devicetree/bindings/media/renesas,csi2.yaml index c9e068231d4b..6d282585d0b9 100644 --- a/Documentation/devicetree/bindings/media/renesas,csi2.yaml +++ b/Documentation/devicetree/bindings/media/renesas,csi2.yaml @@ -19,15 +19,15 @@ properties: compatible: items: - enum: - - renesas,r8a774a1-csi2 # RZ/G2M - - renesas,r8a774b1-csi2 # RZ/G2N - - renesas,r8a774c0-csi2 # RZ/G2E - - renesas,r8a7795-csi2 # R-Car H3 - - renesas,r8a7796-csi2 # R-Car M3-W - - renesas,r8a77965-csi2 # R-Car M3-N - - renesas,r8a77970-csi2 # R-Car V3M - - renesas,r8a77980-csi2 # R-Car V3H - - renesas,r8a77990-csi2 # R-Car E3 + - renesas,r8a774a1-csi2 # RZ/G2M + - renesas,r8a774b1-csi2 # RZ/G2N + - renesas,r8a774c0-csi2 # RZ/G2E + - renesas,r8a7795-csi2 # R-Car H3 + - renesas,r8a7796-csi2 # R-Car M3-W + - renesas,r8a77965-csi2 # R-Car M3-N + - renesas,r8a77970-csi2 # R-Car V3M + - renesas,r8a77980-csi2 # R-Car V3H + - renesas,r8a77990-csi2 # R-Car E3 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml index 2b629456d75f..c81dbc3e8960 100644 --- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml +++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml @@ -31,8 +31,8 @@ properties: oneOf: - const: vdpu - items: - - const: vepu - - const: vdpu + - const: vepu + - const: vdpu clocks: maxItems: 2 diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,csi2rxss.yaml b/Documentation/devicetree/bindings/media/xilinx/xlnx,csi2rxss.yaml index 7b9407c0520e..2961a5b6872f 100644 --- a/Documentation/devicetree/bindings/media/xilinx/xlnx,csi2rxss.yaml +++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,csi2rxss.yaml @@ -25,7 +25,7 @@ properties: compatible: items: - enum: - - xlnx,mipi-csi2-rx-subsystem-5.0 + - xlnx,mipi-csi2-rx-subsystem-5.0 reg: maxItems: 1 @@ -65,13 +65,12 @@ properties: 0x2d - RAW14 0x2e - RAW16 0x2f - RAW20 - allOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - - anyOf: - - minimum: 0x1e - - maximum: 0x24 - - minimum: 0x28 - - maximum: 0x2f + $ref: /schemas/types.yaml#/definitions/uint32 + oneOf: + - minimum: 0x1e + maximum: 0x24 + - minimum: 0x28 + maximum: 0x2f xlnx,vfb: type: boolean diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml b/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml index dee5131c0361..68484136a510 100644 --- a/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml @@ -15,12 +15,12 @@ properties: - const: fsl,imx6q-mmdc - items: - enum: - - fsl,imx6qp-mmdc - - fsl,imx6sl-mmdc - - fsl,imx6sll-mmdc - - fsl,imx6sx-mmdc - - fsl,imx6ul-mmdc - - fsl,imx7ulp-mmdc + - fsl,imx6qp-mmdc + - fsl,imx6sl-mmdc + - fsl,imx6sll-mmdc + - fsl,imx6sx-mmdc + - fsl,imx6ul-mmdc + - fsl,imx7ulp-mmdc - const: fsl,imx6q-mmdc reg: diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml index 17ba45a6c260..fe0ce191a851 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml @@ -16,11 +16,11 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-nemc - - ingenic,jz4780-nemc + - ingenic,jz4740-nemc + - ingenic,jz4780-nemc - items: - - const: ingenic,jz4725b-nemc - - const: ingenic,jz4740-nemc + - const: ingenic,jz4725b-nemc + - const: ingenic,jz4740-nemc "#address-cells": const: 2 diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml index 660005601a7f..7bfe120e14c3 100644 --- a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml @@ -26,10 +26,10 @@ properties: compatible: items: - enum: - - renesas,r8a77970-rpc-if # R-Car V3M - - renesas,r8a77980-rpc-if # R-Car V3H - - renesas,r8a77995-rpc-if # R-Car D3 - - const: renesas,rcar-gen3-rpc-if # a generic R-Car gen3 device + - renesas,r8a77970-rpc-if # R-Car V3M + - renesas,r8a77980-rpc-if # R-Car V3H + - renesas,r8a77995-rpc-if # R-Car D3 + - const: renesas,rcar-gen3-rpc-if # a generic R-Car gen3 device reg: items: diff --git a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml index a5531f6caf12..499c62c04daa 100644 --- a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml +++ b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml @@ -98,11 +98,11 @@ allOf: description: Databus power supply. - if: - properties: - compatible: - contains: - enum: - - cirrus,cs47l15 + properties: + compatible: + contains: + enum: + - cirrus,cs47l15 then: required: - MICVDD-supply @@ -174,24 +174,24 @@ properties: "mclk3" For the clock supplied on MCLK3. oneOf: - items: - - const: mclk1 + - const: mclk1 - items: - - const: mclk2 + - const: mclk2 - items: - - const: mclk3 + - const: mclk3 - items: - - const: mclk1 - - const: mclk2 + - const: mclk1 + - const: mclk2 - items: - - const: mclk1 - - const: mclk3 + - const: mclk1 + - const: mclk3 - items: - - const: mclk2 - - const: mclk3 + - const: mclk2 + - const: mclk3 - items: - - const: mclk1 - - const: mclk2 - - const: mclk3 + - const: mclk1 + - const: mclk2 + - const: mclk3 AVDD-supply: description: diff --git a/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml index 487a8445722e..7fd232699f16 100644 --- a/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml +++ b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml @@ -89,8 +89,8 @@ properties: description: Values of resistors for divider on raw ADC input maxItems: 2 items: - minimum: 1000 - maximum: 1000000 + minimum: 1000 + maximum: 1000000 gw,voltage-offset-microvolt: description: | diff --git a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml index dd995d7dc1a6..305123e74a58 100644 --- a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml @@ -113,8 +113,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true @@ -135,8 +135,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true @@ -154,8 +154,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true @@ -172,8 +172,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true @@ -198,8 +198,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true @@ -220,8 +220,8 @@ properties: maxItems: 1 st,mask-reset: - description: mask reset for this regulator, - the regulator configuration is maintained during pmic reset. + description: mask reset for this regulator, the regulator configuration + is maintained during pmic reset. $ref: /schemas/types.yaml#/definitions/flag regulator-name: true diff --git a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml index 03d0a232c75e..c8fd5d3e3071 100644 --- a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml @@ -24,12 +24,11 @@ maintainers: properties: compatible: - anyOf: - - items: - - enum: - - ti,j721e-system-controller - - const: syscon - - const: simple-mfd + items: + - enum: + - ti,j721e-system-controller + - const: syscon + - const: simple-mfd "#address-cells": const: 1 diff --git a/Documentation/devicetree/bindings/mfd/wlf,arizona.yaml b/Documentation/devicetree/bindings/mfd/wlf,arizona.yaml index 4c0106cea36d..9e762d474218 100644 --- a/Documentation/devicetree/bindings/mfd/wlf,arizona.yaml +++ b/Documentation/devicetree/bindings/mfd/wlf,arizona.yaml @@ -73,13 +73,13 @@ allOf: required: - DBVDD3-supply - if: - properties: - compatible: - contains: - enum: - - cirrus,cs47l24 - - wlf,wm1831 - - wlf,wm8997 + properties: + compatible: + contains: + enum: + - cirrus,cs47l24 + - wlf,wm1831 + - wlf,wm8997 then: properties: SPKVDD-supply: @@ -183,12 +183,12 @@ properties: clock supplied on MCLK2, recommended to be an always on 32k clock. oneOf: - items: - - const: mclk1 + - const: mclk1 - items: - - const: mclk2 + - const: mclk2 - items: - - const: mclk1 - - const: mclk2 + - const: mclk1 + - const: mclk2 reset-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml b/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml index 7a386a5b8fcb..0cd74c3116f8 100644 --- a/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml +++ b/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml @@ -21,9 +21,9 @@ properties: compatible: items: - enum: - - amlogic,meson8-sdhc - - amlogic,meson8b-sdhc - - amlogic,meson8m2-sdhc + - amlogic,meson8-sdhc + - amlogic,meson8b-sdhc + - amlogic,meson8m2-sdhc - const: amlogic,meson-mx-sdhc reg: diff --git a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml index e60bfe980ab3..9b63df1c22fb 100644 --- a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml +++ b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml @@ -16,14 +16,14 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-mmc - - ingenic,jz4725b-mmc - - ingenic,jz4760-mmc - - ingenic,jz4780-mmc - - ingenic,x1000-mmc + - ingenic,jz4740-mmc + - ingenic,jz4725b-mmc + - ingenic,jz4760-mmc + - ingenic,jz4780-mmc + - ingenic,x1000-mmc - items: - - const: ingenic,jz4770-mmc - - const: ingenic,jz4760-mmc + - const: ingenic,jz4770-mmc + - const: ingenic,jz4760-mmc reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml index e5dbc20456e5..b4c3fd40caeb 100644 --- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml +++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml @@ -130,9 +130,9 @@ then: required: - clock-names description: - The internal card detection logic that exists in these controllers is - sectioned off to be run by a separate second clock source to allow - the main core clock to be turned off to save power. + The internal card detection logic that exists in these controllers is + sectioned off to be run by a separate second clock source to allow + the main core clock to be turned off to save power. unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml b/Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml index cb9794edff24..b32876933269 100644 --- a/Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml @@ -14,12 +14,10 @@ maintainers: properties: compatible: - oneOf: - - items: - - enum: + items: + - enum: - xlnx,zynqmp-nand-controller - - enum: - - arasan,nfc-v3p10 + - const: arasan,nfc-v3p10 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml index 354cb63feea3..3201372b7f85 100644 --- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/gpmi-nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale General-Purpose Media Interface (GPMI) binding +title: Freescale General-Purpose Media Interface (GPMI) binding maintainers: - Han Xu diff --git a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml index ee4d1d026fd8..73b86f2226c7 100644 --- a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/mxc-nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale's mxc_nand binding +title: Freescale's mxc_nand binding maintainers: - Uwe Kleine-König diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml index 6ae7de15d172..28a08ff407db 100644 --- a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml @@ -42,7 +42,7 @@ patternProperties: const: 512 nand-ecc-strength: - enum: [1, 4 ,8 ] + enum: [1, 4, 8] allOf: - $ref: "nand-controller.yaml#" diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index faea214339ca..6a1ec50ad4fd 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -85,8 +85,8 @@ patternProperties: oneOf: - required: - - ports + - ports - required: - - ethernet-ports + - ethernet-ports ... diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index a3561276e609..8594f114f016 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -43,7 +43,7 @@ description: properties: compatible: - const: "qcom,sdm845-ipa" + const: "qcom,sdm845-ipa" reg: items: @@ -64,7 +64,7 @@ properties: maxItems: 1 clock-names: - const: core + const: core interrupts: items: @@ -96,8 +96,8 @@ properties: $ref: /schemas/types.yaml#/definitions/phandle-array description: State bits used in by the AP to signal the modem. items: - - description: Whether the "ipa-clock-enabled" state bit is valid - - description: Whether the IPA clock is enabled (if valid) + - description: Whether the "ipa-clock-enabled" state bit is valid + - description: Whether the IPA clock is enabled (if valid) qcom,smem-state-names: $ref: /schemas/types.yaml#/definitions/string-array @@ -140,9 +140,9 @@ required: oneOf: - required: - - modem-init + - modem-init - required: - - memory-region + - memory-region examples: - | diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index 7d84a863b9b9..cbacc04fc9e6 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -46,10 +46,10 @@ properties: clock-names: oneOf: - items: # for Pro4 - - const: gio - - const: ether - - const: ether-gb - - const: ether-phy + - const: gio + - const: ether + - const: ether-gb + - const: ether-phy - const: ether # for others resets: @@ -59,8 +59,8 @@ properties: reset-names: oneOf: - items: # for Pro4 - - const: gio - - const: ether + - const: gio + - const: ether - const: ether # for others socionext,syscon-phy-mode: diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml index fafa34cebdb1..e5dff66df481 100644 --- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml @@ -48,11 +48,11 @@ properties: minItems: 3 maxItems: 5 items: - - description: GMAC main clock - - description: MAC TX clock - - description: MAC RX clock - - description: For MPU family, used for power mode - - description: For MPU family, used for PHY without quartz + - description: GMAC main clock + - description: MAC TX clock + - description: MAC RX clock + - description: For MPU family, used for power mode + - description: For MPU family, used for PHY without quartz clock-names: minItems: 3 @@ -89,7 +89,7 @@ required: - st,syscon examples: - - | + - | #include #include #include diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml index 3ea0e1290dbb..dadeb8f811c0 100644 --- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml +++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml @@ -35,7 +35,7 @@ properties: reg: maxItems: 1 description: - The physical base address and size of full the CPSW module IO range + The physical base address and size of full the CPSW module IO range '#address-cells': const: 1 @@ -85,36 +85,36 @@ properties: patternProperties: "^port@[0-9]+$": - type: object - description: CPSW external ports - - allOf: - - $ref: ethernet-controller.yaml# - - properties: - reg: - items: - - enum: [1, 2] - description: CPSW port number - - phys: - maxItems: 1 - description: phandle on phy-gmii-sel PHY - - label: - description: label associated with this port - - ti,dual-emac-pvid: - $ref: /schemas/types.yaml#/definitions/uint32 - minimum: 1 - maximum: 1024 - description: - Specifies default PORT VID to be used to segregate - ports. Default value - CPSW port number. - - required: - - reg - - phys + type: object + description: CPSW external ports + + allOf: + - $ref: ethernet-controller.yaml# + + properties: + reg: + items: + - enum: [1, 2] + description: CPSW port number + + phys: + maxItems: 1 + description: phandle on phy-gmii-sel PHY + + label: + description: label associated with this port + + ti,dual-emac-pvid: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1 + maximum: 1024 + description: + Specifies default PORT VID to be used to segregate + ports. Default value - CPSW port number. + + required: + - reg + - phys cpts: type: object diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 174579370a22..227270cbf892 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -55,7 +55,7 @@ properties: reg: maxItems: 1 description: - The physical base address and size of full the CPSW2G NUSS IO range + The physical base address and size of full the CPSW2G NUSS IO range reg-names: items: @@ -100,38 +100,38 @@ properties: patternProperties: port@1: - type: object - description: CPSW2G NUSS external ports - - $ref: ethernet-controller.yaml# - - properties: - reg: - items: - - const: 1 - description: CPSW port number - - phys: - maxItems: 1 - description: phandle on phy-gmii-sel PHY - - label: - description: label associated with this port - - ti,mac-only: - $ref: /schemas/types.yaml#definitions/flag - description: - Specifies the port works in mac-only mode. - - ti,syscon-efuse: - $ref: /schemas/types.yaml#definitions/phandle-array - description: - Phandle to the system control device node which provides access - to efuse IO range with MAC addresses - - required: - - reg - - phys + type: object + description: CPSW2G NUSS external ports + + $ref: ethernet-controller.yaml# + + properties: + reg: + items: + - const: 1 + description: CPSW port number + + phys: + maxItems: 1 + description: phandle on phy-gmii-sel PHY + + label: + description: label associated with this port + + ti,mac-only: + $ref: /schemas/types.yaml#definitions/flag + description: + Specifies the port works in mac-only mode. + + ti,syscon-efuse: + $ref: /schemas/types.yaml#definitions/phandle-array + description: + Phandle to the system control device node which provides access + to efuse IO range with MAC addresses + + required: + - reg + - phys additionalProperties: false diff --git a/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml index fe9c7df78ea1..1c9d7f05f173 100644 --- a/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml @@ -21,18 +21,18 @@ properties: compatible: items: - enum: - - fsl,imx6q-ocotp - - fsl,imx6sl-ocotp - - fsl,imx6sx-ocotp - - fsl,imx6ul-ocotp - - fsl,imx6ull-ocotp - - fsl,imx7d-ocotp - - fsl,imx6sll-ocotp - - fsl,imx7ulp-ocotp - - fsl,imx8mq-ocotp - - fsl,imx8mm-ocotp - - fsl,imx8mn-ocotp - - fsl,imx8mp-ocotp + - fsl,imx6q-ocotp + - fsl,imx6sl-ocotp + - fsl,imx6sx-ocotp + - fsl,imx6ul-ocotp + - fsl,imx6ull-ocotp + - fsl,imx7d-ocotp + - fsl,imx6sll-ocotp + - fsl,imx7ulp-ocotp + - fsl,imx8mq-ocotp + - fsl,imx8mm-ocotp + - fsl,imx8mn-ocotp + - fsl,imx8mp-ocotp - const: syscon reg: diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index d10a0cf91ba7..59aca6d22ff9 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -46,8 +46,8 @@ properties: const: 1 required: - - compatible - - reg + - compatible + - reg examples: - | diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml index 9e32cb43fb21..0d2557bb0bcc 100644 --- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml @@ -37,9 +37,9 @@ properties: const: 0 phy-supply: - description: - Phandle to a regulator that provides power to the PHY. This - regulator will be managed during the PHY power on/off sequence. + description: + Phandle to a regulator that provides power to the PHY. This + regulator will be managed during the PHY power on/off sequence. required: - compatible diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml index cb71561a21b4..fb29ad807b68 100644 --- a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml +++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml @@ -100,9 +100,9 @@ properties: - const: linestate - const: otg-mux - items: - - const: otg-bvalid - - const: otg-id - - const: linestate + - const: otg-bvalid + - const: otg-id + - const: linestate phy-supply: description: diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml index e4cd4a1deae9..185cdea9cf81 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml @@ -37,7 +37,7 @@ properties: - description: Address and length of PHY's common serdes block. "#clock-cells": - enum: [ 1, 2 ] + enum: [ 1, 2 ] "#address-cells": enum: [ 1, 2 ] @@ -65,16 +65,15 @@ properties: vdda-phy-supply: description: - Phandle to a regulator supply to PHY core block. + Phandle to a regulator supply to PHY core block. vdda-pll-supply: description: - Phandle to 1.8V regulator supply to PHY refclk pll block. + Phandle to 1.8V regulator supply to PHY refclk pll block. vddp-ref-clk-supply: description: - Phandle to a regulator supply to any specific refclk - pll block. + Phandle to a regulator supply to any specific refclk pll block. #Required nodes: patternProperties: @@ -184,8 +183,8 @@ allOf: - description: phy common block reset. reset-names: items: - - const: phy - - const: common + - const: phy + - const: common - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml index 6e2487501457..ef8ae9f73092 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml @@ -26,7 +26,7 @@ properties: - const: dp_com "#clock-cells": - enum: [ 1, 2 ] + enum: [ 1, 2 ] "#address-cells": enum: [ 1, 2 ] @@ -62,16 +62,15 @@ properties: vdda-phy-supply: description: - Phandle to a regulator supply to PHY core block. + Phandle to a regulator supply to PHY core block. vdda-pll-supply: description: - Phandle to 1.8V regulator supply to PHY refclk pll block. + Phandle to 1.8V regulator supply to PHY refclk pll block. vddp-ref-clk-supply: description: - Phandle to a regulator supply to any specific refclk - pll block. + Phandle to a regulator supply to any specific refclk pll block. #Required nodes: patternProperties: diff --git a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml index 9ba62dcb1e5d..ccda92859eca 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml @@ -17,15 +17,15 @@ properties: compatible: oneOf: - items: - - enum: - - qcom,ipq8074-qusb2-phy - - qcom,msm8996-qusb2-phy - - qcom,msm8998-qusb2-phy + - enum: + - qcom,ipq8074-qusb2-phy + - qcom,msm8996-qusb2-phy + - qcom,msm8998-qusb2-phy - items: - - enum: - - qcom,sc7180-qusb2-phy - - qcom,sdm845-qusb2-phy - - const: qcom,qusb2-v2-phy + - enum: + - qcom,sc7180-qusb2-phy + - qcom,sdm845-qusb2-phy + - const: qcom,qusb2-v2-phy reg: maxItems: 1 @@ -49,12 +49,12 @@ properties: - const: iface vdda-pll-supply: - description: - Phandle to 1.8V regulator supply to PHY refclk pll block. + description: + Phandle to 1.8V regulator supply to PHY refclk pll block. vdda-phy-dpdm-supply: - description: - Phandle to 3.1V regulator supply to Dp/Dm port signals. + description: + Phandle to 3.1V regulator supply to Dp/Dm port signals. resets: maxItems: 1 @@ -64,12 +64,12 @@ properties: nvmem-cells: maxItems: 1 description: - Phandle to nvmem cell that contains 'HS Tx trim' - tuning parameter value for qusb2 phy. + Phandle to nvmem cell that contains 'HS Tx trim' + tuning parameter value for qusb2 phy. qcom,tcsr-syscon: description: - Phandle to TCSR syscon register region. + Phandle to TCSR syscon register region. $ref: /schemas/types.yaml#/definitions/phandle if: diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml index 86f49093b65f..a06831fd64b9 100644 --- a/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml @@ -33,8 +33,8 @@ properties: clock-names: oneOf: - items: # for Pro5 - - const: gio - - const: link + - const: gio + - const: link - const: link # for others resets: @@ -44,8 +44,8 @@ properties: reset-names: oneOf: - items: # for Pro5 - - const: gio - - const: link + - const: gio + - const: link - const: link # for others socionext,syscon: diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml index c871d462c952..6fa5caab1487 100644 --- a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml @@ -37,12 +37,12 @@ properties: oneOf: - const: link # for PXs2 - items: # for PXs3 with phy-ext - - const: link - - const: phy - - const: phy-ext + - const: link + - const: phy + - const: phy-ext - items: # for others - - const: link - - const: phy + - const: link + - const: phy resets: maxItems: 2 diff --git a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml index edff2c95c9ae..9d46715ed036 100644 --- a/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml +++ b/Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml @@ -37,15 +37,15 @@ properties: clock-names: oneOf: - items: # for Pro4, Pro5 - - const: gio - - const: link + - const: gio + - const: link - items: # for PXs3 with phy-ext - - const: link - - const: phy - - const: phy-ext + - const: link + - const: phy + - const: phy-ext - items: # for others - - const: link - - const: phy + - const: link + - const: phy resets: maxItems: 2 @@ -53,11 +53,11 @@ properties: reset-names: oneOf: - items: # for Pro4,Pro5 - - const: gio - - const: link + - const: gio + - const: link - items: # for others - - const: link - - const: phy + - const: link + - const: phy vbus-supply: description: A phandle to the regulator for USB VBUS diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index 3f913d6d1c3d..5ffc95c62909 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -203,7 +203,8 @@ examples: }; refclk-dig { - clocks = <&k3_clks 292 11>, <&k3_clks 292 0>, <&dummy_cmn_refclk>, <&dummy_cmn_refclk1>; + clocks = <&k3_clks 292 11>, <&k3_clks 292 0>, + <&dummy_cmn_refclk>, <&dummy_cmn_refclk1>; #clock-cells = <0>; assigned-clocks = <&wiz0_refclk_dig>; assigned-clock-parents = <&k3_clks 292 11>; diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml index 017d9593573b..54631dc1adb0 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml @@ -34,22 +34,22 @@ patternProperties: patternProperties: "^function|groups$": $ref: "/schemas/types.yaml#/definitions/string" - enum: [ACPI, ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, - ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, DDCCLK, DDCDAT, - EXTRST, FLACK, FLBUSY, FLWP, GPID, GPID0, GPID2, GPID4, GPID6, GPIE0, - GPIE2, GPIE4, GPIE6, I2C10, I2C11, I2C12, I2C13, I2C14, I2C3, I2C4, - I2C5, I2C6, I2C7, I2C8, I2C9, LPCPD, LPCPME, LPCRST, LPCSMI, MAC1LINK, - MAC2LINK, MDIO1, MDIO2, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, - NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, - NDTS4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, OSCCLK, PWM0, - PWM1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, RGMII1, RGMII2, RMII1, - RMII2, ROM16, ROM8, ROMCS1, ROMCS2, ROMCS3, ROMCS4, RXD1, RXD2, RXD3, - RXD4, SALT1, SALT2, SALT3, SALT4, SD1, SD2, SGPMCK, SGPMI, SGPMLD, - SGPMO, SGPSCK, SGPSI0, SGPSI1, SGPSLD, SIOONCTRL, SIOPBI, SIOPBO, - SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1DEBUG, SPI1PASSTHRU, - SPICS1, TIMER3, TIMER4, TIMER5, TIMER6, TIMER7, TIMER8, TXD1, TXD2, - TXD3, TXD4, UART6, USB11D1, USB11H2, USB2D1, USB2H1, USBCKI, VGABIOS_ROM, - VGAHS, VGAVS, VPI18, VPI24, VPI30, VPO12, VPO24, WDTRST1, WDTRST2] + enum: [ ACPI, ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, + ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, DDCCLK, DDCDAT, + EXTRST, FLACK, FLBUSY, FLWP, GPID, GPID0, GPID2, GPID4, GPID6, GPIE0, + GPIE2, GPIE4, GPIE6, I2C10, I2C11, I2C12, I2C13, I2C14, I2C3, I2C4, + I2C5, I2C6, I2C7, I2C8, I2C9, LPCPD, LPCPME, LPCRST, LPCSMI, MAC1LINK, + MAC2LINK, MDIO1, MDIO2, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, + NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, + NDTS4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, OSCCLK, PWM0, + PWM1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, RGMII1, RGMII2, RMII1, + RMII2, ROM16, ROM8, ROMCS1, ROMCS2, ROMCS3, ROMCS4, RXD1, RXD2, RXD3, + RXD4, SALT1, SALT2, SALT3, SALT4, SD1, SD2, SGPMCK, SGPMI, SGPMLD, + SGPMO, SGPSCK, SGPSI0, SGPSI1, SGPSLD, SIOONCTRL, SIOPBI, SIOPBO, + SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1DEBUG, SPI1PASSTHRU, + SPICS1, TIMER3, TIMER4, TIMER5, TIMER6, TIMER7, TIMER8, TXD1, TXD2, + TXD3, TXD4, UART6, USB11D1, USB11H2, USB2D1, USB2H1, USBCKI, VGABIOS_ROM, + VGAHS, VGAVS, VPI18, VPI24, VPI30, VPO12, VPO24, WDTRST1, WDTRST2] required: - compatible diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml index c643d6d44415..a90c0fe0495f 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml @@ -43,24 +43,24 @@ patternProperties: patternProperties: "^function|groups$": $ref: "/schemas/types.yaml#/definitions/string" - enum: [ACPI, ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, - ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, DDCCLK, DDCDAT, - ESPI, FWSPICS1, FWSPICS2, GPID0, GPID2, GPID4, GPID6, GPIE0, GPIE2, - GPIE4, GPIE6, I2C10, I2C11, I2C12, I2C13, I2C14, I2C3, I2C4, I2C5, - I2C6, I2C7, I2C8, I2C9, LAD0, LAD1, LAD2, LAD3, LCLK, LFRAME, LPCHC, - LPCPD, LPCPLUS, LPCPME, LPCRST, LPCSMI, LSIRQ, MAC1LINK, MAC2LINK, - MDIO1, MDIO2, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, - NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, - NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE, PNOR, PWM0, - PWM1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, RGMII1, RGMII2, RMII1, - RMII2, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12, SALT13, - SALT14, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9, SCL1, - SCL2, SD1, SD2, SDA1, SDA2, SGPS1, SGPS2, SIOONCTRL, SIOPBI, SIOPBO, - SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1CS1, SPI1DEBUG, - SPI1PASSTHRU, SPI2CK, SPI2CS0, SPI2CS1, SPI2MISO, SPI2MOSI, TIMER3, - TIMER4, TIMER5, TIMER6, TIMER7, TIMER8, TXD1, TXD2, TXD3, TXD4, UART6, - USB11BHID, USB2AD, USB2AH, USB2BD, USB2BH, USBCKI, VGABIOSROM, VGAHS, - VGAVS, VPI24, VPO, WDTRST1, WDTRST2] + enum: [ ACPI, ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, + ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, DDCCLK, DDCDAT, + ESPI, FWSPICS1, FWSPICS2, GPID0, GPID2, GPID4, GPID6, GPIE0, GPIE2, + GPIE4, GPIE6, I2C10, I2C11, I2C12, I2C13, I2C14, I2C3, I2C4, I2C5, + I2C6, I2C7, I2C8, I2C9, LAD0, LAD1, LAD2, LAD3, LCLK, LFRAME, LPCHC, + LPCPD, LPCPLUS, LPCPME, LPCRST, LPCSMI, LSIRQ, MAC1LINK, MAC2LINK, + MDIO1, MDIO2, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, + NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, + NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE, PNOR, PWM0, + PWM1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, RGMII1, RGMII2, RMII1, + RMII2, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12, SALT13, + SALT14, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9, SCL1, + SCL2, SD1, SD2, SDA1, SDA2, SGPS1, SGPS2, SIOONCTRL, SIOPBI, SIOPBO, + SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1CS1, SPI1DEBUG, + SPI1PASSTHRU, SPI2CK, SPI2CS0, SPI2CS1, SPI2MISO, SPI2MOSI, TIMER3, + TIMER4, TIMER5, TIMER6, TIMER7, TIMER8, TXD1, TXD2, TXD3, TXD4, UART6, + USB11BHID, USB2AD, USB2AH, USB2BD, USB2BH, USBCKI, VGABIOSROM, VGAHS, + VGAVS, VPI24, VPO, WDTRST1, WDTRST2] required: - compatible diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index 1506726c7fea..c78ab7e2eee7 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -31,57 +31,57 @@ patternProperties: properties: function: $ref: "/schemas/types.yaml#/definitions/string" - enum: [ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, - ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT, - FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, - GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, - GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, - I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, - I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, - MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, - NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, - NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, - NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE, PWM0, PWM1, PWM10, PWM11, - PWM12, PWM13, PWM14, PWM15, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8, - PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, RMII4, - RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12, SALT13, SALT14, - SALT15, SALT16, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, - SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, - SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2, - SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13, TACH14, - TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, - THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, - UART13, UART6, UART7, UART8, UART9, USBAD, USBADP, USB2AH, USB2AHP, - USB2BD, USB2BH, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4] + enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, + ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT, + FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, + GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, + GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, + I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, + I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, + MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, + NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, + NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, + NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE, PWM0, PWM1, PWM10, PWM11, + PWM12, PWM13, PWM14, PWM15, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8, + PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, RMII4, + RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12, SALT13, SALT14, + SALT15, SALT16, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, + SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, + SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2, + SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13, TACH14, + TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, + THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, + UART13, UART6, UART7, UART8, UART9, USBAD, USBADP, USB2AH, USB2AHP, + USB2BD, USB2BH, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4 ] groups: $ref: "/schemas/types.yaml#/definitions/string" - enum: [ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, - ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, - EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, - GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, - GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, - I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, - I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, - LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, - MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, - NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, - NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, - OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, - PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, - PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1, - QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, RMII4, - RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1, SALT11G0, SALT11G1, - SALT12G0, SALT12G1, SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, - SALT15G1, SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, - SALT7, SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL, - SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, - SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, - TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, - TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, - TXD4, UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6, - UART7, UART8, UART9, USBA, USBB, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, - WDTRST3, WDTRST4] + enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, + ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, + EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, + GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, + GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, + I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, + I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, + LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, + MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, + NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, + NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, + OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, + PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, + PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1, + QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3, RMII4, + RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1, SALT11G0, SALT11G1, + SALT12G0, SALT12G1, SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, + SALT15G1, SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, + SALT7, SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL, + SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, + SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, + TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, + TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, + TXD4, UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6, + UART7, UART8, UART9, USBA, USBB, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, + WDTRST3, WDTRST4] required: - compatible diff --git a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml index 18163fb69ce7..44c04d11ae4c 100644 --- a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml @@ -32,20 +32,20 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-pinctrl - - ingenic,jz4725b-pinctrl - - ingenic,jz4760-pinctrl - - ingenic,jz4770-pinctrl - - ingenic,jz4780-pinctrl - - ingenic,x1000-pinctrl - - ingenic,x1500-pinctrl - - ingenic,x1830-pinctrl + - ingenic,jz4740-pinctrl + - ingenic,jz4725b-pinctrl + - ingenic,jz4760-pinctrl + - ingenic,jz4770-pinctrl + - ingenic,jz4780-pinctrl + - ingenic,x1000-pinctrl + - ingenic,x1500-pinctrl + - ingenic,x1830-pinctrl - items: - - const: ingenic,jz4760b-pinctrl - - const: ingenic,jz4760-pinctrl + - const: ingenic,jz4760b-pinctrl + - const: ingenic,jz4760-pinctrl - items: - - const: ingenic,x1000e-pinctrl - - const: ingenic,x1000-pinctrl + - const: ingenic,x1000e-pinctrl + - const: ingenic,x1000-pinctrl reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml index b2de3992d484..c64c93206817 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml @@ -60,8 +60,8 @@ patternProperties: oneOf: - pattern: "^gpio([1-9]|[1-7][0-9]|80)$" - enum: [ sdc1_clk, sdc1_cmd, sdc1_data, sdc2_clk, sdc2_cmd, - sdc2_data, qdsd_cmd, qdsd_data0, qdsd_data1, qdsd_data2, - qdsd_data3 ] + sdc2_data, qdsd_cmd, qdsd_data0, qdsd_data1, qdsd_data2, + qdsd_data3 ] minItems: 1 maxItems: 4 @@ -70,31 +70,31 @@ patternProperties: Specify the alternative function to be configured for the specified pins. enum: [ adsp_ext, alsp_int, atest_bbrx0, atest_bbrx1, atest_char, - atest_char0, atest_char1, atest_char2, atest_char3, atest_combodac, - atest_gpsadc0, atest_gpsadc1, atest_tsens, atest_wlan0, - atest_wlan1, backlight_en, bimc_dte0, bimc_dte1, blsp1_i2c, - blsp2_i2c, blsp3_i2c, blsp4_i2c, blsp5_i2c, blsp6_i2c, blsp1_spi, - blsp1_spi_cs1, blsp1_spi_cs2, blsp1_spi_cs3, blsp2_spi, - blsp2_spi_cs1, blsp2_spi_cs2, blsp2_spi_cs3, blsp3_spi, - blsp3_spi_cs1, blsp3_spi_cs2, blsp3_spi_cs3, blsp4_spi, blsp5_spi, - blsp6_spi, blsp1_uart, blsp2_uart, blsp1_uim, blsp2_uim, cam1_rst, - cam1_standby, cam_mclk0, cam_mclk1, cci_async, cci_i2c, cci_timer0, - cci_timer1, cci_timer2, cdc_pdm0, codec_mad, dbg_out, display_5v, - dmic0_clk, dmic0_data, dsi_rst, ebi0_wrcdc, euro_us, ext_lpass, - flash_strobe, gcc_gp1_clk_a, gcc_gp1_clk_b, gcc_gp2_clk_a, - gcc_gp2_clk_b, gcc_gp3_clk_a, gcc_gp3_clk_b, gpio, gsm0_tx0, - gsm0_tx1, gsm1_tx0, gsm1_tx1, gyro_accl, kpsns0, kpsns1, kpsns2, - ldo_en, ldo_update, mag_int, mdp_vsync, modem_tsync, m_voc, - nav_pps, nav_tsync, pa_indicator, pbs0, pbs1, pbs2, pri_mi2s, - pri_mi2s_ws, prng_rosc, pwr_crypto_enabled_a, pwr_crypto_enabled_b, - pwr_modem_enabled_a, pwr_modem_enabled_b, pwr_nav_enabled_a, - pwr_nav_enabled_b, qdss_ctitrig_in_a0, qdss_ctitrig_in_a1, - qdss_ctitrig_in_b0, qdss_ctitrig_in_b1, qdss_ctitrig_out_a0, - qdss_ctitrig_out_a1, qdss_ctitrig_out_b0, qdss_ctitrig_out_b1, - qdss_traceclk_a, qdss_traceclk_b, qdss_tracectl_a, qdss_tracectl_b, - qdss_tracedata_a, qdss_tracedata_b, reset_n, sd_card, sd_write, - sec_mi2s, smb_int, ssbi_wtr0, ssbi_wtr1, uim1, uim2, uim3, - uim_batt, wcss_bt, wcss_fm, wcss_wlan, webcam1_rst ] + atest_char0, atest_char1, atest_char2, atest_char3, atest_combodac, + atest_gpsadc0, atest_gpsadc1, atest_tsens, atest_wlan0, + atest_wlan1, backlight_en, bimc_dte0, bimc_dte1, blsp1_i2c, + blsp2_i2c, blsp3_i2c, blsp4_i2c, blsp5_i2c, blsp6_i2c, blsp1_spi, + blsp1_spi_cs1, blsp1_spi_cs2, blsp1_spi_cs3, blsp2_spi, + blsp2_spi_cs1, blsp2_spi_cs2, blsp2_spi_cs3, blsp3_spi, + blsp3_spi_cs1, blsp3_spi_cs2, blsp3_spi_cs3, blsp4_spi, blsp5_spi, + blsp6_spi, blsp1_uart, blsp2_uart, blsp1_uim, blsp2_uim, cam1_rst, + cam1_standby, cam_mclk0, cam_mclk1, cci_async, cci_i2c, cci_timer0, + cci_timer1, cci_timer2, cdc_pdm0, codec_mad, dbg_out, display_5v, + dmic0_clk, dmic0_data, dsi_rst, ebi0_wrcdc, euro_us, ext_lpass, + flash_strobe, gcc_gp1_clk_a, gcc_gp1_clk_b, gcc_gp2_clk_a, + gcc_gp2_clk_b, gcc_gp3_clk_a, gcc_gp3_clk_b, gpio, gsm0_tx0, + gsm0_tx1, gsm1_tx0, gsm1_tx1, gyro_accl, kpsns0, kpsns1, kpsns2, + ldo_en, ldo_update, mag_int, mdp_vsync, modem_tsync, m_voc, + nav_pps, nav_tsync, pa_indicator, pbs0, pbs1, pbs2, pri_mi2s, + pri_mi2s_ws, prng_rosc, pwr_crypto_enabled_a, pwr_crypto_enabled_b, + pwr_modem_enabled_a, pwr_modem_enabled_b, pwr_nav_enabled_a, + pwr_nav_enabled_b, qdss_ctitrig_in_a0, qdss_ctitrig_in_a1, + qdss_ctitrig_in_b0, qdss_ctitrig_in_b1, qdss_ctitrig_out_a0, + qdss_ctitrig_out_a1, qdss_ctitrig_out_b0, qdss_ctitrig_out_b1, + qdss_traceclk_a, qdss_traceclk_b, qdss_tracectl_a, qdss_tracectl_b, + qdss_tracedata_a, qdss_tracedata_b, reset_n, sd_card, sd_write, + sec_mi2s, smb_int, ssbi_wtr0, ssbi_wtr1, uim1, uim2, uim3, + uim_batt, wcss_bt, wcss_fm, wcss_wlan, webcam1_rst ] drive-strength: enum: [2, 4, 6, 8, 10, 12, 14, 16] diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8250-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm8250-pinctrl.yaml index 6dc3b52f47cd..8508c57522fd 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm8250-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8250-pinctrl.yaml @@ -76,22 +76,22 @@ patternProperties: pins. enum: [ aoss_cti, atest, audio_ref, cam_mclk, cci_async, cci_i2c, - cci_timer0, cci_timer1, cci_timer2, cci_timer3, cci_timer4, cri_trng, - cri_trng0, cri_trng1, dbg_out, ddr_bist, ddr_pxi0, ddr_pxi1, - ddr_pxi2, ddr_pxi3, dp_hot, dp_lcd, gcc_gp1, gcc_gp2, gcc_gp3, gpio, - ibi_i3c, jitter_bist, lpass_slimbus, mdp_vsync, mdp_vsync0, - mdp_vsync1, mdp_vsync2, mdp_vsync3, mi2s0_data0, mi2s0_data1, - mi2s0_sck, mi2s0_ws, mi2s1_data0, mi2s1_data1, mi2s1_sck, mi2s1_ws, - mi2s2_data0, mi2s2_data1, mi2s2_sck, mi2s2_ws, pci_e0, pci_e1, - pci_e2, phase_flag, pll_bist, pll_bypassnl, pll_clk, pll_reset, - pri_mi2s, prng_rosc, qdss_cti, qdss_gpio, qspi0, qspi1, qspi2, qspi3, - qspi_clk, qspi_cs, qup0, qup1, qup10, qup11, qup12, qup13, qup14, - qup15, qup16, qup17, qup18, qup19, qup2, qup3, qup4, qup5, qup6, - qup7, qup8, qup9, qup_l4, qup_l5, qup_l6, sd_write, sdc40, sdc41, - sdc42, sdc43, sdc4_clk, sdc4_cmd, sec_mi2s, sp_cmu, tgu_ch0, tgu_ch1, - tgu_ch2, tgu_ch3, tsense_pwm1, tsense_pwm2, tsif0_clk, tsif0_data, - tsif0_en, tsif0_error, tsif0_sync, tsif1_clk, tsif1_data, tsif1_en, - tsif1_error, tsif1_sync, usb2phy_ac, usb_phy, vsense_trigger ] + cci_timer0, cci_timer1, cci_timer2, cci_timer3, cci_timer4, cri_trng, + cri_trng0, cri_trng1, dbg_out, ddr_bist, ddr_pxi0, ddr_pxi1, + ddr_pxi2, ddr_pxi3, dp_hot, dp_lcd, gcc_gp1, gcc_gp2, gcc_gp3, gpio, + ibi_i3c, jitter_bist, lpass_slimbus, mdp_vsync, mdp_vsync0, + mdp_vsync1, mdp_vsync2, mdp_vsync3, mi2s0_data0, mi2s0_data1, + mi2s0_sck, mi2s0_ws, mi2s1_data0, mi2s1_data1, mi2s1_sck, mi2s1_ws, + mi2s2_data0, mi2s2_data1, mi2s2_sck, mi2s2_ws, pci_e0, pci_e1, + pci_e2, phase_flag, pll_bist, pll_bypassnl, pll_clk, pll_reset, + pri_mi2s, prng_rosc, qdss_cti, qdss_gpio, qspi0, qspi1, qspi2, qspi3, + qspi_clk, qspi_cs, qup0, qup1, qup10, qup11, qup12, qup13, qup14, + qup15, qup16, qup17, qup18, qup19, qup2, qup3, qup4, qup5, qup6, + qup7, qup8, qup9, qup_l4, qup_l5, qup_l6, sd_write, sdc40, sdc41, + sdc42, sdc43, sdc4_clk, sdc4_cmd, sec_mi2s, sp_cmu, tgu_ch0, tgu_ch1, + tgu_ch2, tgu_ch3, tsense_pwm1, tsense_pwm2, tsif0_clk, tsif0_data, + tsif0_en, tsif0_error, tsif0_sync, tsif1_clk, tsif1_data, tsif1_en, + tsif1_error, tsif1_sync, usb2phy_ac, usb_phy, vsense_trigger ] drive-strength: enum: [2, 4, 6, 8, 10, 12, 14, 16] diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 0857cbeeb43c..72877544ca78 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -48,8 +48,8 @@ properties: st,package: description: - Indicates the SOC package used. - More details in include/dt-bindings/pinctrl/stm32-pinfunc.h + Indicates the SOC package used. + More details in include/dt-bindings/pinctrl/stm32-pinfunc.h $ref: /schemas/types.yaml#/definitions/uint32 enum: [1, 2, 4, 8] diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml index ff5936e4a215..dd564349aa53 100644 --- a/Documentation/devicetree/bindings/power/power-domain.yaml +++ b/Documentation/devicetree/bindings/power/power-domain.yaml @@ -58,13 +58,13 @@ properties: power-domains: description: - A phandle and PM domain specifier as defined by bindings of the power - controller specified by phandle. Some power domains might be powered - from another power domain (or have other hardware specific - dependencies). For representing such dependency a standard PM domain - consumer binding is used. When provided, all domains created - by the given provider should be subdomains of the domain specified - by this binding. + A phandle and PM domain specifier as defined by bindings of the power + controller specified by phandle. Some power domains might be powered + from another power domain (or have other hardware specific + dependencies). For representing such dependency a standard PM domain + consumer binding is used. When provided, all domains created + by the given provider should be subdomains of the domain specified + by this binding. required: - "#power-domain-cells" diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml index 30eabbb14ef3..6244b8ee9402 100644 --- a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml @@ -44,9 +44,9 @@ required: anyOf: - required: - - gpios + - gpios - required: - - charge-status-gpios + - charge-status-gpios additionalProperties: false diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml index d2022206081f..c0d7700afee7 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml @@ -76,8 +76,7 @@ patternProperties: "^((s|l|lvs|5vs)[0-9]*)|(boost-bypass)|(bob)$": description: List of regulators and its properties - allOf: - - $ref: regulator.yaml# + $ref: regulator.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml index 085cbd1ad8d0..fb111e2d5b99 100644 --- a/Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml @@ -29,7 +29,7 @@ properties: Short-circuit interrupt for lab. required: - - interrupts + - interrupts ibb: type: object @@ -42,7 +42,7 @@ properties: Short-circuit interrupt for lab. required: - - interrupts + - interrupts required: - compatible diff --git a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml index 24b0c50fa436..6070456a7b67 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml @@ -118,16 +118,16 @@ else: - const: l1dram required: - - compatible - - reg - - reg-names - - ti,sci - - ti,sci-dev-id - - ti,sci-proc-ids - - resets - - firmware-name - - mboxes - - memory-region + - compatible + - reg + - reg-names + - ti,sci + - ti,sci-dev-id + - ti,sci-proc-ids + - resets + - firmware-name + - mboxes + - memory-region unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.yaml b/Documentation/devicetree/bindings/reset/fsl,imx7-src.yaml index b1a71c1bb05b..569cd3bd3a70 100644 --- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.yaml +++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.yaml @@ -24,9 +24,9 @@ properties: compatible: items: - enum: - - fsl,imx7d-src - - fsl,imx8mq-src - - fsl,imx8mp-src + - fsl,imx7d-src + - fsl,imx8mq-src + - fsl,imx8mp-src - const: syscon reg: diff --git a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml index 4206bf8a2469..bc2c7e53a28e 100644 --- a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml @@ -16,16 +16,16 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-rtc - - ingenic,jz4760-rtc + - ingenic,jz4740-rtc + - ingenic,jz4760-rtc - items: - - const: ingenic,jz4725b-rtc - - const: ingenic,jz4740-rtc + - const: ingenic,jz4725b-rtc + - const: ingenic,jz4740-rtc - items: - - enum: - - ingenic,jz4770-rtc - - ingenic,jz4780-rtc - - const: ingenic,jz4760-rtc + - enum: + - ingenic,jz4770-rtc + - ingenic,jz4780-rtc + - const: ingenic,jz4760-rtc reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml index c023d650e9c1..dc8349322c83 100644 --- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml +++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml @@ -16,18 +16,18 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-uart - - ingenic,jz4760-uart - - ingenic,jz4780-uart - - ingenic,x1000-uart + - ingenic,jz4740-uart + - ingenic,jz4760-uart + - ingenic,jz4780-uart + - ingenic,x1000-uart - items: - - enum: - - ingenic,jz4770-uart - - ingenic,jz4775-uart - - const: ingenic,jz4760-uart + - enum: + - ingenic,jz4770-uart + - ingenic,jz4775-uart + - const: ingenic,jz4760-uart - items: - - const: ingenic,jz4725b-uart - - const: ingenic,jz4740-uart + - const: ingenic,jz4725b-uart + - const: ingenic,jz4740-uart reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml b/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml index 3cd0b70cd6cf..55fffae05dcf 100644 --- a/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml +++ b/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml @@ -97,13 +97,13 @@ allOf: clock-names: oneOf: - items: - - const: t0_clk - - const: slow_clk + - const: t0_clk + - const: slow_clk - items: - - const: t0_clk - - const: t1_clk - - const: t2_clk - - const: slow_clk + - const: t0_clk + - const: t1_clk + - const: t2_clk + - const: slow_clk required: - compatible diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml index a2b29cc3e93b..bd04fdb57414 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml @@ -7,8 +7,8 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: GENI Serial Engine QUP Wrapper Controller maintainers: - - Mukesh Savaliya - - Akash Asthana + - Mukesh Savaliya + - Akash Asthana description: | Generic Interface (GENI) based Qualcomm Universal Peripheral (QUP) wrapper @@ -38,10 +38,10 @@ properties: - description: Slave AHB Clock "#address-cells": - const: 2 + const: 2 "#size-cells": - const: 2 + const: 2 ranges: true @@ -79,15 +79,15 @@ patternProperties: maxItems: 1 interconnects: - minItems: 2 - maxItems: 3 + minItems: 2 + maxItems: 3 interconnect-names: - minItems: 2 - items: - - const: qup-core - - const: qup-config - - const: qup-memory + minItems: 2 + items: + - const: qup-core + - const: qup-config + - const: qup-memory required: - reg @@ -111,10 +111,10 @@ patternProperties: maxItems: 1 "#address-cells": - const: 1 + const: 1 "#size-cells": - const: 0 + const: 0 required: - compatible @@ -136,10 +136,10 @@ patternProperties: maxItems: 1 "#address-cells": - const: 1 + const: 1 "#size-cells": - const: 0 + const: 0 clock-frequency: description: Desired I2C bus clock frequency in Hz. diff --git a/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml index f9344adaf6c2..7a7f28469624 100644 --- a/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml +++ b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml @@ -19,12 +19,11 @@ properties: compatible: items: - enum: - - amlogic,aiu-gxbb - - amlogic,aiu-gxl - - amlogic,aiu-meson8 - - amlogic,aiu-meson8b - - const: - amlogic,aiu + - amlogic,aiu-gxbb + - amlogic,aiu-gxl + - amlogic,aiu-meson8 + - amlogic,aiu-meson8b + - const: amlogic,aiu clocks: items: diff --git a/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml b/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml index 51a0c30e10f9..b4b3828c40af 100644 --- a/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml +++ b/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml @@ -19,13 +19,11 @@ properties: compatible: oneOf: - items: - - const: - amlogic,g12a-toacodec + - const: amlogic,g12a-toacodec - items: - - enum: - - amlogic,sm1-toacodec - - const: - amlogic,g12a-toacodec + - enum: + - amlogic,sm1-toacodec + - const: amlogic,g12a-toacodec reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml index 83f44f07ac3f..5bcb643c288f 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml @@ -11,7 +11,7 @@ maintainers: properties: compatible: - const: cirrus,cs42l51 + const: cirrus,cs42l51 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml index 44f49bebb267..cdc0fdaab30a 100644 --- a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml +++ b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml @@ -16,13 +16,13 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4740-i2s - - ingenic,jz4760-i2s - - ingenic,jz4770-i2s - - ingenic,jz4780-i2s + - ingenic,jz4740-i2s + - ingenic,jz4760-i2s + - ingenic,jz4770-i2s + - ingenic,jz4780-i2s - items: - - const: ingenic,jz4725b-i2s - - const: ingenic,jz4740-i2s + - const: ingenic,jz4725b-i2s + - const: ingenic,jz4740-i2s '#sound-dai-cells': const: 0 diff --git a/Documentation/devicetree/bindings/sound/maxim,max98390.yaml b/Documentation/devicetree/bindings/sound/maxim,max98390.yaml index e5ac35280da3..9c2e3effa0c0 100644 --- a/Documentation/devicetree/bindings/sound/maxim,max98390.yaml +++ b/Documentation/devicetree/bindings/sound/maxim,max98390.yaml @@ -11,7 +11,7 @@ maintainers: properties: compatible: - const: maxim,max98390 + const: maxim,max98390 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/rockchip-i2s.yaml b/Documentation/devicetree/bindings/sound/rockchip-i2s.yaml index acb2b888dbfc..245895b58a2f 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-i2s.yaml @@ -19,16 +19,16 @@ properties: - const: rockchip,rk3066-i2s - items: - enum: - - rockchip,px30-i2s - - rockchip,rk3036-i2s - - rockchip,rk3188-i2s - - rockchip,rk3228-i2s - - rockchip,rk3288-i2s - - rockchip,rk3308-i2s - - rockchip,rk3328-i2s - - rockchip,rk3366-i2s - - rockchip,rk3368-i2s - - rockchip,rk3399-i2s + - rockchip,px30-i2s + - rockchip,rk3036-i2s + - rockchip,rk3188-i2s + - rockchip,rk3228-i2s + - rockchip,rk3288-i2s + - rockchip,rk3308-i2s + - rockchip,rk3328-i2s + - rockchip,rk3366-i2s + - rockchip,rk3368-i2s + - rockchip,rk3399-i2s - const: rockchip,rk3066-i2s reg: @@ -55,8 +55,8 @@ properties: oneOf: - const: rx - items: - - const: tx - - const: rx + - const: tx + - const: rx power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index c467152656f7..7bad6f16fe60 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml @@ -25,8 +25,8 @@ properties: - const: rockchip,rk3399-spdif - items: - enum: - - rockchip,rk3188-spdif - - rockchip,rk3288-spdif + - rockchip,rk3188-spdif + - rockchip,rk3288-spdif - const: rockchip,rk3066-spdif reg: diff --git a/Documentation/devicetree/bindings/sound/tas2770.yaml b/Documentation/devicetree/bindings/sound/tas2770.yaml index 8192450d72dc..33a90f829c80 100644 --- a/Documentation/devicetree/bindings/sound/tas2770.yaml +++ b/Documentation/devicetree/bindings/sound/tas2770.yaml @@ -44,8 +44,8 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 description: Sets TDM RX capture edge. enum: - - 0 # Rising edge - - 1 # Falling edge + - 0 # Rising edge + - 1 # Falling edge '#sound-dai-cells': const: 1 diff --git a/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml b/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml index e84d4a20c633..f578f17f3e04 100644 --- a/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml +++ b/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml @@ -32,32 +32,32 @@ properties: reg: maxItems: 1 description: | - I2C addresss of the device can be one of these 0x4c, 0x4d, 0x4e or 0x4f + I2C addresss of the device can be one of these 0x4c, 0x4d, 0x4e or 0x4f reset-gpios: description: | - GPIO used for hardware reset. + GPIO used for hardware reset. areg-supply: - description: | - Regulator with AVDD at 3.3V. If not defined then the internal regulator - is enabled. + description: | + Regulator with AVDD at 3.3V. If not defined then the internal regulator + is enabled. ti,mic-bias-source: description: | - Indicates the source for MIC Bias. - 0 - Mic bias is set to VREF - 1 - Mic bias is set to VREF × 1.096 - 6 - Mic bias is set to AVDD + Indicates the source for MIC Bias. + 0 - Mic bias is set to VREF + 1 - Mic bias is set to VREF × 1.096 + 6 - Mic bias is set to AVDD $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 6] ti,vref-source: description: | - Indicates the source for MIC Bias. - 0 - Set VREF to 2.75V - 1 - Set VREF to 2.5V - 2 - Set VREF to 1.375V + Indicates the source for MIC Bias. + 0 - Set VREF to 2.75V + 1 - Set VREF to 2.5V + 2 - Set VREF to 1.375V $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] @@ -109,7 +109,7 @@ properties: default: [0, 0, 0, 0] patternProperties: - '^ti,gpo-config-[1-4]$': + '^ti,gpo-config-[1-4]$': $ref: /schemas/types.yaml#/definitions/uint32-array description: | Defines the configuration and output driver for the general purpose diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index 243a6b1e66ea..7866a655d81c 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml @@ -22,10 +22,10 @@ properties: - const: allwinner,sun6i-a31-spi - const: allwinner,sun8i-h3-spi - items: - - enum: - - allwinner,sun8i-r40-spi - - allwinner,sun50i-h6-spi - - const: allwinner,sun8i-h3-spi + - enum: + - allwinner,sun8i-r40-spi + - allwinner,sun50i-h6-spi + - const: allwinner,sun8i-h3-spi reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml index 6e44c9c2aeba..1b50cedbfb3e 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml @@ -23,19 +23,19 @@ properties: - const: fsl,imx51-ecspi - const: fsl,imx53-ecspi - items: - - enum: - - fsl,imx50-ecspi - - fsl,imx6q-ecspi - - fsl,imx6sx-ecspi - - fsl,imx6sl-ecspi - - fsl,imx6sll-ecspi - - fsl,imx6ul-ecspi - - fsl,imx7d-ecspi - - fsl,imx8mq-ecspi - - fsl,imx8mm-ecspi - - fsl,imx8mn-ecspi - - fsl,imx8mp-ecspi - - const: fsl,imx51-ecspi + - enum: + - fsl,imx50-ecspi + - fsl,imx6q-ecspi + - fsl,imx6sx-ecspi + - fsl,imx6sl-ecspi + - fsl,imx6sll-ecspi + - fsl,imx6ul-ecspi + - fsl,imx7d-ecspi + - fsl,imx8mq-ecspi + - fsl,imx8mm-ecspi + - fsl,imx8mn-ecspi + - fsl,imx8mp-ecspi + - const: fsl,imx51-ecspi reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml b/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml index 4ddb42a4ae05..e0c55dd235d8 100644 --- a/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml +++ b/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml @@ -33,4 +33,4 @@ examples: reg = <0x1f000000 0x10>; }; -... \ No newline at end of file +... diff --git a/Documentation/devicetree/bindings/spi/spi-mux.yaml b/Documentation/devicetree/bindings/spi/spi-mux.yaml index 0ae692dc28b5..3d3fed63409b 100644 --- a/Documentation/devicetree/bindings/spi/spi-mux.yaml +++ b/Documentation/devicetree/bindings/spi/spi-mux.yaml @@ -43,47 +43,47 @@ properties: maxItems: 1 required: - - compatible - - reg - - spi-max-frequency - - mux-controls + - compatible + - reg + - spi-max-frequency + - mux-controls examples: - - | - #include - mux: mux-controller { - compatible = "gpio-mux"; - #mux-control-cells = <0>; + - | + #include + mux: mux-controller { + compatible = "gpio-mux"; + #mux-control-cells = <0>; - mux-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>; - }; + mux-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>; + }; - spi { - #address-cells = <1>; - #size-cells = <0>; - spi@0 { - compatible = "spi-mux"; - reg = <0>; - #address-cells = <1>; - #size-cells = <0>; - spi-max-frequency = <100000000>; + spi { + #address-cells = <1>; + #size-cells = <0>; + spi@0 { + compatible = "spi-mux"; + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <100000000>; - mux-controls = <&mux>; + mux-controls = <&mux>; - spi-flash@0 { - compatible = "jedec,spi-nor"; - reg = <0>; - #address-cells = <1>; - #size-cells = <0>; - spi-max-frequency = <40000000>; - }; + spi-flash@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <40000000>; + }; - spi-device@1 { - compatible = "lineartechnology,ltc2488"; - reg = <1>; - #address-cells = <1>; - #size-cells = <0>; - spi-max-frequency = <10000000>; - }; - }; - }; + spi-device@1 { + compatible = "lineartechnology,ltc2488"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <10000000>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 81ad4b761502..74dc6185eced 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -26,13 +26,13 @@ properties: - const: rockchip,rv1108-spi - items: - enum: - - rockchip,px30-spi - - rockchip,rk3188-spi - - rockchip,rk3288-spi - - rockchip,rk3308-spi - - rockchip,rk3328-spi - - rockchip,rk3368-spi - - rockchip,rk3399-spi + - rockchip,px30-spi + - rockchip,rk3188-spi + - rockchip,rk3288-spi + - rockchip,rk3308-spi + - rockchip,rk3328-spi + - rockchip,rk3368-spi + - rockchip,rk3399-spi - const: rockchip,rk3066-spi reg: diff --git a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml index 5145883d932e..ad4beaf02842 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml @@ -44,9 +44,9 @@ select: true properties: "#cooling-cells": description: - Must be 2, in order to specify minimum and maximum cooling state used in - the cooling-maps reference. The first cell is the minimum cooling state - and the second cell is the maximum cooling state requested. + Must be 2, in order to specify minimum and maximum cooling state used in + the cooling-maps reference. The first cell is the minimum cooling state + and the second cell is the maximum cooling state requested. const: 2 examples: diff --git a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml index 7a922f540934..a832d427e9d5 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml @@ -18,29 +18,28 @@ description: | This binding describes the thermal idle node. properties: - $nodename: - const: thermal-idle - description: | - A thermal-idle node describes the idle cooling device properties to - cool down efficiently the attached thermal zone. - - '#cooling-cells': - const: 2 - description: | - Must be 2, in order to specify minimum and maximum cooling state used in - the cooling-maps reference. The first cell is the minimum cooling state - and the second cell is the maximum cooling state requested. - - duration-us: - description: | - The idle duration in microsecond the device should cool down. - - exit-latency-us: - description: | - The exit latency constraint in microsecond for the injected - idle state for the device. It is the latency constraint to - apply when selecting an idle state from among all the present - ones. + $nodename: + const: thermal-idle + description: | + A thermal-idle node describes the idle cooling device properties to + cool down efficiently the attached thermal zone. + + '#cooling-cells': + const: 2 + description: | + Must be 2, in order to specify minimum and maximum cooling state used in + the cooling-maps reference. The first cell is the minimum cooling state + and the second cell is the maximum cooling state requested. + + duration-us: + description: | + The idle duration in microsecond the device should cool down. + + exit-latency-us: + description: | + The exit latency constraint in microsecond for the injected idle state + for the device. It is the latency constraint to apply when selecting an + idle state from among all the present ones. required: - '#cooling-cells' diff --git a/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml b/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml index 883f7f46650b..a4f51f46b7a1 100644 --- a/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml +++ b/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml @@ -20,17 +20,17 @@ properties: - const: fsl,imx31-gpt - items: - enum: - - fsl,imx25-gpt - - fsl,imx50-gpt - - fsl,imx51-gpt - - fsl,imx53-gpt - - fsl,imx6q-gpt + - fsl,imx25-gpt + - fsl,imx50-gpt + - fsl,imx51-gpt + - fsl,imx53-gpt + - fsl,imx6q-gpt - const: fsl,imx31-gpt - const: fsl,imx6dl-gpt - items: - enum: - - fsl,imx6sl-gpt - - fsl,imx6sx-gpt + - fsl,imx6sl-gpt + - fsl,imx6sx-gpt - const: fsl,imx6dl-gpt reg: diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml index 371fb02a4351..024bcad75101 100644 --- a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml +++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml @@ -49,16 +49,16 @@ properties: compatible: oneOf: - items: - - enum: - - ingenic,jz4740-tcu - - ingenic,jz4725b-tcu - - ingenic,jz4770-tcu - - ingenic,x1000-tcu - - const: simple-mfd + - enum: + - ingenic,jz4740-tcu + - ingenic,jz4725b-tcu + - ingenic,jz4770-tcu + - ingenic,x1000-tcu + - const: simple-mfd - items: - - const: ingenic,jz4780-tcu - - const: ingenic,jz4770-tcu - - const: simple-mfd + - const: ingenic,jz4780-tcu + - const: ingenic,jz4770-tcu + - const: simple-mfd reg: maxItems: 1 @@ -113,13 +113,13 @@ patternProperties: compatible: oneOf: - enum: - - ingenic,jz4740-watchdog - - ingenic,jz4780-watchdog + - ingenic,jz4740-watchdog + - ingenic,jz4780-watchdog - items: - - enum: - - ingenic,jz4770-watchdog - - ingenic,jz4725b-watchdog - - const: ingenic,jz4740-watchdog + - enum: + - ingenic,jz4770-watchdog + - ingenic,jz4725b-watchdog + - const: ingenic,jz4740-watchdog reg: maxItems: 1 @@ -143,13 +143,13 @@ patternProperties: compatible: oneOf: - enum: - - ingenic,jz4740-pwm - - ingenic,jz4725b-pwm + - ingenic,jz4740-pwm + - ingenic,jz4725b-pwm - items: - - enum: - - ingenic,jz4770-pwm - - ingenic,jz4780-pwm - - const: ingenic,jz4740-pwm + - enum: + - ingenic,jz4770-pwm + - ingenic,jz4780-pwm + - const: ingenic,jz4740-pwm reg: maxItems: 1 @@ -182,11 +182,11 @@ patternProperties: compatible: oneOf: - enum: - - ingenic,jz4725b-ost - - ingenic,jz4770-ost + - ingenic,jz4725b-ost + - ingenic,jz4770-ost - items: - - const: ingenic,jz4780-ost - - const: ingenic,jz4770-ost + - const: ingenic,jz4780-ost + - const: ingenic,jz4770-ost reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml index 5d300efdf0ca..7b39e3204fb3 100644 --- a/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml +++ b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml @@ -27,8 +27,8 @@ properties: clocks: minItems: 1 items: - - description: Timer ticks reference clock source - - description: APB interface clock source + - description: Timer ticks reference clock source + - description: APB interface clock source clock-names: minItems: 1 diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index b7e94fe8643f..4ace8039840a 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -298,7 +298,7 @@ properties: - national,lm80 # Temperature sensor with integrated fan control - national,lm85 - # ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface + # I2C ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator - national,lm92 # i2c trusted platform module (TPM) - nuvoton,npct501 diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml index 4ff632d82858..ffa157a0fce7 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.yaml +++ b/Documentation/devicetree/bindings/usb/dwc2.yaml @@ -19,24 +19,24 @@ properties: - const: snps,dwc2 - items: - enum: - - rockchip,px30-usb - - rockchip,rk3036-usb - - rockchip,rk3188-usb - - rockchip,rk3228-usb - - rockchip,rk3288-usb - - rockchip,rk3328-usb - - rockchip,rk3368-usb - - rockchip,rv1108-usb + - rockchip,px30-usb + - rockchip,rk3036-usb + - rockchip,rk3188-usb + - rockchip,rk3228-usb + - rockchip,rk3288-usb + - rockchip,rk3328-usb + - rockchip,rk3368-usb + - rockchip,rv1108-usb - const: rockchip,rk3066-usb - const: snps,dwc2 - const: lantiq,arx100-usb - const: lantiq,xrx200-usb - items: - enum: - - amlogic,meson8-usb - - amlogic,meson8b-usb - - amlogic,meson-gxbb-usb - - amlogic,meson-g12a-usb + - amlogic,meson8-usb + - amlogic,meson8b-usb + - amlogic,meson-gxbb-usb + - amlogic,meson-g12a-usb - const: snps,dwc2 - const: amcc,dwc-otg - const: snps,dwc2 @@ -116,12 +116,13 @@ properties: snps,need-phy-for-wake: $ref: /schemas/types.yaml#/definitions/flag - description: If present indicates that the phy needs to be left on for remote wakeup during suspend. + description: If present indicates that the phy needs to be left on for + remote wakeup during suspend. snps,reset-phy-on-wake: $ref: /schemas/types.yaml#/definitions/flag - description: If present indicates that we need to reset the PHY when we detect a wakeup. - This is due to a hardware errata. + description: If present indicates that we need to reset the PHY when we + detect a wakeup. This is due to a hardware errata. required: - compatible diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml index 69f3f26d1207..247ef00381ea 100644 --- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml @@ -80,7 +80,7 @@ properties: companion: $ref: /schemas/types.yaml#/definitions/phandle description: - Phandle of a companion. + Phandle of a companion. phys: description: PHY specifier for the USB PHY diff --git a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml index c334aea6b59d..678396eeeb78 100644 --- a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml +++ b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml @@ -16,11 +16,11 @@ properties: compatible: oneOf: - enum: - - ingenic,jz4770-musb - - ingenic,jz4740-musb + - ingenic,jz4770-musb + - ingenic,jz4740-musb - items: - - const: ingenic,jz4725b-musb - - const: ingenic,jz4740-musb + - const: ingenic,jz4725b-musb + - const: ingenic,jz4740-musb reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml index 0073763a30d8..196589c93373 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml @@ -57,11 +57,11 @@ properties: minItems: 4 maxItems: 5 items: - - const: dev - - const: ss - - const: ss_src - - const: fs_src - - const: hs_src + - const: dev + - const: ss + - const: ss_src + - const: fs_src + - const: hs_src power-domains: items: diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index 90750255792f..484fc1091d7c 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -19,9 +19,9 @@ properties: power-domains: description: - PM domain provider node and an args specifier containing - the USB device id value. See, - Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt + PM domain provider node and an args specifier containing + the USB device id value. See, + Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt clocks: description: Clock phandles to usb2_refclk and lpm_clk diff --git a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml index 804b9b4f6654..c1b19fc5d0a2 100644 --- a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml @@ -13,8 +13,8 @@ properties: compatible: items: - enum: - - ti,keystone-dwc3 - - ti,am654-dwc3 + - ti,keystone-dwc3 + - ti,am654-dwc3 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index f3d847832fdc..2baee2c817c1 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -993,7 +993,8 @@ patternProperties: "^sst,.*": description: Silicon Storage Technology, Inc. "^sstar,.*": - description: Xiamen Xingchen(SigmaStar) Technology Co., Ltd. (formerly part of MStar Semiconductor, Inc.) + description: Xiamen Xingchen(SigmaStar) Technology Co., Ltd. + (formerly part of MStar Semiconductor, Inc.) "^st,.*": description: STMicroelectronics "^starry,.*": -- cgit From 5f0b06da5cde3f0a613308b89f0afea678559fdf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Aug 2020 09:21:41 -0600 Subject: dt-bindings: Remove more cases of 'allOf' containing a '$ref' Another wack-a-mole pass of killing off unnecessary 'allOf + $ref' usage. json-schema versions draft7 and earlier have a weird behavior in that any keywords combined with a '$ref' are ignored (silently). The correct form was to put a '$ref' under an 'allOf'. This behavior is now changed in the 2019-09 json-schema spec and '$ref' can be mixed with other keywords. The json-schema library doesn't yet support this, but the tooling now does a fixup for this and either way works. This has been a constant source of review comments, so let's change this treewide so everyone copies the simpler syntax. Signed-off-by: Rob Herring --- .../bindings/display/brcm,bcm2835-hdmi.yaml | 3 +- .../bindings/iio/adc/qcom,spmi-vadc.yaml | 3 +- .../bindings/interrupt-controller/mti,gic.yaml | 28 +++++++------- .../bindings/media/i2c/dongwoon,dw9768.yaml | 43 ++++++++++------------ .../devicetree/bindings/pci/ti,j721e-pci-ep.yaml | 3 +- .../devicetree/bindings/pci/ti,j721e-pci-host.yaml | 3 +- .../devicetree/bindings/sound/maxim,max98390.yaml | 6 +-- .../bindings/sound/nvidia,tegra186-dspk.yaml | 3 +- .../bindings/sound/nvidia,tegra210-dmic.yaml | 3 +- .../bindings/sound/nvidia,tegra210-i2s.yaml | 3 +- .../bindings/sound/ti,j721e-cpb-audio.yaml | 6 +-- .../bindings/sound/ti,j721e-cpb-ivi-audio.yaml | 15 +++----- 12 files changed, 49 insertions(+), 70 deletions(-) diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml index 52b3cdac0bdf..f54b4e4808f0 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml @@ -32,8 +32,7 @@ properties: - const: hdmi ddc: - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: > Phandle of the I2C controller used for DDC EDID probing diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml index a6ac289d98da..0ca992465a21 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml @@ -97,8 +97,7 @@ patternProperties: input signal is multiplied. For example, <1 3> indicates the signal is scaled down to 1/3 of its value before ADC measurement. If property is not found default value depending on chip will be used. - allOf: - - $ref: /schemas/types.yaml#/definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - items: - const: 1 diff --git a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml index 9f0eb3addac4..ce6aaff15214 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml @@ -42,14 +42,13 @@ properties: Specifies the list of CPU interrupt vectors to which the GIC may not route interrupts. This property is ignored if the CPU is started in EIC mode. - allOf: - - $ref: /schemas/types.yaml#definitions/uint32-array - - minItems: 1 - maxItems: 6 - uniqueItems: true - items: - minimum: 2 - maximum: 7 + $ref: /schemas/types.yaml#definitions/uint32-array + minItems: 1 + maxItems: 6 + uniqueItems: true + items: + minimum: 2 + maximum: 7 mti,reserved-ipi-vectors: description: | @@ -57,13 +56,12 @@ properties: It accepts two values: the 1st is the starting interrupt and the 2nd is the size of the reserved range. If not specified, the driver will allocate the last (2 * number of VPEs in the system). - allOf: - - $ref: /schemas/types.yaml#definitions/uint32-array - - items: - - minimum: 0 - maximum: 254 - - minimum: 2 - maximum: 254 + $ref: /schemas/types.yaml#definitions/uint32-array + items: + - minimum: 0 + maximum: 254 + - minimum: 2 + maximum: 254 timer: type: object diff --git a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml index cb96e95d7e81..21864ab86ec4 100644 --- a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml +++ b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml @@ -38,39 +38,36 @@ properties: dongwoon,aac-mode: description: Indication of AAC mode select. - allOf: - - $ref: "/schemas/types.yaml#/definitions/uint32" - - enum: - - 1 # AAC2 mode(operation time# 0.48 x Tvib) - - 2 # AAC3 mode(operation time# 0.70 x Tvib) - - 3 # AAC4 mode(operation time# 0.75 x Tvib) - - 5 # AAC8 mode(operation time# 1.13 x Tvib) - default: 2 + $ref: "/schemas/types.yaml#/definitions/uint32" + enum: + - 1 # AAC2 mode(operation time# 0.48 x Tvib) + - 2 # AAC3 mode(operation time# 0.70 x Tvib) + - 3 # AAC4 mode(operation time# 0.75 x Tvib) + - 5 # AAC8 mode(operation time# 1.13 x Tvib) + default: 2 dongwoon,aac-timing: description: Number of AAC Timing count that controlled by one 6-bit period of vibration register AACT[5:0], the unit of which is 100 us. - allOf: - - $ref: "/schemas/types.yaml#/definitions/uint32" - - default: 0x20 - minimum: 0x00 - maximum: 0x3f + $ref: "/schemas/types.yaml#/definitions/uint32" + default: 0x20 + minimum: 0x00 + maximum: 0x3f dongwoon,clock-presc: description: Indication of VCM internal clock dividing rate select, as one multiple factor to calculate VCM ring periodic time Tvib. - allOf: - - $ref: "/schemas/types.yaml#/definitions/uint32" - - enum: - - 0 # Dividing Rate - 2 - - 1 # Dividing Rate - 1 - - 2 # Dividing Rate - 1/2 - - 3 # Dividing Rate - 1/4 - - 4 # Dividing Rate - 8 - - 5 # Dividing Rate - 4 - default: 1 + $ref: "/schemas/types.yaml#/definitions/uint32" + enum: + - 0 # Dividing Rate - 2 + - 1 # Dividing Rate - 1 + - 2 # Dividing Rate - 1/2 + - 3 # Dividing Rate - 1/4 + - 4 # Dividing Rate - 8 + - 5 # Dividing Rate - 4 + default: 1 required: - compatible diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml index cfe25cface21..b3c3d0c3c390 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml @@ -31,8 +31,7 @@ properties: ti,syscon-pcie-ctrl: description: Phandle to the SYSCON entry required for configuring PCIe mode and link speed. - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml index d7b60487c6c3..8200ba00bc09 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml @@ -31,8 +31,7 @@ properties: ti,syscon-pcie-ctrl: description: Phandle to the SYSCON entry required for configuring PCIe mode and link speed. - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/maxim,max98390.yaml b/Documentation/devicetree/bindings/sound/maxim,max98390.yaml index 9c2e3effa0c0..fea9a1b6619a 100644 --- a/Documentation/devicetree/bindings/sound/maxim,max98390.yaml +++ b/Documentation/devicetree/bindings/sound/maxim,max98390.yaml @@ -18,16 +18,14 @@ properties: description: I2C address of the device. maxim,temperature_calib: - allOf: - - $ref: /schemas/types.yaml#/definitions/uint32 description: The calculated temperature data was measured while doing the calibration. + $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 65535 maxim,r0_calib: - allOf: - - $ref: /schemas/types.yaml#/definitions/uint32 description: This is r0 calibration data which was measured in factory mode. + $ref: /schemas/types.yaml#/definitions/uint32 minimum: 1 maximum: 8388607 diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml index e620c77d0728..2f2fcffa65cb 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml @@ -48,8 +48,7 @@ properties: sound-name-prefix: pattern: "^DSPK[1-9]$" - allOf: - - $ref: /schemas/types.yaml#/definitions/string + $ref: /schemas/types.yaml#/definitions/string description: Used as prefix for sink/source names of the component. Must be a unique string among multiple instances of the same component. diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml index 1c14e83f67c7..8689d9f18c11 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml @@ -49,8 +49,7 @@ properties: sound-name-prefix: pattern: "^DMIC[1-9]$" - allOf: - - $ref: /schemas/types.yaml#/definitions/string + $ref: /schemas/types.yaml#/definitions/string description: used as prefix for sink/source names of the component. Must be a unique string among multiple instances of the same component. diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml index 795797001843..9bbf18153d63 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml @@ -67,8 +67,7 @@ properties: sound-name-prefix: pattern: "^I2S[1-9]$" - allOf: - - $ref: /schemas/types.yaml#/definitions/string + $ref: /schemas/types.yaml#/definitions/string description: Used as prefix for sink/source names of the component. Must be a unique string among multiple instances of the same component. diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml index 6f2be6503401..d52cfbeb2d07 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml @@ -37,13 +37,11 @@ properties: ti,cpb-mcasp: description: phandle to McASP used on CPB - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle ti,cpb-codec: description: phandle to the pcm3168a codec used on the CPB - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle clocks: items: diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml index e0b88470a502..bb780f621628 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml @@ -50,28 +50,23 @@ properties: ti,cpb-mcasp: description: phandle to McASP used on CPB - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle ti,cpb-codec: description: phandle to the pcm3168a codec used on the CPB - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle ti,ivi-mcasp: description: phandle to McASP used on IVI - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle ti,ivi-codec-a: description: phandle to the pcm3168a-A codec on the expansion board - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle ti,ivi-codec-b: description: phandle to the pcm3168a-B codec on the expansion board - allOf: - - $ref: /schemas/types.yaml#/definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle clocks: items: -- cgit From 8ab49526b53d3172d1d8dd03a75c7d1f5bd21239 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Aug 2020 11:16:17 -0700 Subject: x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task syzbot found its way in 86_fsgsbase_read_task() and triggered this oops: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 PID: 6866 Comm: syz-executor262 Not tainted 5.8.0-syzkaller #0 RIP: 0010:x86_fsgsbase_read_task+0x16d/0x310 arch/x86/kernel/process_64.c:393 Call Trace: putreg32+0x3ab/0x530 arch/x86/kernel/ptrace.c:876 genregs32_set arch/x86/kernel/ptrace.c:1026 [inline] genregs32_set+0xa4/0x100 arch/x86/kernel/ptrace.c:1006 copy_regset_from_user include/linux/regset.h:326 [inline] ia32_arch_ptrace arch/x86/kernel/ptrace.c:1061 [inline] compat_arch_ptrace+0x36c/0xd90 arch/x86/kernel/ptrace.c:1198 __do_compat_sys_ptrace kernel/ptrace.c:1420 [inline] __se_compat_sys_ptrace kernel/ptrace.c:1389 [inline] __ia32_compat_sys_ptrace+0x220/0x2f0 kernel/ptrace.c:1389 do_syscall_32_irqs_on arch/x86/entry/common.c:84 [inline] __do_fast_syscall_32+0x57/0x80 arch/x86/entry/common.c:126 do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:149 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c This can happen if ptrace() or sigreturn() pokes an LDT selector into FS or GS for a task with no LDT and something tries to read the base before a return to usermode notices the bad selector and fixes it. The fix is to make sure ldt pointer is not NULL. Fixes: 07e1d88adaae ("x86/fsgsbase/64: Fix ptrace() to read the FS/GS base accurately") Co-developed-by: Jann Horn Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Andy Lutomirski Cc: Chang S. Bae Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Markus T Metzger Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Rik van Riel Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d6f946707270..9afefe325acb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -390,7 +390,7 @@ unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); -- cgit From a85ffd59bd362d6c2e456e7f523b091830cd5454 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 12 Aug 2020 20:17:00 -0700 Subject: dma-debug: fix debug_dma_assert_idle(), use rcu_read_lock() Since commit 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") improved unlock_page(), it has become more noticeable how cow_user_page() in a kernel with CONFIG_DMA_API_DEBUG=y can create and suffer from heavy contention on DMA debug's radix_lock in debug_dma_assert_idle(). It is only doing a lookup: use rcu_read_lock() and rcu_read_unlock() instead; though that does require the static ents[] to be moved onstack... ...but, hold on, isn't that radix_tree_gang_lookup() and loop doing quite the wrong thing: searching CACHELINES_PER_PAGE entries for an exact match with the first cacheline of the page in question? radix_tree_gang_lookup() is the right tool for the job, but we need nothing more than to check the first entry it can find, reporting if that falls anywhere within the page. (Is RCU safe here? As safe as using the spinlock was. The entries are never freed, so don't need to be freed by RCU. They may be reused, and there is a faint chance of a race, with an offending entry reused while printing its error info; but the spinlock did not prevent that either, and I agree that it's not worth worrying about. ] [ Side noe: this patch is a clear improvement to the status quo, but the next patch will be removing this debug function entirely. But just in case we decide we want to resurrect the debugging code some day, I'm first applying this improvement patch so that it doesn't get lost - Linus ] Fixes: 3b7a6418c749 ("dma debug: account for cachelines and read-only mappings in overlap tracking") Signed-off-by: Hugh Dickins Acked-by: Dan Williams Acked-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- kernel/dma/debug.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index f7f807fb6ebb..6f4f4b9d2d03 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -565,11 +565,8 @@ static void active_cacheline_remove(struct dma_debug_entry *entry) */ void debug_dma_assert_idle(struct page *page) { - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; + struct dma_debug_entry *entry; + unsigned long pfn; phys_addr_t cln; if (dma_debug_disabled()) @@ -578,20 +575,14 @@ void debug_dma_assert_idle(struct page *page) if (!page) return; - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); + pfn = page_to_pfn(page); + cln = (phys_addr_t) pfn << CACHELINE_PER_PAGE_SHIFT; - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; - } - spin_unlock_irqrestore(&radix_lock, flags); + rcu_read_lock(); + if (!radix_tree_gang_lookup(&dma_active_cacheline, (void **) &entry, + cln, 1) || entry->pfn != pfn) + entry = NULL; + rcu_read_unlock(); if (!entry) return; -- cgit From 5848dc5b1b76d83599dcec1b39f188a7cbdca7e2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Aug 2020 15:22:43 -0700 Subject: dma-debug: remove debug_dma_assert_idle() function This remoes the code from the COW path to call debug_dma_assert_idle(), which was added many years ago. Google shows that it hasn't caught anything in the 6+ years we've had it apart from a false positive, and Hugh just noticed how it had a very unfortunate spinlock serialization in the COW path. He fixed that issue the previous commit (a85ffd59bd36: "dma-debug: fix debug_dma_assert_idle(), use rcu_read_lock()"), but let's see if anybody even notices when we remove this function entirely. NOTE! We keep the dma tracking infrastructure that was added by the commit that introduced it. Partly to make it easier to resurrect this debug code if we ever deside to, and partly because that tracking by pfn and offset looks quite reasonable. The problem with this debug code was simply that it was expensive and didn't seem worth it, not that it was wrong per se. Acked-by: Dan Williams Acked-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/dma-debug.h | 6 ------ kernel/dma/Kconfig | 5 ----- kernel/dma/debug.c | 46 +--------------------------------------------- mm/memory.c | 2 -- 4 files changed, 1 insertion(+), 58 deletions(-) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 4208f94d93f7..7b3b04ba60f3 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -67,8 +67,6 @@ extern void debug_dma_sync_sg_for_device(struct device *dev, extern void debug_dma_dump_mappings(struct device *dev); -extern void debug_dma_assert_idle(struct page *page); - #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_add_bus(struct bus_type *bus) @@ -157,10 +155,6 @@ static inline void debug_dma_dump_mappings(struct device *dev) { } -static inline void debug_dma_assert_idle(struct page *page) -{ -} - #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f4770fcfa62b..5732b2b3ef17 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -186,11 +186,6 @@ config DMA_API_DEBUG drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - This option causes a performance degradation. Use only if you want to debug device drivers and dma interactions. diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 6f4f4b9d2d03..8e9f7b301c6d 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -448,9 +448,6 @@ void debug_dma_dump_mappings(struct device *dev) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. */ static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); static DEFINE_SPINLOCK(radix_lock); @@ -497,10 +494,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) overlap = active_cacheline_set_overlap(cln, ++overlap); /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. + * leaking dma-mappings. */ WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), @@ -555,44 +549,6 @@ static void active_cacheline_remove(struct dma_debug_entry *entry) spin_unlock_irqrestore(&radix_lock, flags); } -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. - */ -void debug_dma_assert_idle(struct page *page) -{ - struct dma_debug_entry *entry; - unsigned long pfn; - phys_addr_t cln; - - if (dma_debug_disabled()) - return; - - if (!page) - return; - - pfn = page_to_pfn(page); - cln = (phys_addr_t) pfn << CACHELINE_PER_PAGE_SHIFT; - - rcu_read_lock(); - if (!radix_tree_gang_lookup(&dma_active_cacheline, (void **) &entry, - cln, 1) || entry->pfn != pfn) - entry = NULL; - rcu_read_unlock(); - - if (!entry) - return; - - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); -} - /* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. diff --git a/mm/memory.c b/mm/memory.c index 228efaca75d3..d3c3bbd65a7e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2411,8 +2411,6 @@ static inline bool cow_user_page(struct page *dst, struct page *src, struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; - debug_dma_assert_idle(src); - if (likely(src)) { copy_user_highpage(dst, src, addr, vma); return true; -- cgit From 76d4467a97bd8c4bb01e810a5129c8f96d7dcbf8 Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Thu, 13 Aug 2020 11:38:04 +0800 Subject: riscv: Setup exception vector for nommu platform Exception vector is missing on nommu platform and that is an issue. This patch is tested in Sipeed Maix Bit Dev Board. Fixes: 79b1feba5455 ("RISC-V: Setup exception vector early") Suggested-by: Anup Patel Suggested-by: Atish Patra Signed-off-by: Qiu Wenbo Reviewed-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index d0c5c316e9bb..0a4e81b8dc79 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -77,16 +77,10 @@ relocate: csrw CSR_SATP, a0 .align 2 1: - /* Set trap vector to exception handler */ - la a0, handle_exception + /* Set trap vector to spin forever to help debug */ + la a0, .Lsecondary_park csrw CSR_TVEC, a0 - /* - * Set sup0 scratch register to 0, indicating to exception vector that - * we are presently executing in kernel. - */ - csrw CSR_SCRATCH, zero - /* Reload the global pointer */ .option push .option norelax @@ -144,9 +138,23 @@ secondary_start_common: la a0, swapper_pg_dir call relocate #endif + call setup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ +.align 2 +setup_trap_vector: + /* Set trap vector to exception handler */ + la a0, handle_exception + csrw CSR_TVEC, a0 + + /* + * Set sup0 scratch register to 0, indicating to exception vector that + * we are presently executing in kernel. + */ + csrw CSR_SCRATCH, zero + ret + .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi @@ -240,6 +248,7 @@ clear_bss_done: call relocate #endif /* CONFIG_MMU */ + call setup_trap_vector /* Restore C environment */ la tp, init_task sw zero, TASK_TI_CPU(tp) -- cgit From be74273aee2c94e8322404d0b6e84e4f30d911ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Jun 2020 06:04:08 -0700 Subject: sh: Fix unneeded constructor in page table allocation The pgd kmem_cache allocation both specified __GFP_ZERO and had a constructor which makes no sense. Remove __GFP_ZERO and zero the user parts of the pgd explicitly. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Rich Felker --- arch/sh/mm/pgtable.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 5c8f9247c3c2..cf7ce4b57359 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -2,8 +2,6 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO - static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 static struct kmem_cache *pmd_cachep; @@ -13,6 +11,7 @@ void pgd_ctor(void *x) { pgd_t *pgd = x; + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); @@ -32,7 +31,7 @@ void pgtable_cache_init(void) pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cachep, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -48,7 +47,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP); + return kmem_cache_alloc(pmd_cachep, GFP_KERNEL | __GFP_ZERO); } void pmd_free(struct mm_struct *mm, pmd_t *pmd) -- cgit From 2202d81b098ef776153c6b40c5404d486042a3ed Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Thu, 2 Jan 2020 09:54:30 +0800 Subject: sh: remove call to memset after dma_alloc_coherent Function dma_alloc_coherent use in buf already zeroes out memory, so memset is not needed. Signed-off-by: Chen Zhou Signed-off-by: Rich Felker --- arch/sh/mm/consistent.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 3169a343a5ab..0de206c1acfe 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, return -ENOMEM; } - memset(buf, 0, memsize); - r->flags = IORESOURCE_MEM; r->start = dma_handle; r->end = r->start + memsize - 1; -- cgit From 2d2b308a8b7d0aeeadeabd8d7f329bb8200f2e2b Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Thu, 11 Jun 2020 09:58:11 +0200 Subject: sh: Implement __get_user_u64() required for 64-bit get_user() Trying to build the kernel with CONFIG_INFINIBAND_USER_ACCESS enabled fails ERROR: "__get_user_unknown" [drivers/infiniband/core/ib_uverbs.ko] undefined! with on SH since the kernel misses a 64-bit implementation of get_user(). Implement the missing 64-bit get_user() as __get_user_u64(), matching the already existing __put_user_u64() which implements the 64-bit put_user(). Signed-off-by: John Paul Adrian Glaubitz Acked-by: Yoshinori Sato Signed-off-by: Rich Felker --- arch/sh/include/asm/uaccess_32.h | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h index 624cf55acc27..5d7ddc092afd 100644 --- a/arch/sh/include/asm/uaccess_32.h +++ b/arch/sh/include/asm/uaccess_32.h @@ -26,6 +26,9 @@ do { \ case 4: \ __get_user_asm(x, ptr, retval, "l"); \ break; \ + case 8: \ + __get_user_u64(x, ptr, retval); \ + break; \ default: \ __get_user_unknown(); \ break; \ @@ -66,6 +69,56 @@ do { \ extern void __get_user_unknown(void); +#if defined(CONFIG_CPU_LITTLE_ENDIAN) +#define __get_user_u64(x, addr, err) \ +({ \ +__asm__ __volatile__( \ + "1:\n\t" \ + "mov.l %2,%R1\n\t" \ + "mov.l %T2,%S1\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\n\t" \ + "mov #0,%R1\n\t" \ + "mov #0,%S1\n\t" \ + "mov.l 4f, %0\n\t" \ + "jmp @%0\n\t" \ + " mov %3, %0\n\t" \ + ".balign 4\n" \ + "4: .long 2b\n\t" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".long 1b, 3b\n\t" \ + ".long 1b + 2, 3b\n\t" \ + ".previous" \ + :"=&r" (err), "=&r" (x) \ + :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); }) +#else +#define __get_user_u64(x, addr, err) \ +({ \ +__asm__ __volatile__( \ + "1:\n\t" \ + "mov.l %2,%S1\n\t" \ + "mov.l %T2,%R1\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\n\t" \ + "mov #0,%S1\n\t" \ + "mov #0,%R1\n\t" \ + "mov.l 4f, %0\n\t" \ + "jmp @%0\n\t" \ + " mov %3, %0\n\t" \ + ".balign 4\n" \ + "4: .long 2b\n\t" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".long 1b, 3b\n\t" \ + ".long 1b + 2, 3b\n\t" \ + ".previous" \ + :"=&r" (err), "=&r" (x) \ + :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); }) +#endif + #define __put_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ -- cgit From 7619f957dc8cb8b2db3be21254d758c16e025961 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Jun 2020 10:06:36 +0200 Subject: Revert "sh: add missing EXPORT_SYMBOL() for __delay" This reverts commit d1f56f318d234fc5db230af2f3e0088f689ab3c0. __delay() is an internal implementation detail on several architectures. Drivers should not call __delay() directly, as it has non-standardized semantics, or may not even exist. Hence there is no need to export __delay() to modules. See also include/asm-generic/delay.h: /* Undefined functions to get compile-time errors */ ... extern void __delay(unsigned long loops); Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/lib/delay.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index 540e670dbafc..dad8e6a54906 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -29,7 +29,6 @@ void __delay(unsigned long loops) : "0" (loops) : "t"); } -EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { -- cgit From 7dfaa9ea56e9e68b889d6676aa51a246f32c1857 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Mon, 15 Jun 2020 14:09:40 +0200 Subject: arch: sh: smc37c93x: fix spelling mistake Fix typo: "triger" --> "trigger" Signed-off-by: Flavio Suligoi Signed-off-by: Rich Felker --- arch/sh/include/asm/smc37c93x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/smc37c93x.h b/arch/sh/include/asm/smc37c93x.h index f054c30a171a..891f2f8f2fd0 100644 --- a/arch/sh/include/asm/smc37c93x.h +++ b/arch/sh/include/asm/smc37c93x.h @@ -112,8 +112,8 @@ typedef struct uart_reg { #define FCR_RFRES 0x0200 /* Receiver FIFO reset */ #define FCR_TFRES 0x0400 /* Transmitter FIFO reset */ #define FCR_DMA 0x0800 /* DMA mode select */ -#define FCR_RTL 0x4000 /* Receiver triger (LSB) */ -#define FCR_RTM 0x8000 /* Receiver triger (MSB) */ +#define FCR_RTL 0x4000 /* Receiver trigger (LSB) */ +#define FCR_RTM 0x8000 /* Receiver trigger (MSB) */ /* Line Control Register */ -- cgit From 8a8e54625be28a6e675e53d214387fc8ee41fb6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 08:55:38 +0200 Subject: sh: Remove SH5-based Cayman platform Since the removal of core support for SH5, Cayman support can no longer be selected. Fixes: 37744feebc086908 ("sh: remove sh5 support") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/Kconfig | 5 +- arch/sh/Makefile | 5 - arch/sh/boards/Kconfig | 6 -- arch/sh/boards/mach-cayman/Makefile | 5 - arch/sh/boards/mach-cayman/irq.c | 148 ----------------------------- arch/sh/boards/mach-cayman/panic.c | 46 --------- arch/sh/boards/mach-cayman/setup.c | 181 ------------------------------------ arch/sh/configs/cayman_defconfig | 66 ------------- arch/sh/drivers/pci/Makefile | 1 - arch/sh/drivers/pci/fixups-cayman.c | 78 ---------------- arch/sh/tools/mach-types | 1 - 11 files changed, 2 insertions(+), 540 deletions(-) delete mode 100644 arch/sh/boards/mach-cayman/Makefile delete mode 100644 arch/sh/boards/mach-cayman/irq.c delete mode 100644 arch/sh/boards/mach-cayman/panic.c delete mode 100644 arch/sh/boards/mach-cayman/setup.c delete mode 100644 arch/sh/configs/cayman_defconfig delete mode 100644 arch/sh/drivers/pci/fixups-cayman.c diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 9fc2b010e938..d8b097cbb6ed 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -123,8 +123,8 @@ config ARCH_HAS_ILOG2_U64 config NO_IOPORT_MAP def_bool !PCI - depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \ - !SH_HP6XX && !SH_SOLUTION_ENGINE + depends on !SH_SH4202_MICRODEV && !SH_SHMIN && !SH_HP6XX && \ + !SH_SOLUTION_ENGINE config IO_TRAPPED bool @@ -726,7 +726,6 @@ config ZERO_PAGE_OFFSET config BOOT_LINK_OFFSET hex default "0x00210000" if SH_SHMIN - default "0x00400000" if SH_CAYMAN default "0x00810000" if SH_7780_SOLUTION_ENGINE default "0x009e0000" if SH_TITAN default "0x01800000" if SH_SDK7780 diff --git a/arch/sh/Makefile b/arch/sh/Makefile index da9cf952f33c..2faebfd72eca 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -15,11 +15,7 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif -ifeq ($(ARCH),sh) KBUILD_DEFCONFIG := shx3_defconfig -else -KBUILD_DEFCONFIG := cayman_defconfig -endif isa-y := any isa-$(CONFIG_SH_DSP) := sh @@ -143,7 +139,6 @@ machdir-$(CONFIG_SH_SH7763RDP) += mach-sh7763rdp machdir-$(CONFIG_SH_SH4202_MICRODEV) += mach-microdev machdir-$(CONFIG_SH_LANDISK) += mach-landisk machdir-$(CONFIG_SH_LBOX_RE2) += mach-lboxre2 -machdir-$(CONFIG_SH_CAYMAN) += mach-cayman machdir-$(CONFIG_SH_RSK) += mach-rsk ifneq ($(machdir-y),) diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index fb0ca0c1efe1..83bcb6d2daca 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -340,12 +340,6 @@ config SH_MAGIC_PANEL_R2 help Select Magic Panel R2 if configuring for Magic Panel R2. -config SH_CAYMAN - bool "Hitachi Cayman" - depends on CPU_SUBTYPE_SH5_101 || CPU_SUBTYPE_SH5_103 - select HAVE_PCI - select ARCH_MIGHT_HAVE_PC_SERIO - config SH_POLARIS bool "SMSC Polaris" select CPU_HAS_IPR_IRQ diff --git a/arch/sh/boards/mach-cayman/Makefile b/arch/sh/boards/mach-cayman/Makefile deleted file mode 100644 index 775a4be57434..000000000000 --- a/arch/sh/boards/mach-cayman/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Hitachi Cayman specific parts of the kernel -# -obj-y := setup.o irq.o panic.o diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c deleted file mode 100644 index 0305d0b51730..000000000000 --- a/arch/sh/boards/mach-cayman/irq.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/sh/mach-cayman/irq.c - SH-5 Cayman Interrupt Support - * - * This file handles the board specific parts of the Cayman interrupt system - * - * Copyright (C) 2002 Stuart Menefy - */ -#include -#include -#include -#include -#include -#include - -/* Setup for the SMSC FDC37C935 / LAN91C100FD */ -#define SMSC_IRQ IRQ_IRL1 - -/* Setup for PCI Bus 2, which transmits interrupts via the EPLD */ -#define PCI2_IRQ IRQ_IRL3 - -unsigned long epld_virt; - -#define EPLD_BASE 0x04002000 -#define EPLD_STATUS_BASE (epld_virt + 0x10) -#define EPLD_MASK_BASE (epld_virt + 0x20) - -/* Note the SMSC SuperIO chip and SMSC LAN chip interrupts are all muxed onto - the same SH-5 interrupt */ - -static irqreturn_t cayman_interrupt_smsc(int irq, void *dev_id) -{ - printk(KERN_INFO "CAYMAN: spurious SMSC interrupt\n"); - return IRQ_NONE; -} - -static irqreturn_t cayman_interrupt_pci2(int irq, void *dev_id) -{ - printk(KERN_INFO "CAYMAN: spurious PCI interrupt, IRQ %d\n", irq); - return IRQ_NONE; -} - -static void enable_cayman_irq(struct irq_data *data) -{ - unsigned int irq = data->irq; - unsigned long flags; - unsigned long mask; - unsigned int reg; - unsigned char bit; - - irq -= START_EXT_IRQS; - reg = EPLD_MASK_BASE + ((irq / 8) << 2); - bit = 1<<(irq % 8); - local_irq_save(flags); - mask = __raw_readl(reg); - mask |= bit; - __raw_writel(mask, reg); - local_irq_restore(flags); -} - -static void disable_cayman_irq(struct irq_data *data) -{ - unsigned int irq = data->irq; - unsigned long flags; - unsigned long mask; - unsigned int reg; - unsigned char bit; - - irq -= START_EXT_IRQS; - reg = EPLD_MASK_BASE + ((irq / 8) << 2); - bit = 1<<(irq % 8); - local_irq_save(flags); - mask = __raw_readl(reg); - mask &= ~bit; - __raw_writel(mask, reg); - local_irq_restore(flags); -} - -struct irq_chip cayman_irq_type = { - .name = "Cayman-IRQ", - .irq_unmask = enable_cayman_irq, - .irq_mask = disable_cayman_irq, -}; - -int cayman_irq_demux(int evt) -{ - int irq = intc_evt_to_irq[evt]; - - if (irq == SMSC_IRQ) { - unsigned long status; - int i; - - status = __raw_readl(EPLD_STATUS_BASE) & - __raw_readl(EPLD_MASK_BASE) & 0xff; - if (status == 0) { - irq = -1; - } else { - for (i=0; i<8; i++) { - if (status & (1< -#include -#include - -/* THIS IS A PHYSICAL ADDRESS */ -#define HDSP2534_ADDR (0x04002100) - -static void poor_mans_delay(void) -{ - int i; - - for (i = 0; i < 2500000; i++) - cpu_relax(); -} - -static void show_value(unsigned long x) -{ - int i; - unsigned nibble; - for (i = 0; i < 8; i++) { - nibble = ((x >> (i * 4)) & 0xf); - - __raw_writeb(nibble + ((nibble > 9) ? 55 : 48), - HDSP2534_ADDR + 0xe0 + ((7 - i) << 2)); - } -} - -void -panic_handler(unsigned long panicPC, unsigned long panicSSR, - unsigned long panicEXPEVT) -{ - while (1) { - /* This piece of code displays the PC on the LED display */ - show_value(panicPC); - poor_mans_delay(); - show_value(panicSSR); - poor_mans_delay(); - show_value(panicEXPEVT); - poor_mans_delay(); - } -} diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c deleted file mode 100644 index 8ef76e288da0..000000000000 --- a/arch/sh/boards/mach-cayman/setup.c +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/sh/mach-cayman/setup.c - * - * SH5 Cayman support - * - * Copyright (C) 2002 David J. Mckay & Benedict Gaster - * Copyright (C) 2003 - 2007 Paul Mundt - */ -#include -#include -#include -#include - -/* - * Platform Dependent Interrupt Priorities. - */ - -/* Using defaults defined in irq.h */ -#define RES NO_PRIORITY /* Disabled */ -#define IR0 IRL0_PRIORITY /* IRLs */ -#define IR1 IRL1_PRIORITY -#define IR2 IRL2_PRIORITY -#define IR3 IRL3_PRIORITY -#define PCA INTA_PRIORITY /* PCI Ints */ -#define PCB INTB_PRIORITY -#define PCC INTC_PRIORITY -#define PCD INTD_PRIORITY -#define SER TOP_PRIORITY -#define ERR TOP_PRIORITY -#define PW0 TOP_PRIORITY -#define PW1 TOP_PRIORITY -#define PW2 TOP_PRIORITY -#define PW3 TOP_PRIORITY -#define DM0 NO_PRIORITY /* DMA Ints */ -#define DM1 NO_PRIORITY -#define DM2 NO_PRIORITY -#define DM3 NO_PRIORITY -#define DAE NO_PRIORITY -#define TU0 TIMER_PRIORITY /* TMU Ints */ -#define TU1 NO_PRIORITY -#define TU2 NO_PRIORITY -#define TI2 NO_PRIORITY -#define ATI NO_PRIORITY /* RTC Ints */ -#define PRI NO_PRIORITY -#define CUI RTC_PRIORITY -#define ERI SCIF_PRIORITY /* SCIF Ints */ -#define RXI SCIF_PRIORITY -#define BRI SCIF_PRIORITY -#define TXI SCIF_PRIORITY -#define ITI TOP_PRIORITY /* WDT Ints */ - -/* Setup for the SMSC FDC37C935 */ -#define SMSC_SUPERIO_BASE 0x04000000 -#define SMSC_CONFIG_PORT_ADDR 0x3f0 -#define SMSC_INDEX_PORT_ADDR SMSC_CONFIG_PORT_ADDR -#define SMSC_DATA_PORT_ADDR 0x3f1 - -#define SMSC_ENTER_CONFIG_KEY 0x55 -#define SMSC_EXIT_CONFIG_KEY 0xaa - -#define SMCS_LOGICAL_DEV_INDEX 0x07 -#define SMSC_DEVICE_ID_INDEX 0x20 -#define SMSC_DEVICE_REV_INDEX 0x21 -#define SMSC_ACTIVATE_INDEX 0x30 -#define SMSC_PRIMARY_BASE_INDEX 0x60 -#define SMSC_SECONDARY_BASE_INDEX 0x62 -#define SMSC_PRIMARY_INT_INDEX 0x70 -#define SMSC_SECONDARY_INT_INDEX 0x72 - -#define SMSC_IDE1_DEVICE 1 -#define SMSC_KEYBOARD_DEVICE 7 -#define SMSC_CONFIG_REGISTERS 8 - -#define SMSC_SUPERIO_READ_INDEXED(index) ({ \ - outb((index), SMSC_INDEX_PORT_ADDR); \ - inb(SMSC_DATA_PORT_ADDR); }) -#define SMSC_SUPERIO_WRITE_INDEXED(val, index) ({ \ - outb((index), SMSC_INDEX_PORT_ADDR); \ - outb((val), SMSC_DATA_PORT_ADDR); }) - -#define IDE1_PRIMARY_BASE 0x01f0 -#define IDE1_SECONDARY_BASE 0x03f6 - -unsigned long smsc_superio_virt; - -int platform_int_priority[NR_INTC_IRQS] = { - IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD, /* IRQ 0- 7 */ - RES, RES, RES, RES, SER, ERR, PW3, PW2, /* IRQ 8-15 */ - PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES, /* IRQ 16-23 */ - RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 24-31 */ - TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI, /* IRQ 32-39 */ - RXI, BRI, TXI, RES, RES, RES, RES, RES, /* IRQ 40-47 */ - RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 48-55 */ - RES, RES, RES, RES, RES, RES, RES, ITI, /* IRQ 56-63 */ -}; - -static int __init smsc_superio_setup(void) -{ - unsigned char devid, devrev; - - smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024); - if (!smsc_superio_virt) { - panic("Unable to remap SMSC SuperIO\n"); - } - - /* Initially the chip is in run state */ - /* Put it into configuration state */ - outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - - /* Read device ID info */ - devid = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_ID_INDEX); - devrev = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_REV_INDEX); - printk("SMSC SuperIO devid %02x rev %02x\n", devid, devrev); - - /* Select the keyboard device */ - SMSC_SUPERIO_WRITE_INDEXED(SMSC_KEYBOARD_DEVICE, SMCS_LOGICAL_DEV_INDEX); - - /* enable it */ - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX); - - /* Select the interrupts */ - /* On a PC keyboard is IRQ1, mouse is IRQ12 */ - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); - SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); - - /* - * Only IDE1 exists on the Cayman - */ - - /* Power it on */ - SMSC_SUPERIO_WRITE_INDEXED(1 << SMSC_IDE1_DEVICE, 0x22); - - SMSC_SUPERIO_WRITE_INDEXED(SMSC_IDE1_DEVICE, SMCS_LOGICAL_DEV_INDEX); - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE >> 8, - SMSC_PRIMARY_BASE_INDEX + 0); - SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE & 0xff, - SMSC_PRIMARY_BASE_INDEX + 1); - - SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE >> 8, - SMSC_SECONDARY_BASE_INDEX + 0); - SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE & 0xff, - SMSC_SECONDARY_BASE_INDEX + 1); - - SMSC_SUPERIO_WRITE_INDEXED(14, SMSC_PRIMARY_INT_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(SMSC_CONFIG_REGISTERS, - SMCS_LOGICAL_DEV_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc2); /* GP42 = nIDE1_OE */ - SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ - - /* Exit the configuration state */ - outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - - return 0; -} -device_initcall(smsc_superio_setup); - -static void __iomem *cayman_ioport_map(unsigned long port, unsigned int len) -{ - if (port < 0x400) { - extern unsigned long smsc_superio_virt; - return (void __iomem *)((port << 2) | smsc_superio_virt); - } - - return (void __iomem *)port; -} - -extern void init_cayman_irq(void); - -static struct sh_machine_vector mv_cayman __initmv = { - .mv_name = "Hitachi Cayman", - .mv_ioport_map = cayman_ioport_map, - .mv_init_irq = init_cayman_irq, -}; diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig deleted file mode 100644 index 911437c163c7..000000000000 --- a/arch/sh/configs/cayman_defconfig +++ /dev/null @@ -1,66 +0,0 @@ -CONFIG_POSIX_MQUEUE=y -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_FORCE_MAX_ZONEORDER=11 -CONFIG_MEMORY_START=0x80000000 -CONFIG_MEMORY_SIZE=0x00400000 -CONFIG_FLATMEM_MANUAL=y -CONFIG_CACHE_OFF=y -CONFIG_SH_PCLK_FREQ=50000000 -CONFIG_HEARTBEAT=y -CONFIG_PREEMPT=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -# CONFIG_IPV6 is not set -# CONFIG_FW_LOADER is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_HW_RANDOM=y -CONFIG_I2C=m -CONFIG_WATCHDOG=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_SH_MOBILE_LCDC=m -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x16=y -CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_LOGO_LINUX_CLUT224 is not set -# CONFIG_LOGO_SUPERH_MONO is not set -# CONFIG_LOGO_SUPERH_VGA16 is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_MINIX_FS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_SCHEDSTATS=y -CONFIG_FRAME_POINTER=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index a5c1e9066f83..d313fd3ce709 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -25,4 +25,3 @@ obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += fixups-titan.o obj-$(CONFIG_SH_LANDISK) += fixups-landisk.o obj-$(CONFIG_SH_LBOX_RE2) += fixups-rts7751r2d.o -obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c deleted file mode 100644 index c797bfbe2e98..000000000000 --- a/arch/sh/drivers/pci/fixups-cayman.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include "pci-sh5.h" - -int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - int result = -1; - - /* The complication here is that the PCI IRQ lines from the Cayman's 2 - 5V slots get into the CPU via a different path from the IRQ lines - from the 3 3.3V slots. Thus, we have to detect whether the card's - interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling' - at the point where we cross from 5V to 3.3V is not the normal case. - - The added complication is that we don't know that the 5V slots are - always bus 2, because a card containing a PCI-PCI bridge may be - plugged into a 3.3V slot, and this changes the bus numbering. - - Also, the Cayman has an intermediate PCI bus that goes a custom - expansion board header (and to the secondary bridge). This bus has - never been used in practice. - - The 1ary onboard PCI-PCI bridge is device 3 on bus 0 - The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of - the 1ary bridge. - */ - - struct slot_pin { - int slot; - int pin; - } path[4]; - int i=0; - - while (dev->bus->number > 0) { - - slot = path[i].slot = PCI_SLOT(dev->devfn); - pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin); - dev = dev->bus->self; - i++; - if (i > 3) panic("PCI path to root bus too long!\n"); - } - - slot = PCI_SLOT(dev->devfn); - /* This is the slot on bus 0 through which the device is eventually - reachable. */ - - /* Now work back up. */ - if ((slot < 3) || (i == 0)) { - /* Bus 0 (incl. PCI-PCI bridge itself) : perform the final - swizzle now. */ - result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1; - } else { - i--; - slot = path[i].slot; - pin = path[i].pin; - if (slot > 0) { - panic("PCI expansion bus device found - not handled!\n"); - } else { - if (i > 0) { - /* 5V slots */ - i--; - slot = path[i].slot; - pin = path[i].pin; - /* 'pin' was swizzled earlier wrt slot, don't do it again. */ - result = IRQ_P2INTA + (pin - 1); - } else { - /* IRQ for 2ary PCI-PCI bridge : unused */ - result = -1; - } - } - } - - return result; -} diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types index 569977e52c91..29e648855d50 100644 --- a/arch/sh/tools/mach-types +++ b/arch/sh/tools/mach-types @@ -46,7 +46,6 @@ X3PROTO SH_X3PROTO MAGICPANELR2 SH_MAGIC_PANEL_R2 R2D_PLUS RTS7751R2D_PLUS R2D_1 RTS7751R2D_1 -CAYMAN SH_CAYMAN SDK7780 SH_SDK7780 MIGOR SH_MIGOR RSK7201 SH_RSK7201 -- cgit From c64bbe7006a4dd5ad51b22ca248d89392487af6d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 08:55:39 +0200 Subject: input: i8042 - Remove special Cayman handling As of commit 37744feebc086908 ("sh: remove sh5 support"), support for the SH5-based Cayman platform can no longer be selected. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- drivers/input/serio/i8042-io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/input/serio/i8042-io.h b/drivers/input/serio/i8042-io.h index da0bf85321de..64590b86eb37 100644 --- a/drivers/input/serio/i8042-io.h +++ b/drivers/input/serio/i8042-io.h @@ -21,8 +21,6 @@ #elif defined(__arm__) /* defined in include/asm-arm/arch-xxx/irqs.h */ #include -#elif defined(CONFIG_SH_CAYMAN) -#include #elif defined(CONFIG_PPC) extern int of_i8042_kbd_irq; extern int of_i8042_aux_irq; -- cgit From 845d9156febcd6b3b20c0c2c8d73b461b48e844c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:31 +0200 Subject: sh: fault: Fix duplicate printing of "PC:" Somewhere along the patch handling path, both the old "printk(KERN_ALERT ....)" and the new "pr_alert(...)" were retained, leading to the duplicate printing of "PC:". Drop the old one. Fixes: eaabf98b0932a540 ("sh: fault: modernize printing of kernel messages") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/mm/fault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index fbe1f2fe9a8c..acd1c7599498 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -208,7 +208,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long address) if (!oops_may_print()) return; - printk(KERN_ALERT "PC:"); pr_alert("BUG: unable to handle kernel %s at %08lx\n", address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", -- cgit From fd722f25a6e4aecefbc58047a690076e57015197 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:32 +0200 Subject: Revert "sh: add loglvl to printk_address()" This reverts commit 2deebe4d56d638269a4a728086d64de5734b460a. printk_address() is always used as a continuation of the previous logging, hence it should not include a log level. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/include/asm/kdebug.h | 3 +-- arch/sh/kernel/dumpstack.c | 6 +++--- arch/sh/mm/fault.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/sh/include/asm/kdebug.h b/arch/sh/include/asm/kdebug.h index 960545306afa..de8693fabb1d 100644 --- a/arch/sh/include/asm/kdebug.h +++ b/arch/sh/include/asm/kdebug.h @@ -12,8 +12,7 @@ enum die_val { }; /* arch/sh/kernel/dumpstack.c */ -extern void printk_address(unsigned long address, int reliable, - const char *loglvl); +extern void printk_address(unsigned long address, int reliable); extern void dump_mem(const char *str, const char *loglvl, unsigned long bottom, unsigned long top); diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index a13c045804ed..ad548fc97635 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -44,9 +44,9 @@ void dump_mem(const char *str, const char *loglvl, } } -void printk_address(unsigned long address, int reliable, const char *loglvl) +void printk_address(unsigned long address, int reliable) { - printk("%s [<%p>] %s%pS\n", loglvl, (void *) address, + printk(" [<%p>] %s%pS\n", (void *) address, reliable ? "" : "? ", (void *) address); } @@ -118,7 +118,7 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk_address(addr, reliable, (char *)data); + printk_address(addr, reliable); } static const struct stacktrace_ops print_trace_ops = { diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index acd1c7599498..92b1ce105f4b 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -213,7 +213,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long address) : "paging request", address); pr_alert("PC:"); - printk_address(regs->pc, 1, KERN_ALERT); + printk_address(regs->pc, 1); show_pte(NULL, address); } -- cgit From f6bed866f113d0b141f01e2a6472523739886dc8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:33 +0200 Subject: Revert "sh: remove needless printk()" This reverts commit 8b92f34877225c8eb85e3ab7f1177fc248ba26d0. "data" became the log level in commit 539e786cc37ee5cb ("sh: add loglvl to show_trace()"), so we do need to keep the printk() before the continuation in print_trace_address(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/kernel/dumpstack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index ad548fc97635..cc8063a01284 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -118,6 +118,7 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { + printk("%s", (char *)data); printk_address(addr, reliable); } -- cgit From 9b9fae8ba821c5ecd78d0fd977a3ab0357ec8029 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:34 +0200 Subject: sh: kernel: disassemble: Fix broken lines in disassembly dumps Rejoin the broken lines by using pr_cont(). Convert the remaining printk() calls to pr_*() while at it. Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/kernel/disassemble.c | 103 ++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 845543780cc5..08e1af63edd9 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -376,148 +376,148 @@ static void print_sh_insn(u32 memaddr, u16 insn) } ok: - printk("%-8s ", op->name); + pr_cont("%-8s ", op->name); lastsp = (op->arg[0] == A_END); disp_pc = 0; for (n = 0; n < 6 && op->arg[n] != A_END; n++) { if (n && op->arg[1] != A_END) - printk(", "); + pr_cont(", "); switch (op->arg[n]) { case A_IMM: - printk("#%d", (char)(imm)); + pr_cont("#%d", (char)(imm)); break; case A_R0: - printk("r0"); + pr_cont("r0"); break; case A_REG_N: - printk("r%d", rn); + pr_cont("r%d", rn); break; case A_INC_N: - printk("@r%d+", rn); + pr_cont("@r%d+", rn); break; case A_DEC_N: - printk("@-r%d", rn); + pr_cont("@-r%d", rn); break; case A_IND_N: - printk("@r%d", rn); + pr_cont("@r%d", rn); break; case A_DISP_REG_N: - printk("@(%d,r%d)", imm, rn); + pr_cont("@(%d,r%d)", imm, rn); break; case A_REG_M: - printk("r%d", rm); + pr_cont("r%d", rm); break; case A_INC_M: - printk("@r%d+", rm); + pr_cont("@r%d+", rm); break; case A_DEC_M: - printk("@-r%d", rm); + pr_cont("@-r%d", rm); break; case A_IND_M: - printk("@r%d", rm); + pr_cont("@r%d", rm); break; case A_DISP_REG_M: - printk("@(%d,r%d)", imm, rm); + pr_cont("@(%d,r%d)", imm, rm); break; case A_REG_B: - printk("r%d_bank", rb); + pr_cont("r%d_bank", rb); break; case A_DISP_PC: disp_pc = 1; disp_pc_addr = imm + 4 + (memaddr & relmask); - printk("%08x <%pS>", disp_pc_addr, - (void *)disp_pc_addr); + pr_cont("%08x <%pS>", disp_pc_addr, + (void *)disp_pc_addr); break; case A_IND_R0_REG_N: - printk("@(r0,r%d)", rn); + pr_cont("@(r0,r%d)", rn); break; case A_IND_R0_REG_M: - printk("@(r0,r%d)", rm); + pr_cont("@(r0,r%d)", rm); break; case A_DISP_GBR: - printk("@(%d,gbr)",imm); + pr_cont("@(%d,gbr)", imm); break; case A_R0_GBR: - printk("@(r0,gbr)"); + pr_cont("@(r0,gbr)"); break; case A_BDISP12: case A_BDISP8: - printk("%08x", imm + memaddr); + pr_cont("%08x", imm + memaddr); break; case A_SR: - printk("sr"); + pr_cont("sr"); break; case A_GBR: - printk("gbr"); + pr_cont("gbr"); break; case A_VBR: - printk("vbr"); + pr_cont("vbr"); break; case A_SSR: - printk("ssr"); + pr_cont("ssr"); break; case A_SPC: - printk("spc"); + pr_cont("spc"); break; case A_MACH: - printk("mach"); + pr_cont("mach"); break; case A_MACL: - printk("macl"); + pr_cont("macl"); break; case A_PR: - printk("pr"); + pr_cont("pr"); break; case A_SGR: - printk("sgr"); + pr_cont("sgr"); break; case A_DBR: - printk("dbr"); + pr_cont("dbr"); break; case FD_REG_N: case F_REG_N: - printk("fr%d", rn); + pr_cont("fr%d", rn); break; case F_REG_M: - printk("fr%d", rm); + pr_cont("fr%d", rm); break; case DX_REG_N: if (rn & 1) { - printk("xd%d", rn & ~1); + pr_cont("xd%d", rn & ~1); break; } /* else, fall through */ case D_REG_N: - printk("dr%d", rn); + pr_cont("dr%d", rn); break; case DX_REG_M: if (rm & 1) { - printk("xd%d", rm & ~1); + pr_cont("xd%d", rm & ~1); break; } /* else, fall through */ case D_REG_M: - printk("dr%d", rm); + pr_cont("dr%d", rm); break; case FPSCR_M: case FPSCR_N: - printk("fpscr"); + pr_cont("fpscr"); break; case FPUL_M: case FPUL_N: - printk("fpul"); + pr_cont("fpul"); break; case F_FR0: - printk("fr0"); + pr_cont("fr0"); break; case V_REG_N: - printk("fv%d", rn*4); + pr_cont("fv%d", rn*4); break; case V_REG_M: - printk("fv%d", rm*4); + pr_cont("fv%d", rm*4); break; case XMTRX_M4: - printk("xmtrx"); + pr_cont("xmtrx"); break; default: return; @@ -532,7 +532,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) else __get_user(val, (u32 *)disp_pc_addr); - printk(" ! %08x <%pS>", val, (void *)val); + pr_cont(" ! %08x <%pS>", val, (void *)val); } return; @@ -541,7 +541,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) } - printk(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]); + pr_info(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]); } void show_code(struct pt_regs *regs) @@ -552,20 +552,21 @@ void show_code(struct pt_regs *regs) if (regs->pc & 0x1) return; - printk("Code:\n"); + pr_info("Code:\n"); for (i = -3 ; i < 6 ; i++) { unsigned short insn; if (__get_user(insn, pc + i)) { - printk(" (Bad address in pc)\n"); + pr_err(" (Bad address in pc)\n"); break; } - printk("%s%08lx: ", (i ? " ": "->"), (unsigned long)(pc + i)); + pr_info("%s%08lx: ", (i ? " " : "->"), + (unsigned long)(pc + i)); print_sh_insn((unsigned long)(pc + i), insn); - printk("\n"); + pr_cont("\n"); } - printk("\n"); + pr_info("\n"); } -- cgit From 0632a6d8c6809d71f535232a01374458164182ae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:35 +0200 Subject: sh: dump_stack: Fix broken lines and ptrval in calltrace dumps Rejoin the broken lines by dropping the log level parameters and using pr_cont(). Use "%px" to print sensible addresses in call traces. Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/kernel/dumpstack.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index cc8063a01284..0a69588e343f 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -16,8 +16,8 @@ #include #include -void dump_mem(const char *str, const char *loglvl, - unsigned long bottom, unsigned long top) +void dump_mem(const char *str, const char *loglvl, unsigned long bottom, + unsigned long top) { unsigned long p; int i; @@ -31,23 +31,23 @@ void dump_mem(const char *str, const char *loglvl, unsigned int val; if (p < bottom || p >= top) - printk("%s ", loglvl); + pr_cont(" "); else { if (__get_user(val, (unsigned int __user *)p)) { - printk("%s\n", loglvl); + pr_cont("\n"); return; } - printk("%s%08x ", loglvl, val); + pr_cont("%08x ", val); } } - printk("%s\n", loglvl); + pr_cont("\n"); } } void printk_address(unsigned long address, int reliable) { - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? ", (void *) address); + pr_cont(" [<%px>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -137,7 +137,7 @@ void show_trace(struct task_struct *tsk, unsigned long *sp, unwind_stack(tsk, regs, sp, &print_trace_ops, (void *)loglvl); - printk("%s\n", loglvl); + pr_cont("\n"); if (!tsk) tsk = current; -- cgit From 21afcacb0348edf8f5d4e6115b5eb0b58f9a049b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:36 +0200 Subject: sh: process: Fix broken lines in register dumps Rejoin the broken lines by using pr_cont(). Convert the remaining printk() calls to pr_*() while at it. Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/kernel/process_32.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 456cc8d171f7..049f11a5f9ab 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -30,34 +30,30 @@ void show_regs(struct pt_regs * regs) { - printk("\n"); + pr_info("\n"); show_regs_print_info(KERN_DEFAULT); - printk("PC is at %pS\n", (void *)instruction_pointer(regs)); - printk("PR is at %pS\n", (void *)regs->pr); + pr_info("PC is at %pS\n", (void *)instruction_pointer(regs)); + pr_info("PR is at %pS\n", (void *)regs->pr); - printk("PC : %08lx SP : %08lx SR : %08lx ", - regs->pc, regs->regs[15], regs->sr); + pr_info("PC : %08lx SP : %08lx SR : %08lx ", regs->pc, + regs->regs[15], regs->sr); #ifdef CONFIG_MMU - printk("TEA : %08x\n", __raw_readl(MMU_TEA)); + pr_cont("TEA : %08x\n", __raw_readl(MMU_TEA)); #else - printk("\n"); + pr_cont("\n"); #endif - printk("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n", - regs->regs[0],regs->regs[1], - regs->regs[2],regs->regs[3]); - printk("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n", - regs->regs[4],regs->regs[5], - regs->regs[6],regs->regs[7]); - printk("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n", - regs->regs[8],regs->regs[9], - regs->regs[10],regs->regs[11]); - printk("R12 : %08lx R13 : %08lx R14 : %08lx\n", - regs->regs[12],regs->regs[13], - regs->regs[14]); - printk("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", - regs->mach, regs->macl, regs->gbr, regs->pr); + pr_info("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n", + regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3]); + pr_info("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n", + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); + pr_info("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n", + regs->regs[8], regs->regs[9], regs->regs[10], regs->regs[11]); + pr_info("R12 : %08lx R13 : %08lx R14 : %08lx\n", + regs->regs[12], regs->regs[13], regs->regs[14]); + pr_info("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", + regs->mach, regs->macl, regs->gbr, regs->pr); show_trace(NULL, (unsigned long *)regs->regs[15], regs, KERN_DEFAULT); show_code(regs); -- cgit From f88c6a26ddd6c1b914c0c2536cff89b183a89a28 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:37 +0200 Subject: sh: sh2007: Modernize printing of kernel messages - Convert from printk() to pr_*(), - Add missing continuation. Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/boards/board-sh2007.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/board-sh2007.c b/arch/sh/boards/board-sh2007.c index ef9c87deeb08..6ea85e480851 100644 --- a/arch/sh/boards/board-sh2007.c +++ b/arch/sh/boards/board-sh2007.c @@ -126,14 +126,14 @@ static void __init sh2007_init_irq(void) */ static void __init sh2007_setup(char **cmdline_p) { - printk(KERN_INFO "SH-2007 Setup..."); + pr_info("SH-2007 Setup..."); /* setup wait control registers for area 5 */ __raw_writel(CS5BCR_D, CS5BCR); __raw_writel(CS5WCR_D, CS5WCR); __raw_writel(CS5PCR_D, CS5PCR); - printk(KERN_INFO " done.\n"); + pr_cont(" done.\n"); } /* -- cgit From 601bf18b6630d37bde945fc890a995650f53e2c9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:38 +0200 Subject: sh: pci: Modernize printing of kernel messages - Convert from printk() to pr_*(), - Add missing continuations, - Join broken messages. Note that printk(KERN_DEBUG ...) is retained, to preserve behavior (pr_debug() is a dummy if DEBUG is not defined). Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/drivers/pci/common.c | 6 +++--- arch/sh/drivers/pci/pci-sh7780.c | 23 +++++++++++------------ arch/sh/drivers/pci/pci.c | 11 +++++------ 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index fe163ecd0719..2fd2b77e12ce 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -54,7 +54,7 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose, int cap66 = -1; u16 stat; - printk(KERN_INFO "PCI: Checking 66MHz capabilities...\n"); + pr_info("PCI: Checking 66MHz capabilities...\n"); for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) { if (PCI_FUNC(pci_devfn)) @@ -134,7 +134,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr, pcibios_report_status(PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT, 1); - printk("\n"); + pr_cont("\n"); cmd |= PCI_STATUS_REC_TARGET_ABORT; } @@ -143,7 +143,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr, printk(KERN_DEBUG "PCI: parity error detected: "); pcibios_report_status(PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY, 1); - printk("\n"); + pr_cont("\n"); cmd |= PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY; diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 287b3a68570c..9a624a6ee354 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -148,7 +148,7 @@ static irqreturn_t sh7780_pci_serr_irq(int irq, void *dev_id) printk(KERN_DEBUG "PCI: system error received: "); pcibios_report_status(PCI_STATUS_SIG_SYSTEM_ERROR, 1); - printk("\n"); + pr_cont("\n"); /* Deassert SERR */ __raw_writel(SH4_PCIINTM_SDIM, hose->reg_base + SH4_PCIINTM); @@ -179,7 +179,7 @@ static int __init sh7780_pci_setup_irqs(struct pci_channel *hose) ret = request_irq(hose->serr_irq, sh7780_pci_serr_irq, 0, "PCI SERR interrupt", hose); if (unlikely(ret)) { - printk(KERN_ERR "PCI: Failed hooking SERR IRQ\n"); + pr_err("PCI: Failed hooking SERR IRQ\n"); return ret; } @@ -250,7 +250,7 @@ static int __init sh7780_pci_init(void) const char *type; int ret, i; - printk(KERN_NOTICE "PCI: Starting initialization.\n"); + pr_notice("PCI: Starting initialization.\n"); chan->reg_base = 0xfe040000; @@ -270,7 +270,7 @@ static int __init sh7780_pci_init(void) id = __raw_readw(chan->reg_base + PCI_VENDOR_ID); if (id != PCI_VENDOR_ID_RENESAS) { - printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id); + pr_err("PCI: Unknown vendor ID 0x%04x.\n", id); return -ENODEV; } @@ -281,14 +281,13 @@ static int __init sh7780_pci_init(void) (id == PCI_DEVICE_ID_RENESAS_SH7785) ? "SH7785" : NULL; if (unlikely(!type)) { - printk(KERN_ERR "PCI: Found an unsupported Renesas host " - "controller, device id 0x%04x.\n", id); + pr_err("PCI: Found an unsupported Renesas host controller, device id 0x%04x.\n", + id); return -EINVAL; } - printk(KERN_NOTICE "PCI: Found a Renesas %s host " - "controller, revision %d.\n", type, - __raw_readb(chan->reg_base + PCI_REVISION_ID)); + pr_notice("PCI: Found a Renesas %s host controller, revision %d.\n", + type, __raw_readb(chan->reg_base + PCI_REVISION_ID)); /* * Now throw it in to register initialization mode and @@ -395,9 +394,9 @@ static int __init sh7780_pci_init(void) sh7780_pci66_init(chan); - printk(KERN_NOTICE "PCI: Running at %dMHz.\n", - (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ) ? - 66 : 33); + pr_notice("PCI: Running at %dMHz.\n", + (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ) + ? 66 : 33); return 0; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index c7784e156964..6ab0b7377f66 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -120,8 +120,7 @@ int register_pci_controller(struct pci_channel *hose) * Do not panic here but later - this might happen before console init. */ if (!hose->io_map_base) { - printk(KERN_WARNING - "registering PCI controller with io_map_base unset\n"); + pr_warn("registering PCI controller with io_map_base unset\n"); } /* @@ -145,7 +144,7 @@ out: for (--i; i >= 0; i--) release_resource(&hose->resources[i]); - printk(KERN_WARNING "Skipping PCI bus scan due to resource conflict\n"); + pr_warn("Skipping PCI bus scan due to resource conflict\n"); return -1; } @@ -213,8 +212,8 @@ pcibios_bus_report_status_early(struct pci_channel *hose, pci_devfn, PCI_STATUS, status & status_mask); if (warn) - printk("(%02x:%02x: %04X) ", current_bus, - pci_devfn, status); + pr_cont("(%02x:%02x: %04X) ", current_bus, pci_devfn, + status); } } @@ -249,7 +248,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, pci_write_config_word(dev, PCI_STATUS, status & status_mask); if (warn) - printk("(%s: %04X) ", pci_name(dev), status); + pr_cont("(%s: %04X) ", pci_name(dev), status); } list_for_each_entry(dev, &bus->devices, bus_list) -- cgit From eac1a488171c70c695c436bcea7682841649d5b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jun 2020 16:36:39 +0200 Subject: sh: machvec: Modernize printing of kernel messages - Convert from printk() to pr_*(), - Add missing continuations. Signed-off-by: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Rich Felker --- arch/sh/kernel/machvec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index beadbbdb4486..0e9fa33a9b6a 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -64,10 +64,10 @@ static int __init early_parse_mv(char *from) mvp = get_mv_byname(mv_name); if (unlikely(!mvp)) { - printk("Available vectors:\n\n\t'%s', ", sh_mv.mv_name); + pr_info("Available vectors:\n\n\t'%s', ", sh_mv.mv_name); for_each_mv(mvp) - printk("'%s', ", mvp->mv_name); - printk("\n\n"); + pr_cont("'%s', ", mvp->mv_name); + pr_cont("\n\n"); panic("Failed to select machvec '%s' -- halting.\n", mv_name); } else @@ -104,7 +104,7 @@ void __init sh_mv_setup(void) sh_mv = *(struct sh_machine_vector *)&__machvec_start; } - printk(KERN_NOTICE "Booting machvec: %s\n", get_system_type()); + pr_notice("Booting machvec: %s\n", get_system_type()); /* * Manually walk the vec, fill in anything that the board hasn't yet -- cgit From c0735ae9a00642b514bade90456ad6a828dcc6df Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 18 Jun 2020 09:59:37 +0200 Subject: sh: stacktrace: Remove stacktrace_ops.stack() The SH implementation never called stacktrace_ops.stack(). Presumably this was copied from the x86 implementation. Hence remove the method, and all implementations (most of them are dummies). Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/include/asm/stacktrace.h | 2 -- arch/sh/kernel/dumpstack.c | 7 ------- arch/sh/kernel/perf_callchain.c | 6 ------ arch/sh/kernel/stacktrace.c | 7 ------- arch/sh/oprofile/backtrace.c | 7 ------- 5 files changed, 29 deletions(-) diff --git a/arch/sh/include/asm/stacktrace.h b/arch/sh/include/asm/stacktrace.h index 50c173c0b9f5..4f98cdc64ec5 100644 --- a/arch/sh/include/asm/stacktrace.h +++ b/arch/sh/include/asm/stacktrace.h @@ -12,8 +12,6 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); - /* On negative return stop dumping */ - int (*stack)(void *data, char *name); }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 0a69588e343f..758a6c89e911 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -107,12 +107,6 @@ stack_reader_dump(struct task_struct *task, struct pt_regs *regs, } } -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - /* * Print one address/symbol entries per line. */ @@ -123,7 +117,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .stack = print_trace_stack, .address = print_trace_address, }; diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 6281f2fdf9ca..c9d3aa18732d 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -11,11 +11,6 @@ #include #include -static int callchain_stack(void *data, char *name) -{ - return 0; -} - static void callchain_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry_ctx *entry = data; @@ -25,7 +20,6 @@ static void callchain_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops callchain_ops = { - .stack = callchain_stack, .address = callchain_address, }; diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index 2950b19ad077..daf0b53ee066 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -15,11 +15,6 @@ #include #include -static int save_stack_stack(void *data, char *name) -{ - return 0; -} - /* * Save stack-backtrace addresses into a stack_trace buffer. */ @@ -40,7 +35,6 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .stack = save_stack_stack, .address = save_stack_address, }; @@ -73,7 +67,6 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops_nosched = { - .stack = save_stack_stack, .address = save_stack_address_nosched, }; diff --git a/arch/sh/oprofile/backtrace.c b/arch/sh/oprofile/backtrace.c index f1205f92631d..cc16cf86cd92 100644 --- a/arch/sh/oprofile/backtrace.c +++ b/arch/sh/oprofile/backtrace.c @@ -19,12 +19,6 @@ #include #include -static int backtrace_stack(void *data, char *name) -{ - /* Yes, we want all stacks */ - return 0; -} - static void backtrace_address(void *data, unsigned long addr, int reliable) { unsigned int *depth = data; @@ -34,7 +28,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .stack = backtrace_stack, .address = backtrace_address, }; -- cgit From e1a8d38a17ce68776c313f0960f530371418c428 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Jun 2020 15:15:24 +0200 Subject: arch/sh/configs: remove obsolete CONFIG_SOC_CAMERA* Drop all configs with the CONFIG_SOC_CAMERA prefix since those have been removed. SOC_CAMERA support for the sh architecture was removed a long time ago. Drop it from the configs. Signed-off-by: Hans Verkuil Signed-off-by: Rich Felker --- arch/sh/configs/ap325rxa_defconfig | 3 --- arch/sh/configs/ecovec24_defconfig | 3 --- arch/sh/configs/migor_defconfig | 3 --- arch/sh/configs/se7724_defconfig | 2 -- 4 files changed, 11 deletions(-) diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index cc6e4ce53dac..5193b3e099b9 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -65,9 +65,6 @@ CONFIG_VIDEO_DEV=y # CONFIG_VIDEO_ALLOW_V4L1 is not set # CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -CONFIG_SOC_CAMERA=y -CONFIG_SOC_CAMERA_PLATFORM=y -CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set CONFIG_FB=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index 2fb7db4957ce..03cb916819fa 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -72,9 +72,6 @@ CONFIG_MEDIA_SUPPORT=y CONFIG_VIDEO_DEV=y # CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -CONFIG_SOC_CAMERA=y -CONFIG_SOC_CAMERA_MT9T112=y -CONFIG_SOC_CAMERA_TW9910=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_V4L_USB_DRIVERS is not set CONFIG_FB=y diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 494a1675c226..37e9521a99e5 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -62,9 +62,6 @@ CONFIG_VIDEO_DEV=y # CONFIG_VIDEO_ALLOW_V4L1 is not set # CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -CONFIG_SOC_CAMERA=y -CONFIG_SOC_CAMERA_TW9910=y -CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set CONFIG_FB=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 0e8d5cc1e107..a26f7f1841c7 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -70,8 +70,6 @@ CONFIG_VIDEO_DEV=y CONFIG_DVB_CORE=m # CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_VIDEO_HELPER_CHIPS_AUTO=y -CONFIG_SOC_CAMERA=y -CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DVB_FE_CUSTOMISE is not set -- cgit From 91194e9b046e18ed813d4632e1c72683aac944ad Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Sun, 12 Jul 2020 13:11:18 +0200 Subject: sh: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Rich Felker --- arch/sh/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d8b097cbb6ed..c80c59889b5f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -630,7 +630,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. See also and the SMP-HOWTO - available at . + available at . If you don't know what to do here, say N. -- cgit From ccbb5239d495923cb24b84a73eb814626c5bfa57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:47 +0200 Subject: sh: remove -Werror from Makefiles The sh build is full of warnings when building with gcc 9.2.1. While fixing those would be great, at least avoid failing the build. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/kernel/Makefile | 2 -- arch/sh/lib/Makefile | 2 -- arch/sh/mm/Makefile | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index b0f5574b6228..aa0fbc9202b1 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -47,5 +47,3 @@ obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o obj-$(CONFIG_DMA_NONCOHERENT) += dma-coherent.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o - -ccflags-y := -Werror diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index d0abbe5e38b0..eb473d373ca4 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -30,5 +30,3 @@ memset-$(CONFIG_CPU_SH4) := memset-sh4.o lib-$(CONFIG_MMU) += copy_page.o __clear_user.o lib-$(CONFIG_MCOUNT) += mcount.o lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y) - -ccflags-y := -Werror diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index 487da0ff03b3..f69ddc70b146 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -43,5 +43,3 @@ obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o GCOV_PROFILE_pmb.o := n - -ccflags-y := -Werror -- cgit From 582dc536d75990c7a50f5d385ee0d10ee284411f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:48 +0200 Subject: sh: sort the selects for SUPERH alphabetically Ensure there is an order for the selects. Also remove a duplicate one. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/Kconfig | 95 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c80c59889b5f..1bf23e272483 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,75 +1,74 @@ # SPDX-License-Identifier: GPL-2.0 config SUPERH def_bool y + select ARCH_32BIT_OFF_T + select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HIBERNATION_POSSIBLE if MMU select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_PATA_PLATFORM + select ARCH_WANT_IPC_PARSE_VERSION select CLKDEV_LOOKUP + select CPU_NO_EFFICIENT_FFS select DMA_DECLARE_COHERENT - select HAVE_IDE if HAS_IOPORT_MAP - select HAVE_OPROFILE + select GENERIC_ATOMIC64 + select GENERIC_CLOCKEVENTS + select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST + select GENERIC_IDLE_POLL_SETUP + select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP if PCI + select GENERIC_SCHED_CLOCK + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_SMP_IDLE_THREAD + select GUP_GET_PTE_LOW_HIGH if X2TLB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE - select HAVE_FAST_GUP if MMU - select ARCH_HAVE_CUSTOM_GPIO_H - select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) - select ARCH_HAS_GCOV_PROFILE_ALL - select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK - select HAVE_KERNEL_GZIP - select CPU_NO_EFFICIENT_FFS + select HAVE_DYNAMIC_FTRACE + select HAVE_FAST_GUP if MMU + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_HW_BREAKPOINT + select HAVE_IDE if HAS_IOPORT_MAP + select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA - select HAVE_KERNEL_XZ select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER + select HAVE_NMI + select HAVE_OPROFILE + select HAVE_PATA_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_UID16 - select ARCH_WANT_IPC_PARSE_VERSION + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS - select HAVE_REGS_AND_STACK_ACCESS_API - select MAY_HAVE_SPARSE_IRQ select IRQ_FORCED_THREADING - select RTC_LIB - select GENERIC_ATOMIC64 - select GENERIC_IRQ_SHOW - select GENERIC_SMP_IDLE_THREAD - select GENERIC_IDLE_POLL_SETUP - select GENERIC_CLOCKEVENTS - select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST - select GENERIC_PCI_IOMAP if PCI - select GENERIC_SCHED_CLOCK - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER + select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA + select NEED_SG_DMA_LENGTH select NO_GENERIC_PCI_IOPORT_MAP if PCI - select OLD_SIGSUSPEND select OLD_SIGACTION + select OLD_SIGSUSPEND select PCI_DOMAINS if PCI - select HAVE_ARCH_AUDITSYSCALL - select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_NMI - select NEED_SG_DMA_LENGTH - select ARCH_HAS_GIGANTIC_PAGE - select ARCH_32BIT_OFF_T - select GUP_GET_PTE_LOW_HIGH if X2TLB - select HAVE_KPROBES - select HAVE_KRETPROBES - select HAVE_IOREMAP_PROT if MMU && !X2TLB - select HAVE_FUNCTION_TRACER - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_DYNAMIC_FTRACE - select ARCH_WANT_IPC_PARSE_VERSION - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_ARCH_KGDB - select HAVE_HW_BREAKPOINT - select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS - select ARCH_HIBERNATION_POSSIBLE if MMU + select PERF_USE_VMALLOC + select RTC_LIB select SPARSE_IRQ - select HAVE_STACKPROTECTOR help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast -- cgit From e12b090eae6ac3b18f42673a4b0fef6e61b63cac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:49 +0200 Subject: sh: remove __KERNEL__ ifdefs from non-UAPI headers There is no point in having __KERNEL__ ifdefs in headers not exported to userspace. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/include/asm/adc.h | 2 -- arch/sh/include/asm/addrspace.h | 3 --- arch/sh/include/asm/bitops.h | 4 ---- arch/sh/include/asm/cache.h | 2 -- arch/sh/include/asm/cacheflush.h | 3 --- arch/sh/include/asm/dma.h | 2 -- arch/sh/include/asm/elf.h | 2 -- arch/sh/include/asm/freq.h | 2 -- arch/sh/include/asm/futex.h | 3 --- arch/sh/include/asm/io.h | 3 --- arch/sh/include/asm/mmu_context.h | 2 -- arch/sh/include/asm/mmzone.h | 3 --- arch/sh/include/asm/pci.h | 4 ---- arch/sh/include/asm/processor_32.h | 2 -- arch/sh/include/asm/sparsemem.h | 3 --- arch/sh/include/asm/string_32.h | 4 ---- arch/sh/include/asm/syscalls_32.h | 3 --- arch/sh/include/asm/thread_info.h | 5 ----- arch/sh/include/asm/watchdog.h | 2 -- 19 files changed, 54 deletions(-) diff --git a/arch/sh/include/asm/adc.h b/arch/sh/include/asm/adc.h index 99ec66849559..feccfe639e38 100644 --- a/arch/sh/include/asm/adc.h +++ b/arch/sh/include/asm/adc.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_ADC_H #define __ASM_ADC_H -#ifdef __KERNEL__ /* * Copyright (C) 2004 Andriy Skulysh */ @@ -10,5 +9,4 @@ int adc_single(unsigned int channel); -#endif /* __KERNEL__ */ #endif /* __ASM_ADC_H */ diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h index 34bfbcddcce0..468fba333e89 100644 --- a/arch/sh/include/asm/addrspace.h +++ b/arch/sh/include/asm/addrspace.h @@ -7,8 +7,6 @@ #ifndef __ASM_SH_ADDRSPACE_H #define __ASM_SH_ADDRSPACE_H -#ifdef __KERNEL__ - #include /* If this CPU supports segmentation, hook up the helpers */ @@ -62,5 +60,4 @@ #define P3_ADDR_MAX P4SEG #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_ADDRSPACE_H */ diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index 445dd14c448a..450b5854d7c6 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_BITOPS_H #define __ASM_SH_BITOPS_H -#ifdef __KERNEL__ - #ifndef _LINUX_BITOPS_H #error only can be included directly #endif @@ -71,6 +69,4 @@ static inline unsigned long __ffs(unsigned long word) #include #include -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_BITOPS_H */ diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index 2408ac4873aa..a293343456af 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -8,7 +8,6 @@ */ #ifndef __ASM_SH_CACHE_H #define __ASM_SH_CACHE_H -#ifdef __KERNEL__ #include #include @@ -44,5 +43,4 @@ struct cache_info { unsigned long flags; }; #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* __ASM_SH_CACHE_H */ diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index fe7400079b97..4486a865ff62 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_CACHEFLUSH_H #define __ASM_SH_CACHEFLUSH_H -#ifdef __KERNEL__ - #include /* @@ -109,5 +107,4 @@ static inline void *sh_cacheop_vaddr(void *vaddr) return vaddr; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_CACHEFLUSH_H */ diff --git a/arch/sh/include/asm/dma.h b/arch/sh/include/asm/dma.h index 4d5a21a891c0..17d23ae98c77 100644 --- a/arch/sh/include/asm/dma.h +++ b/arch/sh/include/asm/dma.h @@ -6,7 +6,6 @@ */ #ifndef __ASM_SH_DMA_H #define __ASM_SH_DMA_H -#ifdef __KERNEL__ #include #include @@ -144,5 +143,4 @@ extern int isa_dma_bridge_buggy; #define isa_dma_bridge_buggy (0) #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_DMA_H */ diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index 7661fb5d548a..2862d6d1cb64 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -90,7 +90,6 @@ typedef struct user_fpu_struct elf_fpregset_t; #endif #define ELF_ARCH EM_SH -#ifdef __KERNEL__ /* * This is used to ensure we don't load something for the wrong architecture. */ @@ -209,5 +208,4 @@ do { \ NEW_AUX_ENT(AT_L2_CACHESHAPE, l2_cache_shape); \ } while (0) -#endif /* __KERNEL__ */ #endif /* __ASM_SH_ELF_H */ diff --git a/arch/sh/include/asm/freq.h b/arch/sh/include/asm/freq.h index 18133bf83738..87c23621b7ae 100644 --- a/arch/sh/include/asm/freq.h +++ b/arch/sh/include/asm/freq.h @@ -6,9 +6,7 @@ */ #ifndef __ASM_SH_FREQ_H #define __ASM_SH_FREQ_H -#ifdef __KERNEL__ #include -#endif /* __KERNEL__ */ #endif /* __ASM_SH_FREQ_H */ diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index b39cda09fb95..b70f3fce6ed7 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_FUTEX_H #define __ASM_SH_FUTEX_H -#ifdef __KERNEL__ - #include #include #include @@ -71,5 +69,4 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, return ret; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_FUTEX_H */ diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 26f0f9b4658b..1fd06ef6a194 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -20,7 +20,6 @@ #include #include -#ifdef __KERNEL__ #define __IO_PREFIX generic #include #include @@ -380,6 +379,4 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } int valid_phys_addr_range(phys_addr_t addr, size_t size); int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_IO_H */ diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 48e67d544d53..f664e51e8a15 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -8,7 +8,6 @@ #ifndef __ASM_SH_MMU_CONTEXT_H #define __ASM_SH_MMU_CONTEXT_H -#ifdef __KERNEL__ #include #include #include @@ -177,5 +176,4 @@ static inline void disable_mmu(void) #define disable_mmu() do { } while (0) #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_MMU_CONTEXT_H */ diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h index cbaee1d1b673..6552a088dc97 100644 --- a/arch/sh/include/asm/mmzone.h +++ b/arch/sh/include/asm/mmzone.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_MMZONE_H #define __ASM_SH_MMZONE_H -#ifdef __KERNEL__ - #ifdef CONFIG_NEED_MULTIPLE_NODES #include @@ -44,5 +42,4 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, /* arch/sh/mm/init.c */ void __init allocate_pgdat(unsigned int nid); -#endif /* __KERNEL__ */ #endif /* __ASM_SH_MMZONE_H */ diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 10a36b1cf2ea..ad22e88c6657 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_PCI_H #define __ASM_SH_PCI_H -#ifdef __KERNEL__ - /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -96,6 +94,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 15 : 14; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_PCI_H */ - diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index d44409413418..aa92cc933889 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -8,7 +8,6 @@ #ifndef __ASM_SH_PROCESSOR_32_H #define __ASM_SH_PROCESSOR_32_H -#ifdef __KERNEL__ #include #include @@ -203,5 +202,4 @@ static inline void prefetchw(const void *x) } #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_PROCESSOR_32_H */ diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h index 4eb899751e45..ed287c4980bc 100644 --- a/arch/sh/include/asm/sparsemem.h +++ b/arch/sh/include/asm/sparsemem.h @@ -2,7 +2,6 @@ #ifndef __ASM_SH_SPARSEMEM_H #define __ASM_SH_SPARSEMEM_H -#ifdef __KERNEL__ /* * SECTION_SIZE_BITS 2^N: how big each section will be * MAX_PHYSADDR_BITS 2^N: how much physical address space we have @@ -12,6 +11,4 @@ #define MAX_PHYSADDR_BITS 32 #define MAX_PHYSMEM_BITS 32 -#endif - #endif /* __ASM_SH_SPARSEMEM_H */ diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h index 3558b1d7123e..778589e4a99d 100644 --- a/arch/sh/include/asm/string_32.h +++ b/arch/sh/include/asm/string_32.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_STRING_H #define __ASM_SH_STRING_H -#ifdef __KERNEL__ - /* * Copyright (C) 1999 Niibe Yutaka * But consider these trivial functions to be public domain. @@ -127,6 +125,4 @@ extern void *memchr(const void *__s, int __c, size_t __n); #define __HAVE_ARCH_STRLEN extern size_t strlen(const char *); -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_STRING_H */ diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h index 9f9faf63b48c..5c555b864fe0 100644 --- a/arch/sh/include/asm/syscalls_32.h +++ b/arch/sh/include/asm/syscalls_32.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_SYSCALLS_32_H #define __ASM_SH_SYSCALLS_32_H -#ifdef __KERNEL__ - #include #include #include @@ -26,5 +24,4 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, unsigned long thread_info_flags); -#endif /* __KERNEL__ */ #endif /* __ASM_SH_SYSCALLS_32_H */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 6404be69d5fa..243ea5150aa0 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -10,8 +10,6 @@ * Copyright (C) 2002 David Howells (dhowells@redhat.com) * - Incorporating suggestions made by Linus Torvalds and Dave Miller */ -#ifdef __KERNEL__ - #include /* @@ -170,7 +168,4 @@ static inline unsigned int get_thread_fault_code(void) } #endif /* !__ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_THREAD_INFO_H */ diff --git a/arch/sh/include/asm/watchdog.h b/arch/sh/include/asm/watchdog.h index cecd0fc507f9..b9ca4c99f046 100644 --- a/arch/sh/include/asm/watchdog.h +++ b/arch/sh/include/asm/watchdog.h @@ -8,7 +8,6 @@ */ #ifndef __ASM_SH_WATCHDOG_H #define __ASM_SH_WATCHDOG_H -#ifdef __KERNEL__ #include #include @@ -157,5 +156,4 @@ static inline void sh_wdt_write_csr(__u8 val) __raw_writew((WTCSR_HIGH << 8) | (__u16)val, WTCSR); } #endif /* CONFIG_CPU_SUBTYPE_SH7785 || CONFIG_CPU_SUBTYPE_SH7780 */ -#endif /* __KERNEL__ */ #endif /* __ASM_SH_WATCHDOG_H */ -- cgit From 3eef6b74d9fecf18b03db26584cc66928972a60b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:50 +0200 Subject: sh: move ioremap_fixed details out of ioremap_fixed is an internal implementation detail and should not be exposed to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/include/asm/io.h | 16 ---------------- arch/sh/mm/init.c | 1 + arch/sh/mm/ioremap.c | 1 + arch/sh/mm/ioremap.h | 23 +++++++++++++++++++++++ arch/sh/mm/ioremap_fixed.c | 1 + 5 files changed, 26 insertions(+), 16 deletions(-) create mode 100644 arch/sh/mm/ioremap.h diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 1fd06ef6a194..357a7e0c86d6 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -346,22 +346,6 @@ ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags) } #endif -#ifdef CONFIG_IOREMAP_FIXED -extern void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t); -extern int iounmap_fixed(void __iomem *); -extern void ioremap_fixed_init(void); -#else -static inline void __iomem * -ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot) -{ - BUG(); - return NULL; -} - -static inline void ioremap_fixed_init(void) { } -static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } -#endif - #define ioremap_uc ioremap /* diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index a70ba0fdd0b3..da7ea48f9439 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -28,6 +28,7 @@ #include #include #include +#include "ioremap.h" pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index f6d02246d665..d9ec85b6bb21 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -24,6 +24,7 @@ #include #include #include +#include "ioremap.h" /* * Remap an arbitrary physical address space into the kernel virtual diff --git a/arch/sh/mm/ioremap.h b/arch/sh/mm/ioremap.h new file mode 100644 index 000000000000..f2544e721a35 --- /dev/null +++ b/arch/sh/mm/ioremap.h @@ -0,0 +1,23 @@ +#ifndef _SH_MM_IORMEMAP_H +#define _SH_MM_IORMEMAP_H 1 + +#ifdef CONFIG_IOREMAP_FIXED +void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t); +int iounmap_fixed(void __iomem *); +void ioremap_fixed_init(void); +#else +static inline void __iomem * +ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot) +{ + BUG(); + return NULL; +} +static inline void ioremap_fixed_init(void) +{ +} +static inline int iounmap_fixed(void __iomem *addr) +{ + return -EINVAL; +} +#endif /* CONFIG_IOREMAP_FIXED */ +#endif /* _SH_MM_IORMEMAP_H */ diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index 07e744d75fa0..1914b79d1c53 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -24,6 +24,7 @@ #include #include #include +#include "ioremap.h" struct ioremap_map { void __iomem *addr; -- cgit From 13f1fc870dd747131f21ba6f20dc0d81cc5d4474 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:51 +0200 Subject: sh: move the ioremap implementation out of line Move the internal implementation details of ioremap out of line, no need to expose any of this to drivers for a slow path API. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/include/asm/io.h | 101 +++++++---------------------------------------- arch/sh/mm/ioremap.c | 53 +++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 86 deletions(-) diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 357a7e0c86d6..da08a61a2f7d 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -242,109 +242,38 @@ unsigned long long poke_real_address_q(unsigned long long addr, #define phys_to_virt(address) (__va(address)) #endif -/* - * On 32-bit SH, we traditionally have the whole physical address space - * mapped at all times (as MIPS does), so "ioremap()" and "iounmap()" do - * not need to do anything but place the address in the proper segment. - * This is true for P1 and P2 addresses, as well as some P3 ones. - * However, most of the P3 addresses and newer cores using extended - * addressing need to map through page tables, so the ioremap() - * implementation becomes a bit more complicated. - * - * See arch/sh/mm/ioremap.c for additional notes on this. - * - * We cheat a bit and always return uncachable areas until we've fixed - * the drivers to handle caching properly. - * - * On the SH-5 the concept of segmentation in the 1:1 PXSEG sense simply - * doesn't exist, so everything must go through page tables. - */ #ifdef CONFIG_MMU +void iounmap(void __iomem *addr); void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size, pgprot_t prot, void *caller); -void iounmap(void __iomem *addr); - -static inline void __iomem * -__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ - return __ioremap_caller(offset, size, prot, __builtin_return_address(0)); -} - -static inline void __iomem * -__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ -#ifdef CONFIG_29BIT - phys_addr_t last_addr = offset + size - 1; - - /* - * For P1 and P2 space this is trivial, as everything is already - * mapped. Uncached access for P1 addresses are done through P2. - * In the P3 case or for addresses outside of the 29-bit space, - * mapping must be done by the PMB or by using page tables. - */ - if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) { - u64 flags = pgprot_val(prot); - - /* - * Anything using the legacy PTEA space attributes needs - * to be kicked down to page table mappings. - */ - if (unlikely(flags & _PAGE_PCC_MASK)) - return NULL; - if (unlikely(flags & _PAGE_CACHABLE)) - return (void __iomem *)P1SEGADDR(offset); - - return (void __iomem *)P2SEGADDR(offset); - } - - /* P4 above the store queues are always mapped. */ - if (unlikely(offset >= P3_ADDR_MAX)) - return (void __iomem *)P4SEGADDR(offset); -#endif - - return NULL; -} - -static inline void __iomem * -__ioremap_mode(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ - void __iomem *ret; - - ret = __ioremap_trapped(offset, size); - if (ret) - return ret; - - ret = __ioremap_29bit(offset, size, prot); - if (ret) - return ret; - - return __ioremap(offset, size, prot); -} -#else -#define __ioremap(offset, size, prot) ((void __iomem *)(offset)) -#define __ioremap_mode(offset, size, prot) ((void __iomem *)(offset)) -static inline void iounmap(void __iomem *addr) {} -#endif /* CONFIG_MMU */ static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size) { - return __ioremap_mode(offset, size, PAGE_KERNEL_NOCACHE); + return __ioremap_caller(offset, size, PAGE_KERNEL_NOCACHE, + __builtin_return_address(0)); } static inline void __iomem * ioremap_cache(phys_addr_t offset, unsigned long size) { - return __ioremap_mode(offset, size, PAGE_KERNEL); + return __ioremap_caller(offset, size, PAGE_KERNEL, + __builtin_return_address(0)); } #define ioremap_cache ioremap_cache #ifdef CONFIG_HAVE_IOREMAP_PROT -static inline void __iomem * -ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags) +static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long flags) { - return __ioremap_mode(offset, size, __pgprot(flags)); + return __ioremap_caller(offset, size, __pgprot(flags), + __builtin_return_address(0)); } -#endif +#endif /* CONFIG_HAVE_IOREMAP_PROT */ + +#else /* CONFIG_MMU */ +#define iounmap(addr) do { } while (0) +#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +#endif /* CONFIG_MMU */ #define ioremap_uc ioremap diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index d9ec85b6bb21..69e55939e48a 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -26,6 +26,51 @@ #include #include "ioremap.h" +/* + * On 32-bit SH, we traditionally have the whole physical address space mapped + * at all times (as MIPS does), so "ioremap()" and "iounmap()" do not need to do + * anything but place the address in the proper segment. This is true for P1 + * and P2 addresses, as well as some P3 ones. However, most of the P3 addresses + * and newer cores using extended addressing need to map through page tables, so + * the ioremap() implementation becomes a bit more complicated. + */ +#ifdef CONFIG_29BIT +static void __iomem * +__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) +{ + phys_addr_t last_addr = offset + size - 1; + + /* + * For P1 and P2 space this is trivial, as everything is already + * mapped. Uncached access for P1 addresses are done through P2. + * In the P3 case or for addresses outside of the 29-bit space, + * mapping must be done by the PMB or by using page tables. + */ + if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) { + u64 flags = pgprot_val(prot); + + /* + * Anything using the legacy PTEA space attributes needs + * to be kicked down to page table mappings. + */ + if (unlikely(flags & _PAGE_PCC_MASK)) + return NULL; + if (unlikely(flags & _PAGE_CACHABLE)) + return (void __iomem *)P1SEGADDR(offset); + + return (void __iomem *)P2SEGADDR(offset); + } + + /* P4 above the store queues are always mapped. */ + if (unlikely(offset >= P3_ADDR_MAX)) + return (void __iomem *)P4SEGADDR(offset); + + return NULL; +} +#else +#define __ioremap_29bit(offset, size, prot) NULL +#endif /* CONFIG_29BIT */ + /* * Remap an arbitrary physical address space into the kernel virtual * address space. Needed when the kernel wants to access high addresses @@ -43,6 +88,14 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size, unsigned long offset, last_addr, addr, orig_addr; void __iomem *mapped; + mapped = __ioremap_trapped(phys_addr, size); + if (mapped) + return mapped; + + mapped = __ioremap_29bit(phys_addr, size, pgprot); + if (mapped) + return mapped; + /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; if (!size || last_addr < phys_addr) -- cgit From 08732d1226edb7a0033d08286acada2b4e800c78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:52 +0200 Subject: sh: don't include in No need to expose the details of trapped I/O to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/include/asm/io.h | 1 - arch/sh/kernel/ioport.c | 1 + arch/sh/mm/ioremap.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index da08a61a2f7d..873f1399fa59 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -22,7 +22,6 @@ #define __IO_PREFIX generic #include -#include #include #include diff --git a/arch/sh/kernel/ioport.c b/arch/sh/kernel/ioport.c index 34f8cdbbcf0b..f39446a658bd 100644 --- a/arch/sh/kernel/ioport.c +++ b/arch/sh/kernel/ioport.c @@ -7,6 +7,7 @@ */ #include #include +#include unsigned long sh_io_port_base __read_mostly = -1; EXPORT_SYMBOL(sh_io_port_base); diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 69e55939e48a..21342581144d 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit From bc0f46b1caff2a42ce6e54b31504eb9ecc789f2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:53 +0200 Subject: sh: unexport register_trapped_io and match_trapped_io_handler Both functions are only used by compiled in core code. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/kernel/io_trapped.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index 037aab2708b7..004ad0130b10 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -102,7 +102,6 @@ int register_trapped_io(struct trapped_io *tiop) pr_warn("unable to install trapped io filter\n"); return -1; } -EXPORT_SYMBOL_GPL(register_trapped_io); void __iomem *match_trapped_io_handler(struct list_head *list, unsigned long offset, @@ -131,7 +130,6 @@ void __iomem *match_trapped_io_handler(struct list_head *list, spin_unlock_irqrestore(&trapped_lock, flags); return NULL; } -EXPORT_SYMBOL_GPL(match_trapped_io_handler); static struct trapped_io *lookup_tiop(unsigned long address) { -- cgit From 846f9e1fb9b9a2c4eecefa2fdbfb51e975a7d532 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:54 +0200 Subject: dma-mapping: consolidate the NO_DMA definition in kernel/dma/Kconfig Have a single definition that architetures can select. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/m68k/Kconfig | 4 +--- arch/m68k/Kconfig.machine | 1 + arch/um/Kconfig | 4 +--- kernel/dma/Kconfig | 3 +++ 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 6ad6cdac74b3..8e488369a7e5 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -17,6 +17,7 @@ config M68K select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 + select NO_DMA if !MMU && !COLDFIRE select HAVE_UID16 select VIRT_TO_BUS select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS @@ -60,9 +61,6 @@ config TIME_LOW_RES config NO_IOPORT_MAP def_bool y -config NO_DMA - def_bool (MMU && SUN3) || (!MMU && !COLDFIRE) - config ZONE_DMA bool default y diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index a82651d58af4..17e8c3a292d7 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -126,6 +126,7 @@ config SUN3 depends on MMU depends on !MMU_MOTOROLA select MMU_SUN3 if MMU + select NO_DMA select M68020 help This option enables support for the Sun 3 series of workstations diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 9318dc6d1a0c..32c1d1945033 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -15,6 +15,7 @@ config UML select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE select HAVE_COPY_THREAD_TLS + select NO_DMA select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS @@ -168,9 +169,6 @@ config MMAPPER This driver allows a host file to be used as emulated IO memory inside UML. -config NO_DMA - def_bool y - config PGTABLE_LEVELS int default 3 if 3_LEVEL_PGTABLES diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 1da3f44f2565..57533d07676f 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only +config NO_DMA + bool + config HAS_DMA bool depends on !NO_DMA -- cgit From cd57d07b1e4e08f95a27b59253b5c8a46abf4f29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:55 +0200 Subject: sh: don't allow non-coherent DMA for NOMMU The code handling non-coherent DMA depends on being able to remap code as non-cached. But that can't be done without an MMU, so using this option on NOMMU builds is broken. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1bf23e272483..7082a4a499fd 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -61,6 +61,7 @@ config SUPERH select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA select NEED_SG_DMA_LENGTH + select NO_DMA if !MMU && !DMA_COHERENT select NO_GENERIC_PCI_IOPORT_MAP if PCI select OLD_SIGACTION select OLD_SIGSUSPEND @@ -135,7 +136,7 @@ config DMA_COHERENT bool config DMA_NONCOHERENT - def_bool !DMA_COHERENT + def_bool !NO_DMA && !DMA_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE config PGTABLE_LEVELS -- cgit From 6dfdf673ccb24dccc95b342235cac3e585c5af34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 14:18:56 +0200 Subject: sh: use the generic dma coherent remap allocator This switches to using common code for the DMA allocations, including potential use of the CMA allocator if configured. Switching to the generic code enables DMA allocations from atomic context, which is required by the DMA API documentation, and also adds various other minor features drivers start relying upon. It also makes sure we have on tested code base for all architectures that require uncached pte bits for coherent DMA allocations. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/Kconfig | 2 ++ arch/sh/kernel/dma-coherent.c | 51 ++----------------------------------------- 2 files changed, 4 insertions(+), 49 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 7082a4a499fd..c315cc3d7017 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -137,7 +137,9 @@ config DMA_COHERENT config DMA_NONCOHERENT def_bool !NO_DMA && !DMA_COHERENT + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select DMA_DIRECT_REMAP config PGTABLE_LEVELS default 3 if X2TLB diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index d4811691b93c..cd46a9825e3c 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -3,60 +3,13 @@ * Copyright (C) 2004 - 2007 Paul Mundt */ #include -#include #include -#include #include #include -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret, *ret_nocache; - int order = get_order(size); - - gfp |= __GFP_ZERO; - - ret = (void *)__get_free_pages(gfp, order); - if (!ret) - return NULL; - - /* - * Pages from the page allocator may have data present in - * cache. So flush the cache before using uncached memory. - */ - arch_sync_dma_for_device(virt_to_phys(ret), size, - DMA_BIDIRECTIONAL); - - ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size); - if (!ret_nocache) { - free_pages((unsigned long)ret, order); - return NULL; - } - - split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order); - - *dma_handle = virt_to_phys(ret); - if (!WARN_ON(!dev)) - *dma_handle -= PFN_PHYS(dev->dma_pfn_offset); - - return ret_nocache; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - int order = get_order(size); - unsigned long pfn = (dma_handle >> PAGE_SHIFT); - int k; - - if (!WARN_ON(!dev)) - pfn += dev->dma_pfn_offset; - - for (k = 0; k < (1 << order); k++) - __free_pages(pfn_to_page(pfn + k), 0); - - iounmap(vaddr); + __flush_purge_region(page_address(page), size); } void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, -- cgit From e1cc9d8d596e233538a3ecaffea665185751db35 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jun 2020 01:43:23 +0200 Subject: sh: switch to copy_thread_tls() Use the copy_thread_tls() calling convention which passes tls through a register. This is required so we can remove the copy_thread{_tls}() split and remove the HAVE_COPY_THREAD_TLS macro. Cc: Rich Felker Cc: Yoshinori Sato Cc: linux-sh@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Rich Felker --- arch/sh/Kconfig | 1 + arch/sh/kernel/process_32.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c315cc3d7017..f86326b35b51 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -29,6 +29,7 @@ config SUPERH select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 049f11a5f9ab..aba16af59368 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -111,8 +111,8 @@ EXPORT_SYMBOL(dump_fpu); asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, unsigned long tls) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; @@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, ti->addr_limit = USER_DS; if (clone_flags & CLONE_SETTLS) - childregs->gbr = childregs->regs[0]; + childregs->gbr = tls; childregs->regs[0] = 0; /* Set return value for child */ p->thread.pc = (unsigned long) ret_from_fork; -- cgit From 9d2ec8f68e9dfbdd9ae9bdc79cc4deedc0ad4e6b Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Thu, 23 Jul 2020 01:13:20 +0200 Subject: sh: Rearrange blocks in entry-common.S This avoids out-of-range jumps that get auto-replaced by the assembler and prepares for the changes needed to implement SECCOMP_FILTER cleanly. Signed-off-by: Michael Karcher Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/kernel/entry-common.S | 57 ++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index 9bac5bbb67f3..c4d88d61890d 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -178,34 +178,6 @@ syscall_exit_work: bra resume_userspace nop - .align 2 -syscall_trace_entry: - ! Yes it is traced. - mov r15, r4 - mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies - jsr @r11 ! superior (will chomp R[0-7]) - nop - mov.l r0, @(OFF_R0,r15) ! Save return value - ! Reload R0-R4 from kernel stack, where the - ! parent may have modified them using - ! ptrace(POKEUSR). (Note that R0-R2 are - ! reloaded from the kernel stack by syscall_call - ! below, so don't need to be reloaded here.) - ! This allows the parent to rewrite system calls - ! and args on the fly. - mov.l @(OFF_R4,r15), r4 ! arg0 - mov.l @(OFF_R5,r15), r5 - mov.l @(OFF_R6,r15), r6 - mov.l @(OFF_R7,r15), r7 ! arg3 - mov.l @(OFF_R3,r15), r3 ! syscall_nr - ! - mov.l 6f, r10 ! Number of syscalls - cmp/hs r10, r3 - bf syscall_call - mov #-ENOSYS, r0 - bra syscall_exit - mov.l r0, @(OFF_R0,r15) ! Return value - __restore_all: mov #OFF_SR, r0 mov.l @(r0,r15), r0 ! get status register @@ -388,6 +360,35 @@ syscall_exit: bf syscall_exit_work bra __restore_all nop + + .align 2 +syscall_trace_entry: + ! Yes it is traced. + mov r15, r4 + mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies + jsr @r11 ! superior (will chomp R[0-7]) + nop + mov.l r0, @(OFF_R0,r15) ! Save return value + ! Reload R0-R4 from kernel stack, where the + ! parent may have modified them using + ! ptrace(POKEUSR). (Note that R0-R2 are + ! reloaded from the kernel stack by syscall_call + ! below, so don't need to be reloaded here.) + ! This allows the parent to rewrite system calls + ! and args on the fly. + mov.l @(OFF_R4,r15), r4 ! arg0 + mov.l @(OFF_R5,r15), r5 + mov.l @(OFF_R6,r15), r6 + mov.l @(OFF_R7,r15), r7 ! arg3 + mov.l @(OFF_R3,r15), r3 ! syscall_nr + ! + mov.l 6f, r10 ! Number of syscalls + cmp/hs r10, r3 + bf syscall_call + mov #-ENOSYS, r0 + bra syscall_exit + mov.l r0, @(OFF_R0,r15) ! Return value + .align 2 #if !defined(CONFIG_CPU_SH2) 1: .long TRA -- cgit From 0bb605c2c7f2b4b314b91510810b226de7f34fa1 Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Thu, 23 Jul 2020 01:13:21 +0200 Subject: sh: Add SECCOMP_FILTER Port sh to use the new SECCOMP_FILTER code. Signed-off-by: Michael Karcher Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/Kconfig | 1 + arch/sh/kernel/entry-common.S | 2 ++ arch/sh/kernel/ptrace_32.c | 5 +++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 +++++++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f86326b35b51..d20927128fce 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -28,6 +28,7 @@ config SUPERH select GUP_GET_PTE_LOW_HIGH if X2TLB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index c4d88d61890d..ad963104d22d 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -368,6 +368,8 @@ syscall_trace_entry: mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies jsr @r11 ! superior (will chomp R[0-7]) nop + cmp/eq #-1, r0 + bt syscall_exit mov.l r0, @(OFF_R0,r15) ! Save return value ! Reload R0-R4 from kernel stack, where the ! parent may have modified them using diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 64bfb714943e..25ccfbd02bfa 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -485,8 +485,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; - secure_computing_strict(regs->regs[0]); - if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) /* @@ -496,6 +494,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; + if (secure_computing() == -1) + return -1; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[0]); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 252140a52553..6eb21685c88f 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -122,6 +122,8 @@ struct seccomp_data { # define __NR_seccomp 358 # elif defined(__s390__) # define __NR_seccomp 348 +# elif defined(__sh__) +# define __NR_seccomp 372 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff @@ -1622,6 +1624,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_SYSCALL_NUM regs[4] # define SYSCALL_RET regs[2] # define SYSCALL_NUM_RET_SHARE_REG +#elif defined(__sh__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM gpr[3] +# define SYSCALL_RET gpr[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1693,7 +1699,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret) {} #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) || defined(__riscv) + defined(__s390__) || defined(__hppa__) || defined(__riscv) || defined(__sh__) { regs.SYSCALL_NUM = syscall; } -- cgit From 03dd061f0d203c3479791490d6b9359b2eaf9fec Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Thu, 23 Jul 2020 01:13:22 +0200 Subject: sh: bring syscall_set_return_value in line with other architectures Other architectures expect that syscall_set_return_value gets an already negative value as error. That's also what kernel/seccomp.c provides. Signed-off-by: Michael Karcher Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/include/asm/syscall_32.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 0b5b8e75edac..cb51a7528384 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -40,10 +40,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) - regs->regs[0] = -error; - else - regs->regs[0] = val; + regs->regs[0] = (long) error ?: val; } static inline void syscall_get_arguments(struct task_struct *task, -- cgit From 0c64a0dce51faa9c706fdf1f957d6f19878f4b81 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Aug 2020 14:42:45 +0200 Subject: sh: landisk: Add missing initialization of sh_io_port_base The Landisk setup code maps the CF IDE area using ioremap_prot(), and passes the resulting virtual addresses to the pata_platform driver, disguising them as I/O port addresses. Hence the pata_platform driver translates them again using ioport_map(). As CONFIG_GENERIC_IOMAP=n, and CONFIG_HAS_IOPORT_MAP=y, the SuperH-specific mapping code in arch/sh/kernel/ioport.c translates I/O port addresses to virtual addresses by adding sh_io_port_base, which defaults to -1, thus breaking the assumption of an identity mapping. Fix this by setting sh_io_port_base to zero. Fixes: 37b7a97884ba64bf ("sh: machvec IO death.") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- arch/sh/boards/mach-landisk/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 16b4d8b0bb85..2c44b94f82fb 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -82,6 +82,9 @@ device_initcall(landisk_devices_setup); static void __init landisk_setup(char **cmdline_p) { + /* I/O port identity mapping */ + __set_io_port_base(0); + /* LED ON */ __raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED); -- cgit From 9922c1deff915c2b67ec79ea6b87c289772c6492 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 14 Aug 2020 17:30:04 -0700 Subject: asm-generic: pgalloc.h: use correct #ifdef to enable pud_alloc_one() The #ifdef statement that guards the generic version of pud_alloc_one() by mistake used __HAVE_ARCH_PUD_FREE instead of __HAVE_ARCH_PUD_ALLOC_ONE. Fix it. Fixes: d9e8b929670b ("asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()") Reported-by: kernel test robot Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200812191415.GE163101@linux.ibm.com Signed-off-by: Linus Torvalds --- include/asm-generic/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 6f44810921aa..02932efad3ab 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -147,7 +147,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 -#ifndef __HAVE_ARCH_PUD_FREE +#ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table * @mm: the mm_struct of the current context -- cgit From a8a4b7aeaf841311cb13ff0f6c4710c7a00e68d4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 14 Aug 2020 17:30:07 -0700 Subject: Revert "mm/vmstat.c: do not show lowmem reserve protection information of empty zone" This reverts commit 26e7deadaae175. Sonny reported that one of their tests started failing on the latest kernel on their Chrome OS platform. The root cause is that the above commit removed the protection line of empty zone, while the parser used in the test relies on the protection line to mark the end of each zone. Let's revert it to avoid breaking userspace testing or applications. Fixes: 26e7deadaae175 ("mm/vmstat.c: do not show lowmem reserve protection information of empty zone)" Reported-by: Sonny Rao Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: David Rientjes Cc: [5.8.x] Link: http://lkml.kernel.org/r/20200811075412.12872-1-bhe@redhat.com Signed-off-by: Linus Torvalds --- mm/vmstat.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 727a26d1ec1d..e670f910cd2f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1642,12 +1642,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone->present_pages, zone_managed_pages(zone)); - /* If unpopulated, no other information is useful */ - if (!populated_zone(zone)) { - seq_putc(m, '\n'); - return; - } - seq_printf(m, "\n protection: (%ld", zone->lowmem_reserve[0]); @@ -1655,6 +1649,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_putc(m, ')'); + /* If unpopulated, no other information is useful */ + if (!populated_zone(zone)) { + seq_putc(m, '\n'); + return; + } + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) seq_printf(m, "\n %-12s %lu", zone_stat_name(i), zone_page_state(zone, i)); -- cgit From b1a3e75e466d96383508634f3d2e477ac45f2fc1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Aug 2020 17:30:10 -0700 Subject: lz4: fix kernel decompression speed This patch replaces all memcpy() calls with LZ4_memcpy() which calls __builtin_memcpy() so the compiler can inline it. LZ4 relies heavily on memcpy() with a constant size being inlined. In x86 and i386 pre-boot environments memcpy() cannot be inlined because memcpy() doesn't get defined as __builtin_memcpy(). An equivalent patch has been applied upstream so that the next import won't lose this change [1]. I've measured the kernel decompression speed using QEMU before and after this patch for the x86_64 and i386 architectures. The speed-up is about 10x as shown below. Code Arch Kernel Size Time Speed v5.8 x86_64 11504832 B 148 ms 79 MB/s patch x86_64 11503872 B 13 ms 885 MB/s v5.8 i386 9621216 B 91 ms 106 MB/s patch i386 9620224 B 10 ms 962 MB/s I also measured the time to decompress the initramfs on x86_64, i386, and arm. All three show the same decompression speed before and after, as expected. [1] https://github.com/lz4/lz4/pull/890 Signed-off-by: Nick Terrell Signed-off-by: Andrew Morton Cc: Yann Collet Cc: Gao Xiang Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Arvind Sankar Link: http://lkml.kernel.org/r/20200803194022.2966806-1-nickrterrell@gmail.com Signed-off-by: Linus Torvalds --- lib/lz4/lz4_compress.c | 4 ++-- lib/lz4/lz4_decompress.c | 18 +++++++++--------- lib/lz4/lz4defs.h | 10 ++++++++++ lib/lz4/lz4hc_compress.c | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index cc7b6d4cc7c7..90bb67994688 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -446,7 +446,7 @@ _last_literals: *op++ = (BYTE)(lastRun << ML_BITS); } - memcpy(op, anchor, lastRun); + LZ4_memcpy(op, anchor, lastRun); op += lastRun; } @@ -708,7 +708,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRunSize<= 8) && (dict == withPrefix64k || match >= lowPrefix)) { /* Copy the match. */ - memcpy(op + 0, match + 0, 8); - memcpy(op + 8, match + 8, 8); - memcpy(op + 16, match + 16, 2); + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. */ continue; @@ -263,7 +263,7 @@ static FORCE_INLINE int LZ4_decompress_generic( } } - memcpy(op, ip, length); + LZ4_memcpy(op, ip, length); ip += length; op += length; @@ -350,7 +350,7 @@ _copy_match: size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ @@ -360,7 +360,7 @@ _copy_match: while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } @@ -386,7 +386,7 @@ _copy_match: while (op < copyEnd) *op++ = *match++; } else { - memcpy(op, match, mlen); + LZ4_memcpy(op, match, mlen); } op = copyEnd; if (op == oend) @@ -400,7 +400,7 @@ _copy_match: op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; - memcpy(op + 4, match, 4); + LZ4_memcpy(op + 4, match, 4); match -= dec64table[offset]; } else { LZ4_copy8(op, match); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 1a7fa9d9170f..c91dd96ef629 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -137,6 +137,16 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) return put_unaligned_le16(value, memPtr); } +/* + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so apply its specialized memcpy() inlining logic. When + * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() + * as-if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) + static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index 1b61d874e337..e7ac8694b797 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -570,7 +570,7 @@ _Search3: *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun< Date: Fri, 14 Aug 2020 17:30:14 -0700 Subject: exec: restore EACCES of S_ISDIR execve() Patch series "Fix S_ISDIR execve() errno". Fix an errno change for execve() of directories, noticed by Marc Zyngier. Along with the fix, include a regression test to avoid seeing this return in the future. This patch (of 2): The return code for attempting to execute a directory has always been EACCES. Adjust the S_ISDIR exec test to reflect the old errno instead of the general EISDIR for other kinds of "open" attempts on directories. Fixes: 633fb6ac3980 ("exec: move S_ISREG() check earlier") Reported-by: Marc Zyngier Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Tested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200813231723.2725102-2-keescook@chromium.org Link: https://lore.kernel.org/lkml/20200813151305.6191993b@why Signed-off-by: Linus Torvalds --- fs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 2112e578dccc..e99e2a9da0f7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2849,8 +2849,10 @@ static int may_open(const struct path *path, int acc_mode, int flag) case S_IFLNK: return -ELOOP; case S_IFDIR: - if (acc_mode & (MAY_WRITE | MAY_EXEC)) + if (acc_mode & MAY_WRITE) return -EISDIR; + if (acc_mode & MAY_EXEC) + return -EACCES; break; case S_IFBLK: case S_IFCHR: -- cgit From 0f71241a8e32baf9ac290cf915bc9b3ff265f7d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Aug 2020 17:30:17 -0700 Subject: selftests/exec: add file type errno tests Make sure execve() returns the expected errno values for non-regular files. Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Marc Zyngier Link: http://lkml.kernel.org/r/20200813231723.2725102-3-keescook@chromium.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/exec/.gitignore | 1 + tools/testing/selftests/exec/Makefile | 5 +- tools/testing/selftests/exec/non-regular.c | 196 +++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/exec/non-regular.c diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 94b02a18f230..344a99c6da1b 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -10,3 +10,4 @@ execveat.denatured /recursion-depth xxxxxxxx* pipe +S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 4453b8f8def3..0a13b110c1e6 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script +TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test @@ -11,7 +11,8 @@ TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth -EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* +EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ + $(OUTPUT)/S_I*.test include ../lib.mk diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c new file mode 100644 index 000000000000..cd3a34aca93e --- /dev/null +++ b/tools/testing/selftests/exec/non-regular.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +/* Remove a file, ignoring the result if it didn't exist. */ +void rm(struct __test_metadata *_metadata, const char *pathname, + int is_dir) +{ + int rc; + + if (is_dir) + rc = rmdir(pathname); + else + rc = unlink(pathname); + + if (rc < 0) { + ASSERT_EQ(errno, ENOENT) { + TH_LOG("Not ENOENT: %s", pathname); + } + } else { + ASSERT_EQ(rc, 0) { + TH_LOG("Failed to remove: %s", pathname); + } + } +} + +FIXTURE(file) { + char *pathname; + int is_dir; +}; + +FIXTURE_VARIANT(file) +{ + const char *name; + int expected; + int is_dir; + void (*setup)(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant); + int major, minor, mode; /* for mknod() */ +}; + +void setup_link(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + const char * const paths[] = { + "/bin/true", + "/usr/bin/true", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(paths); i++) { + if (access(paths[i], X_OK) == 0) { + ASSERT_EQ(symlink(paths[i], self->pathname), 0); + return; + } + } + ASSERT_EQ(1, 0) { + TH_LOG("Could not find viable 'true' binary"); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFLNK) +{ + .name = "S_IFLNK", + .expected = ELOOP, + .setup = setup_link, +}; + +void setup_dir(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkdir(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFDIR) +{ + .name = "S_IFDIR", + .is_dir = 1, + .expected = EACCES, + .setup = setup_dir, +}; + +void setup_node(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + dev_t dev; + int rc; + + dev = makedev(variant->major, variant->minor); + rc = mknod(self->pathname, 0755 | variant->mode, dev); + ASSERT_EQ(rc, 0) { + if (errno == EPERM) + SKIP(return, "Please run as root; cannot mknod(%s)", + variant->name); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFBLK) +{ + .name = "S_IFBLK", + .expected = EACCES, + .setup = setup_node, + /* /dev/loop0 */ + .major = 7, + .minor = 0, + .mode = S_IFBLK, +}; + +FIXTURE_VARIANT_ADD(file, S_IFCHR) +{ + .name = "S_IFCHR", + .expected = EACCES, + .setup = setup_node, + /* /dev/zero */ + .major = 1, + .minor = 5, + .mode = S_IFCHR, +}; + +void setup_fifo(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkfifo(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFIFO) +{ + .name = "S_IFIFO", + .expected = EACCES, + .setup = setup_fifo, +}; + +FIXTURE_SETUP(file) +{ + ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6); + self->is_dir = variant->is_dir; + + rm(_metadata, self->pathname, variant->is_dir); + variant->setup(_metadata, self, variant); +} + +FIXTURE_TEARDOWN(file) +{ + rm(_metadata, self->pathname, self->is_dir); +} + +TEST_F(file, exec_errno) +{ + char * const argv[2] = { (char * const)self->pathname, NULL }; + + EXPECT_LT(execv(argv[0], argv), 0); + EXPECT_EQ(errno, variant->expected); +} + +/* S_IFSOCK */ +FIXTURE(sock) +{ + int fd; +}; + +FIXTURE_SETUP(sock) +{ + self->fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_TEARDOWN(sock) +{ + if (self->fd >= 0) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(sock, exec_errno) +{ + char * const argv[2] = { " magic socket ", NULL }; + char * const envp[1] = { NULL }; + + EXPECT_LT(fexecve(self->fd, argv, envp), 0); + EXPECT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN -- cgit From 14a36a435343931296277f4a3c5adf4e29a8f46d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 14 Aug 2020 17:30:20 -0700 Subject: mailmap: add entry for Greg Kurz I had stopped using gkurz@linux.vnet.ibm.com a while back already but this email address was shutdown last June when I quit IBM. It's about time to map it to groug@kaod.org. Signed-off-by: Greg Kurz Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/159724692879.76040.4938578139173154028.stgit@bahia.lan Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 83d8d0158f3b..57fe0085b465 100644 --- a/.mailmap +++ b/.mailmap @@ -104,6 +104,7 @@ Gerald Schaefer Greg Kroah-Hartman Greg Kroah-Hartman Greg Kroah-Hartman +Greg Kurz Gregory CLEMENT Hanjun Guo Heiko Carstens -- cgit From 1378a5ee451a5e87d0d8dd6356a0b7844db231f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:23 -0700 Subject: mm: store compound_nr as well as compound_order Patch series "THP prep patches". These are some generic cleanups and improvements, which I would like merged into mmotm soon. The first one should be a performance improvement for all users of compound pages, and the others are aimed at getting code to compile away when CONFIG_TRANSPARENT_HUGEPAGE is disabled (ie small systems). Also better documented / less confusing than the current prefix mixture of compound, hpage and thp. This patch (of 7): This removes a few instructions from functions which need to know how many pages are in a compound page. The storage used is either page->mapping on 64-bit or page->index on 32-bit. Both of these are fine to overlay on tail pages. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-1-willy@infradead.org Link: http://lkml.kernel.org/r/20200629151959.15779-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ++++- include/linux/mm_types.h | 1 + mm/page_alloc.c | 5 +++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e7602a3bcef1..10b1e6e5f885 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -922,12 +922,15 @@ static inline int compound_pincount(struct page *page) static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; + page[1].compound_nr = 1U << order; } /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { - return 1UL << compound_order(page); + if (!PageHead(page)) + return 1; + return page[1].compound_nr; } /* Returns the number of bytes in this potentially compound page. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0277fbab7c93..496c3ff97cce 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,6 +134,7 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + unsigned int compound_nr; /* 1 << compound_order */ }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b7d0ecf30b1..0e2bab486fea 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -666,8 +666,6 @@ void prep_compound_page(struct page *page, unsigned int order) int i; int nr_pages = 1 << order; - set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); - set_compound_order(page, order); __SetPageHead(page); for (i = 1; i < nr_pages; i++) { struct page *p = page + i; @@ -675,6 +673,9 @@ void prep_compound_page(struct page *page, unsigned int order) p->mapping = TAIL_MAPPING; set_compound_head(p, page); } + + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); + set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); if (hpage_pincount_available(page)) atomic_set(compound_pincount_ptr(page), 0); -- cgit From 419015675fef6c4b28689bd2ace559564c2e106c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:26 -0700 Subject: mm: move page-flags include to top of file Give up on the notion that we can remove page-flags.h from mm.h. There are currently 14 inline functions which use a PageFoo function. Also, two of the files directly included by mm.h include page-flags.h themselves, and there are probably more indirect inclusions. So just include it at the top like any other header file. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 10b1e6e5f885..fd0cd4e93029 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -668,11 +669,6 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; -/* - * FIXME: take this include out, include page-flags.h in - * files which need it (119 of them) - */ -#include #include /* -- cgit From 6ffbb45826f5d9ae09aa60cd88594b7816c96190 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:30 -0700 Subject: mm: add thp_order This function returns the order of a transparent huge page. It compiles to 0 if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 467302056e17..9521cfdf18ca 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -258,6 +258,19 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, else return NULL; } + +/** + * thp_order - Order of a transparent huge page. + * @page: Head page of a transparent huge page. + */ +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) + return HPAGE_PMD_ORDER; + return 0; +} + static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -317,6 +330,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return 0; +} + static inline int hpage_nr_pages(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); -- cgit From af3bbc12df80e8c279b94c752b6edca29841f4f5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:33 -0700 Subject: mm: add thp_size This function returns the number of bytes in a THP. It is like page_size(), but compiles to just PAGE_SIZE if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-5-willy@infradead.org Signed-off-by: Linus Torvalds --- drivers/nvdimm/btt.c | 4 +--- drivers/nvdimm/pmem.c | 6 ++---- include/linux/huge_mm.h | 11 +++++++++++ mm/internal.h | 2 +- mm/page_io.c | 2 +- mm/page_vma_mapped.c | 4 ++-- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 412d21d8f643..0ff610e728ff 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1490,10 +1490,8 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, { struct btt *btt = bdev->bd_disk->private_data; int rc; - unsigned int len; - len = hpage_nr_pages(page) * PAGE_SIZE; - rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); + rc = btt_do_bvec(btt, NULL, page, thp_size(page), 0, op, sector); if (rc == 0) page_endio(page, op_is_write(op), 0); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 94790e6e0e4c..fab29b514372 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -238,11 +238,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, blk_status_t rc; if (op_is_write(op)) - rc = pmem_do_write(pmem, page, 0, sector, - hpage_nr_pages(page) * PAGE_SIZE); + rc = pmem_do_write(pmem, page, 0, sector, thp_size(page)); else - rc = pmem_do_read(pmem, page, 0, sector, - hpage_nr_pages(page) * PAGE_SIZE); + rc = pmem_do_read(pmem, page, 0, sector, thp_size(page)); /* * The ->rw_page interface is subtle and tricky. The core * retries on any error, so we can only invoke page_endio() in diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9521cfdf18ca..9b33ac774fdd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -462,4 +462,15 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +/** + * thp_size - Size of a transparent huge page. + * @page: Head page of a transparent huge page. + * + * Return: Number of bytes in this page. + */ +static inline unsigned long thp_size(struct page *page) +{ + return PAGE_SIZE << thp_order(page); +} + #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/internal.h b/mm/internal.h index d11a9a8d2135..912bb1a1c10e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -396,7 +396,7 @@ vma_address(struct page *page, struct vm_area_struct *vma) unsigned long start, end; start = __vma_address(page, vma); - end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); + end = start + thp_size(page) - PAGE_SIZE; /* page should be within @vma mapping range */ VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); diff --git a/mm/page_io.c b/mm/page_io.c index 9e362567d454..f5e8bec8a8c7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -40,7 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); + bio_add_page(bio, page, thp_size(page), 0); } return bio; } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 719c35246cfa..e65629c056e8 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -227,7 +227,7 @@ next_pte: if (pvmw->address >= pvmw->vma->vm_end || pvmw->address >= __vma_address(pvmw->page, pvmw->vma) + - hpage_nr_pages(pvmw->page) * PAGE_SIZE) + thp_size(pvmw->page)) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { @@ -268,7 +268,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) unsigned long start, end; start = __vma_address(page, vma); - end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); + end = start + thp_size(page) - PAGE_SIZE; if (unlikely(end < vma->vm_start || start >= vma->vm_end)) return 0; -- cgit From 6c357848b44b4016ca422178aa368a7472245f6f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:37 -0700 Subject: mm: replace hpage_nr_pages with thp_nr_pages The thp prefix is more frequently used than hpage and we should be consistent between the various functions. [akpm@linux-foundation.org: fix mm/migrate.c] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: David Hildenbrand Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-6-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 13 +++++++++---- include/linux/mm_inline.h | 6 +++--- include/linux/pagemap.h | 6 +++--- mm/compaction.c | 2 +- mm/filemap.c | 2 +- mm/gup.c | 2 +- mm/internal.h | 2 +- mm/memcontrol.c | 10 +++++----- mm/memory_hotplug.c | 7 +++---- mm/mempolicy.c | 2 +- mm/migrate.c | 18 +++++++++--------- mm/mlock.c | 9 ++++----- mm/page_io.c | 2 +- mm/page_vma_mapped.c | 2 +- mm/rmap.c | 8 ++++---- mm/swap.c | 16 ++++++++-------- mm/swap_state.c | 6 +++--- mm/swapfile.c | 2 +- mm/vmscan.c | 6 +++--- mm/workingset.c | 6 +++--- 20 files changed, 65 insertions(+), 62 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b33ac774fdd..229f986d535a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -271,9 +271,14 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) { - if (unlikely(PageTransHuge(page))) + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) return HPAGE_PMD_NR; return 1; } @@ -336,9 +341,9 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +static inline int thp_nr_pages(struct page *page) { - VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PGFLAGS(PageTail(page), page); return 1; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 219bef41d87c..8fc71e9d7bb0 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -48,14 +48,14 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); } @@ -63,7 +63,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { list_del(&page->lru); - update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); } /** diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d1f4eff605ad..7de11dcd534d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,7 +381,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) if (PageHuge(head)) return head; - return head + (index & (hpage_nr_pages(head) - 1)); + return head + (index & (thp_nr_pages(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); @@ -773,7 +773,7 @@ static inline struct page *readahead_page(struct readahead_control *rac) page = xa_load(&rac->mapping->i_pages, rac->_index); VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = hpage_nr_pages(page); + rac->_batch_count = thp_nr_pages(page); return page; } @@ -796,7 +796,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; - rac->_batch_count += hpage_nr_pages(page); + rac->_batch_count += thp_nr_pages(page); /* * The page cache isn't using multi-index entries yet, diff --git a/mm/compaction.c b/mm/compaction.c index b89581bf859c..176dcded298e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1009,7 +1009,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, del_page_from_lru_list(page, lruvec, page_lru(page)); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page), - hpage_nr_pages(page)); + thp_nr_pages(page)); isolate_success: list_add(&page->lru, &cc->migratepages); diff --git a/mm/filemap.c b/mm/filemap.c index 8e75bce0346d..653190943aa7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -198,7 +198,7 @@ static void unaccount_page_cache_page(struct address_space *mapping, if (PageHuge(page)) return; - nr = hpage_nr_pages(page); + nr = thp_nr_pages(page); __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); if (PageSwapBacked(page)) { diff --git a/mm/gup.c b/mm/gup.c index 39e58df6925d..ae096ea7583f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1637,7 +1637,7 @@ check_again: mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } } } diff --git a/mm/internal.h b/mm/internal.h index 912bb1a1c10e..10c677655912 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -369,7 +369,7 @@ extern void clear_page_mlock(struct page *page); static inline void mlock_migrate_page(struct page *newpage, struct page *page) { if (TestClearPageMlocked(page)) { - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); /* Holding pmd lock, no change in irq context: __mod is safe */ __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9d87082e64aa..b807952b4d43 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5589,7 +5589,7 @@ static int mem_cgroup_move_account(struct page *page, { struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1; + unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; int ret; VM_BUG_ON(from == to); @@ -6682,7 +6682,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, */ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - unsigned int nr_pages = hpage_nr_pages(page); + unsigned int nr_pages = thp_nr_pages(page); struct mem_cgroup *memcg = NULL; int ret = 0; @@ -6912,7 +6912,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; /* Force-charge the new page. The old one will be freed soon */ - nr_pages = hpage_nr_pages(newpage); + nr_pages = thp_nr_pages(newpage); page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -7114,7 +7114,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * ancestor for the swap instead and transfer the memory+swap charge. */ swap_memcg = mem_cgroup_id_get_online(memcg); - nr_entries = hpage_nr_pages(page); + nr_entries = thp_nr_pages(page); /* Get references for the tail pages, too */ if (nr_entries > 1) mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); @@ -7158,7 +7158,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) */ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { - unsigned int nr_pages = hpage_nr_pages(page); + unsigned int nr_pages = thp_nr_pages(page); struct page_counter *counter; struct mem_cgroup *memcg; unsigned short oldid; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c32ead89c911..e9d5ab5d3ca0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1299,7 +1299,7 @@ static int do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; - struct page *page; + struct page *page, *head; int ret = 0; LIST_HEAD(source); @@ -1307,15 +1307,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (!pfn_valid(pfn)) continue; page = pfn_to_page(pfn); + head = compound_head(page); if (PageHuge(page)) { - struct page *head = compound_head(page); pfn = page_to_pfn(head) + compound_nr(head) - 1; isolate_huge_page(head, &source); continue; } else if (PageTransHuge(page)) - pfn = page_to_pfn(compound_head(page)) - + hpage_nr_pages(page) - 1; + pfn = page_to_pfn(head) + thp_nr_pages(page) - 1; /* * HWPoison pages have elevated reference counts so the migration would diff --git a/mm/mempolicy.c b/mm/mempolicy.c index afaa09ff9f6c..eddbe4e56c73 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1049,7 +1049,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } else if (flags & MPOL_MF_STRICT) { /* * Non-movable page may reach here. And, there may be diff --git a/mm/migrate.c b/mm/migrate.c index 5053439be6ab..34a842a8eb6a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -193,7 +193,7 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -thp_nr_pages(page)); putback_lru_page(page); } } @@ -386,7 +386,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += hpage_nr_pages(page) + page_has_private(page); + expected_count += thp_nr_pages(page) + page_has_private(page); return expected_count; } @@ -441,7 +441,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - page_ref_add(newpage, hpage_nr_pages(page)); /* add cache reference */ + page_ref_add(newpage, thp_nr_pages(page)); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -474,7 +474,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - hpage_nr_pages(page)); + page_ref_unfreeze(page, expected_count - thp_nr_pages(page)); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -591,7 +591,7 @@ static void copy_huge_page(struct page *dst, struct page *src) } else { /* thp page */ BUG_ON(!PageTransHuge(src)); - nr_pages = hpage_nr_pages(src); + nr_pages = thp_nr_pages(src); } for (i = 0; i < nr_pages; i++) { @@ -1213,7 +1213,7 @@ out: */ if (likely(!__PageMovable(page))) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -thp_nr_pages(page)); } /* @@ -1446,7 +1446,7 @@ retry: * during migration. */ is_thp = PageTransHuge(page); - nr_subpages = hpage_nr_pages(page); + nr_subpages = thp_nr_pages(page); cond_resched(); if (PageHuge(page)) @@ -1670,7 +1670,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } out_putpage: /* @@ -2034,7 +2034,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) page_lru = page_is_file_lru(page); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru, - hpage_nr_pages(page)); + thp_nr_pages(page)); /* * Isolating the page has taken another reference, so the diff --git a/mm/mlock.c b/mm/mlock.c index f8736136fad7..93ca2bf30b4f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -61,8 +61,7 @@ void clear_page_mlock(struct page *page) if (!TestClearPageMlocked(page)) return; - mod_zone_page_state(page_zone(page), NR_MLOCK, - -hpage_nr_pages(page)); + mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGCLEARED); /* * The previous TestClearPageMlocked() corresponds to the smp_mb() @@ -95,7 +94,7 @@ void mlock_vma_page(struct page *page) if (!TestSetPageMlocked(page)) { mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); + thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); if (!isolate_lru_page(page)) putback_lru_page(page); @@ -192,7 +191,7 @@ unsigned int munlock_vma_page(struct page *page) /* * Serialize with any parallel __split_huge_page_refcount() which * might otherwise copy PageMlocked to part of the tail pages before - * we clear it in the head page. It also stabilizes hpage_nr_pages(). + * we clear it in the head page. It also stabilizes thp_nr_pages(). */ spin_lock_irq(&pgdat->lru_lock); @@ -202,7 +201,7 @@ unsigned int munlock_vma_page(struct page *page) goto unlock_out; } - nr_pages = hpage_nr_pages(page); + nr_pages = thp_nr_pages(page); __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); if (__munlock_isolate_lru_page(page, true)) { diff --git a/mm/page_io.c b/mm/page_io.c index f5e8bec8a8c7..454b70d8cda7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -274,7 +274,7 @@ static inline void count_swpout_vm_event(struct page *page) if (unlikely(PageTransHuge(page))) count_vm_event(THP_SWPOUT); #endif - count_vm_events(PSWPOUT, hpage_nr_pages(page)); + count_vm_events(PSWPOUT, thp_nr_pages(page)); } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index e65629c056e8..5e77b269c330 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -61,7 +61,7 @@ static inline bool pfn_is_match(struct page *page, unsigned long pfn) return page_pfn == pfn; /* THP can be referenced by any subpage */ - return pfn >= page_pfn && pfn - page_pfn < hpage_nr_pages(page); + return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page); } /** diff --git a/mm/rmap.c b/mm/rmap.c index 6cce9ef06753..4ace1e32f705 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1130,7 +1130,7 @@ void do_page_add_anon_rmap(struct page *page, } if (first) { - int nr = compound ? hpage_nr_pages(page) : 1; + int nr = compound ? thp_nr_pages(page) : 1; /* * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and @@ -1169,7 +1169,7 @@ void do_page_add_anon_rmap(struct page *page, void page_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, bool compound) { - int nr = compound ? hpage_nr_pages(page) : 1; + int nr = compound ? thp_nr_pages(page) : 1; VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); __SetPageSwapBacked(page); @@ -1860,7 +1860,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, return; pgoff_start = page_to_pgoff(page); - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff_start, pgoff_end) { struct vm_area_struct *vma = avc->vma; @@ -1913,7 +1913,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, return; pgoff_start = page_to_pgoff(page); - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; if (!locked) i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, diff --git a/mm/swap.c b/mm/swap.c index 9285e60c7d6e..d26c22baf7c5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -241,7 +241,7 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, del_page_from_lru_list(page, lruvec, page_lru(page)); ClearPageActive(page); add_page_to_lru_list_tail(page, lruvec, page_lru(page)); - (*pgmoved) += hpage_nr_pages(page); + (*pgmoved) += thp_nr_pages(page); } } @@ -312,7 +312,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) void lru_note_cost_page(struct page *page) { lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)), - page_is_file_lru(page), hpage_nr_pages(page)); + page_is_file_lru(page), thp_nr_pages(page)); } static void __activate_page(struct page *page, struct lruvec *lruvec, @@ -320,7 +320,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int lru = page_lru_base_type(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, lru); SetPageActive(page); @@ -500,7 +500,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, * lock is held(spinlock), which implies preemption disabled. */ __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); + thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } lru_cache_add(page); @@ -532,7 +532,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, { int lru; bool active; - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); if (!PageLRU(page)) return; @@ -580,7 +580,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, { if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { int lru = page_lru_base_type(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); ClearPageActive(page); @@ -599,7 +599,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { bool active = PageActive(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, LRU_INACTIVE_ANON + active); @@ -972,7 +972,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, { enum lru_list lru; int was_unevictable = TestClearPageUnevictable(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); VM_BUG_ON_PAGE(PageLRU(page), page); diff --git a/mm/swap_state.c b/mm/swap_state.c index b73aabdfd35a..d9d4a49f3241 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -130,7 +130,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swp_offset(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); - unsigned long i, nr = hpage_nr_pages(page); + unsigned long i, nr = thp_nr_pages(page); void *old; VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -183,7 +183,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow) { struct address_space *address_space = swap_address_space(entry); - int i, nr = hpage_nr_pages(page); + int i, nr = thp_nr_pages(page); pgoff_t idx = swp_offset(entry); XA_STATE(xas, &address_space->i_pages, idx); @@ -278,7 +278,7 @@ void delete_from_swap_cache(struct page *page) xa_unlock_irq(&address_space->i_pages); put_swap_page(page, entry); - page_ref_sub(page, hpage_nr_pages(page)); + page_ref_sub(page, thp_nr_pages(page)); } void clear_shadow_from_swap_cache(int type, unsigned long begin, diff --git a/mm/swapfile.c b/mm/swapfile.c index e653eea1eb88..eb410d3c8de8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1370,7 +1370,7 @@ void put_swap_page(struct page *page, swp_entry_t entry) unsigned char *map; unsigned int i, free_entries = 0; unsigned char val; - int size = swap_entry_size(hpage_nr_pages(page)); + int size = swap_entry_size(thp_nr_pages(page)); si = _swap_info_get(entry); if (!si) diff --git a/mm/vmscan.c b/mm/vmscan.c index 738115ed75e2..99e1796eb833 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1354,7 +1354,7 @@ static unsigned int shrink_page_list(struct list_head *page_list, case PAGE_ACTIVATE: goto activate_locked; case PAGE_SUCCESS: - stat->nr_pageout += hpage_nr_pages(page); + stat->nr_pageout += thp_nr_pages(page); if (PageWriteback(page)) goto keep; @@ -1862,7 +1862,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, SetPageLRU(page); lru = page_lru(page); - nr_pages = hpage_nr_pages(page); + nr_pages = thp_nr_pages(page); update_lru_size(lruvec, lru, page_zonenum(page), nr_pages); list_move(&page->lru, &lruvec->lists[lru]); @@ -2065,7 +2065,7 @@ static void shrink_active_list(unsigned long nr_to_scan, * so we ignore them here. */ if ((vm_flags & VM_EXEC) && page_is_file_lru(page)) { - nr_rotated += hpage_nr_pages(page); + nr_rotated += thp_nr_pages(page); list_add(&page->lru, &l_active); continue; } diff --git a/mm/workingset.c b/mm/workingset.c index 8cbe4e3cbe5c..92e66113a577 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -263,7 +263,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(!PageLocked(page), page); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); @@ -374,7 +374,7 @@ void workingset_refault(struct page *page, void *shadow) goto out; SetPageActive(page); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); /* Page was active prior to eviction */ @@ -411,7 +411,7 @@ void workingset_activation(struct page *page) if (!mem_cgroup_disabled() && !memcg) goto out; lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); } -- cgit From 2be1d71841b7ecfb01ce4c59f6e1d082c3c18a8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:40 -0700 Subject: mm: add thp_head This is like compound_head() but compiles away when CONFIG_TRANSPARENT_HUGEPAGE is not enabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-7-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 229f986d535a..8a8bc46a2432 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -259,6 +259,15 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } +/** + * thp_head - Head page of a transparent huge page. + * @page: Any page (tail, head or regular) found in the page cache. + */ +static inline struct page *thp_head(struct page *page) +{ + return compound_head(page); +} + /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. @@ -335,6 +344,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline struct page *thp_head(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return page; +} + static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); -- cgit From ee6c400f5c05459b8c5f2884a176a1287ce2f68f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:43 -0700 Subject: mm: introduce offset_in_thp Mirroring offset_in_page(), this gives you the offset within this particular page, no matter what size page it is. It optimises down to offset_in_page() if CONFIG_TRANSPARENT_HUGEPAGE is not set. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-8-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index fd0cd4e93029..2e7ec3ee34cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1594,6 +1594,7 @@ static inline void clear_page_pfmemalloc(struct page *page) extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) +#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in -- cgit From c734124c5c824511f553f794a514a185dfc0e3e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 14 Aug 2020 17:30:46 -0700 Subject: fs: autofs: delete repeated words in comments Drop duplicated words {the, at} in comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Ian Kent Link: http://lkml.kernel.org/r/20200811021817.24982-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- fs/autofs/dev-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index f3a0f412b43b..75105f45c51a 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -20,7 +20,7 @@ * another mount. This situation arises when starting automount(8) * or other user space daemon which uses direct mounts or offset * mounts (used for autofs lazy mount/umount of nested mount trees), - * which have been left busy at at service shutdown. + * which have been left busy at service shutdown. */ typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, @@ -496,7 +496,7 @@ static int autofs_dev_ioctl_askumount(struct file *fp, * located path is the root of a mount we return 1 along with * the super magic of the mount or 0 otherwise. * - * In both cases the the device number (as returned by + * In both cases the device number (as returned by * new_encode_dev()) is also returned. */ static int autofs_dev_ioctl_ismountpoint(struct file *fp, -- cgit From 88db0aa2421666d2f73486d15b239a4521983d55 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 14 Aug 2020 17:31:07 -0700 Subject: all arch: remove system call sys_sysctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. [nixiaoming@huawei.com: s390: fix build error for sys_call_table_emu] Link: http://lkml.kernel.org/r/20200618141426.16884-1-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Signed-off-by: Andrew Morton Acked-by: Will Deacon [arm/arm64] Acked-by: "Eric W. Biederman" Cc: Aleksa Sarai Cc: Alexander Shishkin Cc: Al Viro Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bin Meng Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: chenzefeng Cc: Christian Borntraeger Cc: Christian Brauner Cc: Chris Zankel Cc: David Howells Cc: David S. Miller Cc: Diego Elio Pettenò Cc: Dmitry Vyukov Cc: Dominik Brodowski Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Iurii Zaikin Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: Jiri Olsa Cc: Kars de Jong Cc: Kees Cook Cc: Krzysztof Kozlowski Cc: Luis Chamberlain Cc: Marco Elver Cc: Mark Rutland Cc: Martin K. Petersen Cc: Masahiro Yamada Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miklos Szeredi Cc: Minchan Kim Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Nick Piggin Cc: Oleg Nesterov Cc: Olof Johansson Cc: Paul Burton Cc: "Paul E. McKenney" Cc: Paul Mackerras Cc: Peter Zijlstra (Intel) Cc: Randy Dunlap Cc: Ravi Bangoria Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sami Tolvanen Cc: Sargun Dhillon Cc: Stephen Rothwell Cc: Sudeep Holla Cc: Sven Schnelle Cc: Thiago Jung Bauermann Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Yoshinori Sato Cc: Zhou Yanjie Link: http://lkml.kernel.org/r/20200616030734.87257-1-nixiaoming@huawei.com Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/configs/am200epdkit_defconfig | 1 - arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 4 +- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/configs/cu1000-neo_defconfig | 1 - arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/configs/dreamcast_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/microdev_defconfig | 1 - arch/sh/configs/migor_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rts7751r2d1_defconfig | 1 - arch/sh/configs/rts7751r2dplus_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7619_defconfig | 1 - arch/sh/configs/se7705_defconfig | 1 - arch/sh/configs/se7750_defconfig | 1 - arch/sh/configs/se7751_defconfig | 1 - arch/sh/configs/secureedge5410_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh7710voipgw_defconfig | 1 - arch/sh/configs/sh7757lcr_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/shmin_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h | 2 - include/linux/sysctl.h | 6 +- kernel/Makefile | 2 +- kernel/sys_ni.c | 1 - kernel/sysctl_binary.c | 171 --------------------- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 52 files changed, 24 insertions(+), 227 deletions(-) delete mode 100644 kernel/sysctl_binary.c diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a28fb211881d..ec8bed9e7b75 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 316 common mlockall sys_mlockall 317 common munlockall sys_munlockall 318 common sysinfo sys_sysinfo -319 common _sysctl sys_sysctl +319 common _sysctl sys_ni_syscall # 320 was sys_idle 321 common oldumount sys_oldumount 322 common swapon sys_swapon diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig index f56ac394caf1..4e49d6cb2f62 100644 --- a/arch/arm/configs/am200epdkit_defconfig +++ b/arch/arm/configs/am200epdkit_defconfig @@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum" CONFIG_SYSVIPC=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 7e8ee4adf269..171077cbf419 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -162,7 +162,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 17e81bd9a2d3..734860ac7cf9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev) __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 __SYSCALL(__NR_fdatasync, sys_fdatasync) -#define __NR__sysctl 149 -__SYSCALL(__NR__sysctl, compat_sys_sysctl) + /* 149 was sys_sysctl */ +__SYSCALL(149, sys_ni_syscall) #define __NR_mlock 150 __SYSCALL(__NR_mlock, sys_mlock) #define __NR_munlock 151 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ced9c83e47c9..f52a41f4c340 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -135,7 +135,7 @@ 123 common writev sys_writev 124 common pread64 sys_pread64 125 common pwrite64 sys_pwrite64 -126 common _sysctl sys_sysctl +126 common _sysctl sys_ni_syscall 127 common mmap sys_mmap 128 common munmap sys_munmap 129 common mlock sys_mlock diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 1a4822de7292..81fc799d8392 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3f4be8e7238..b4e263916f41 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig index 6b471cdb16cf..e924c817f73d 100644 --- a/arch/mips/configs/cu1000-neo_defconfig +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 6b4ee92e3aed..f9df9edb67a4 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -159,7 +159,7 @@ 149 n32 munlockall sys_munlockall 150 n32 vhangup sys_vhangup 151 n32 pivot_root sys_pivot_root -152 n32 _sysctl compat_sys_sysctl +152 n32 _sysctl sys_ni_syscall 153 n32 prctl sys_prctl 154 n32 adjtimex sys_adjtimex_time32 155 n32 setrlimit compat_sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 391acbf425a0..557f9954a2b9 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -159,7 +159,7 @@ 149 n64 munlockall sys_munlockall 150 n64 vhangup sys_vhangup 151 n64 pivot_root sys_pivot_root -152 n64 _sysctl sys_sysctl +152 n64 _sysctl sys_ni_syscall 153 n64 prctl sys_prctl 154 n64 adjtimex sys_adjtimex 155 n64 setrlimit sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 5727c5187508..195b43cf27c8 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -164,7 +164,7 @@ 150 o32 unused150 sys_ni_syscall 151 o32 getsid sys_getsid 152 o32 fdatasync sys_fdatasync -153 o32 _sysctl sys_sysctl compat_sys_sysctl +153 o32 _sysctl sys_ni_syscall 154 o32 mlock sys_mlock 155 o32 munlock sys_munlock 156 o32 mlockall sys_mlockall diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 292baabefade..def64d221cd4 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -163,7 +163,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index be9f74546068..c2d737ff2e7b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -197,7 +197,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index f1fda4375526..10456bc936fb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock sys_mlock 151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig index ae067e0b15e3..6a82c7b8ff32 100644 --- a/arch/sh/configs/dreamcast_defconfig +++ b/arch/sh/configs/dreamcast_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_MODULES=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index a5b865a75d22..9a988c347e9d 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index a92db6694ce2..70e6605d7f7e 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7709=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 567af752b1bb..ba6ec042606f 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 10f6d371ce2c..05e4ac6fed5f 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index ed84d1303acf..c65667d00313 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH4_202=y diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 37e9521a99e5..a24cf8cd2cea 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index c97ec60cff27..e922659fdadb 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 55fce65eb454..5978866358ec 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index 6a3cfe08295f..fc9c22152b08 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 2b3d7d280672..ff3fd6787fd6 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 21a43f14ffac..ff5bb4489922 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -18,7 +18,6 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 4e794e719a28..5d6c19338ebf 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 3264415a5931..71a672c30716 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 4496b94b7d88..ed00a6eeadf5 100644 --- a/arch/sh/configs/se7705_defconfig +++ b/arch/sh/configs/se7705_defconfig @@ -2,7 +2,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index b23f67542728..3f1c13799d79 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig index 162343683937..4a024065bb75 100644 --- a/arch/sh/configs/se7751_defconfig +++ b/arch/sh/configs/se7751_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig index 360592d63a2f..8422599cfb04 100644 --- a/arch/sh/configs/secureedge5410_defconfig +++ b/arch/sh/configs/secureedge5410_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 87db9a84b5ec..f0073ed39947 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index 08426913c0e3..0d814770b07f 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index d0933a9b9799..a2700ab165af 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index d0a0aa74cecf..26c5fd02c87a 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index a27b129b93c5..c0b6f40d01cc 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 4ec961ace688..ba887f1351be 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 96848db9659e..ae0a00beea5f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 46024e80ee86..4af114e84f20 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -300,7 +300,7 @@ 249 64 nanosleep sys_nanosleep 250 32 mremap sys_mremap 250 64 mremap sys_64_mremap -251 common _sysctl sys_sysctl compat_sys_sysctl +251 common _sysctl sys_ni_syscall 252 common getsid sys_getsid 253 common fdatasync sys_fdatasync 254 32 nfsservctl sys_ni_syscall sys_nis_syscall diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index e31a75262c9c..9d1102873666 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -160,7 +160,7 @@ 146 i386 writev sys_writev compat_sys_writev 147 i386 getsid sys_getsid 148 i386 fdatasync sys_fdatasync -149 i386 _sysctl sys_sysctl compat_sys_sysctl +149 i386 _sysctl sys_ni_syscall 150 i386 mlock sys_mlock 151 i386 munlock sys_munlock 152 i386 mlockall sys_mlockall diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 9d82078c949a..f30d6ae9a688 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index d216ccba42f7..6276e3c2d3fc 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -222,7 +222,7 @@ 204 common quotactl sys_quotactl # 205 was old nfsservctl 205 common nfsservctl sys_ni_syscall -206 common _sysctl sys_sysctl +206 common _sysctl sys_ni_syscall 207 common bdflush sys_bdflush 208 common uname sys_newuname 209 common sysinfo sys_sysinfo diff --git a/include/linux/compat.h b/include/linux/compat.h index c4255d8a4a8a..d38c4d7e83bd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -851,7 +851,6 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); -asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); /* obsolete: fs/readdir.c */ asmlinkage long compat_sys_old_readdir(unsigned int fd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dc2b827c81e5..75ac7f8ae93c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -47,7 +47,6 @@ struct stat64; struct statfs; struct statfs64; struct statx; -struct __sysctl_args; struct sysinfo; struct timespec; struct __kernel_old_timeval; @@ -1117,7 +1116,6 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_bdflush(int func, long data); asmlinkage long sys_oldumount(char __user *name); asmlinkage long sys_uselib(const char __user *library); -asmlinkage long sys_sysctl(struct __sysctl_args __user *args); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); asmlinkage long sys_fork(void); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 50bb7f383a1b..51298a4f4623 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -74,15 +74,13 @@ int proc_do_static_key(struct ctl_table *table, int write, void *buffer, * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. * - * The table's mode will be honoured both for sys_sysctl(2) and - * proc-fs access. + * The table's mode will be honoured for proc-fs access. * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table + * The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * diff --git a/kernel/Makefile b/kernel/Makefile index b3da548691c9..9a20016d4900 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,7 +5,7 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o softirq.o resource.o \ - sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ + sysctl.o capability.o ptrace.o user.o \ signal.o sys.o umh.o workqueue.o pid.o task_work.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 3b69a560a7ac..4d59775ea79c 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -364,7 +364,6 @@ COND_SYSCALL(socketcall); COND_SYSCALL_COMPAT(socketcall); /* compat syscalls for arm64, x86, ... */ -COND_SYSCALL_COMPAT(sysctl); COND_SYSCALL_COMPAT(fanotify_mark); /* x86 */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c deleted file mode 100644 index 7d550cc76a3b..000000000000 --- a/kernel/sysctl_binary.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include "../fs/xfs/xfs_sysctl.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static ssize_t binary_sysctl(const int *name, int nlen, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -static void deprecated_sysctl_warning(const int *name, int nlen) -{ - int i; - - /* - * CTL_KERN/KERN_VERSION is used by older glibc and cannot - * ever go away. - */ - if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION) - return; - - if (printk_ratelimit()) { - printk(KERN_INFO - "warning: process `%s' used the deprecated sysctl " - "system call with ", current->comm); - for (i = 0; i < nlen; i++) - printk(KERN_CONT "%d.", name[i]); - printk(KERN_CONT "\n"); - } - return; -} - -#define WARN_ONCE_HASH_BITS 8 -#define WARN_ONCE_HASH_SIZE (1<nlen. */ - if (nlen < 0 || nlen > CTL_MAXNAME) - return -ENOTDIR; - /* Read in the sysctl name for simplicity */ - for (i = 0; i < nlen; i++) - if (get_user(name[i], args_name + i)) - return -EFAULT; - - warn_on_bintable(name, nlen); - - return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen); -} - -SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) -{ - struct __sysctl_args tmp; - size_t oldlen = 0; - ssize_t result; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && !tmp.oldlenp) - return -EFAULT; - - if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp)) - return -EFAULT; - - result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen, - tmp.newval, tmp.newlen); - - if (result >= 0) { - oldlen = result; - result = 0; - } - - if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp)) - return -EFAULT; - - return result; -} - - -#ifdef CONFIG_COMPAT - -struct compat_sysctl_args { - compat_uptr_t name; - int nlen; - compat_uptr_t oldval; - compat_uptr_t oldlenp; - compat_uptr_t newval; - compat_size_t newlen; - compat_ulong_t __unused[4]; -}; - -COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args) -{ - struct compat_sysctl_args tmp; - compat_size_t __user *compat_oldlenp; - size_t oldlen = 0; - ssize_t result; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && !tmp.oldlenp) - return -EFAULT; - - compat_oldlenp = compat_ptr(tmp.oldlenp); - if (compat_oldlenp && get_user(oldlen, compat_oldlenp)) - return -EFAULT; - - result = do_sysctl(compat_ptr(tmp.name), tmp.nlen, - compat_ptr(tmp.oldval), oldlen, - compat_ptr(tmp.newval), tmp.newlen); - - if (result >= 0) { - oldlen = result; - result = 0; - } - - if (compat_oldlenp && put_user(oldlen, compat_oldlenp)) - return -EFAULT; - - return result; -} - -#endif /* CONFIG_COMPAT */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b190f2eb2611..3ca6fe057a0b 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -193,7 +193,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 56ae24b6e4be..6a0bbea225db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock compat_sys_mlock 151 common munlock sys_munlock compat_sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 9d82078c949a..f30d6ae9a688 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex -- cgit From 69d0b54d41f915ae45afed6669d34a305cf95ff3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:14 -0700 Subject: mm/kmemleak: silence KCSAN splats in checksum Even if KCSAN is disabled for kmemleak, update_checksum() could still call crc32() (which is outside of kmemleak.c) to dereference object->pointer. Thus, the value of object->pointer could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in crc32_le_base / do_raw_spin_lock write to 0xffffb0ea683a7d50 of 4 bytes by task 23575 on cpu 12: do_raw_spin_lock+0x114/0x200 debug_spin_lock_after at kernel/locking/spinlock_debug.c:91 (inlined by) do_raw_spin_lock at kernel/locking/spinlock_debug.c:115 _raw_spin_lock+0x40/0x50 __handle_mm_fault+0xa9e/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffb0ea683a7d50 of 4 bytes by task 839 on cpu 60: crc32_le_base+0x67/0x350 crc32_le_base+0x67/0x350: crc32_body at lib/crc32.c:106 (inlined by) crc32_le_generic at lib/crc32.c:179 (inlined by) crc32_le at lib/crc32.c:197 kmemleak_scan+0x528/0xd90 update_checksum at mm/kmemleak.c:1172 (inlined by) kmemleak_scan at mm/kmemleak.c:1497 kmemleak_scan_thread+0xcc/0xfa kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 If a shattered value was returned due to a data race, it will be corrected in the next scan. Thus, let KCSAN ignore all reads in the region to silence KCSAN in case the write side is non-atomic. Suggested-by: Marco Elver Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Acked-by: Catalin Marinas Link: http://lkml.kernel.org/r/20200317182754.2180-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e362dc3d2028..5e252d91eb14 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object) u32 old_csum = object->checksum; kasan_disable_current(); + kcsan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); + kcsan_enable_current(); return object->checksum != old_csum; } -- cgit From 96bdd2bcc1a79818eac6902d6f9565b720f726aa Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:17 -0700 Subject: mm/frontswap: mark various intentional data races There are a few information counters that are intentionally not protected against increment races, so just annotate them using the data_race() macro. BUG: KCSAN: data-race in __frontswap_store / __frontswap_store write to 0xffffffff8b7174d8 of 8 bytes by task 6396 on cpu 103: __frontswap_store+0x2d0/0x344 inc_frontswap_failed_stores at mm/frontswap.c:70 (inlined by) __frontswap_store at mm/frontswap.c:280 swap_writepage+0x83/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffffff8b7174d8 of 8 bytes by task 6405 on cpu 47: __frontswap_store+0x2b9/0x344 inc_frontswap_failed_stores at mm/frontswap.c:70 (inlined by) __frontswap_store at mm/frontswap.c:280 swap_writepage+0x83/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1581114499-5042-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/frontswap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/frontswap.c b/mm/frontswap.c index 9d977b1fc016..2183a56c7874 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -61,16 +61,16 @@ static u64 frontswap_failed_stores; static u64 frontswap_invalidates; static inline void inc_frontswap_loads(void) { - frontswap_loads++; + data_race(frontswap_loads++); } static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; + data_race(frontswap_succ_stores++); } static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; + data_race(frontswap_failed_stores++); } static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; + data_race(frontswap_invalidates++); } #else static inline void inc_frontswap_loads(void) { } -- cgit From 7b37e22675dfe8673083aaea84c9e7d6e436abbc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:20 -0700 Subject: mm/page_io: mark various intentional data races struct swap_info_struct si.flags could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in scan_swap_map_slots / swap_readpage write to 0xffff9c77b80ac400 of 8 bytes by task 91325 on cpu 16: scan_swap_map_slots+0x6fe/0xb50 scan_swap_map_slots at mm/swapfile.c:887 get_swap_pages+0x39d/0x5c0 get_swap_page+0x377/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1740/0x2820 shrink_inactive_list+0x316/0x8b0 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9c77b80ac400 of 8 bytes by task 5422 on cpu 7: swap_readpage+0x204/0x6a0 swap_readpage at mm/page_io.c:380 read_swap_cache_async+0xa2/0xb0 swapin_readahead+0x6a0/0x890 do_swap_page+0x465/0xeb0 __handle_mm_fault+0xc7a/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 7 PID: 5422 Comm: gmain Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Other reads, read to 0xffff91ea33eac400 of 8 bytes by task 11276 on cpu 120: __swap_writepage+0x140/0xc20 __swap_writepage at mm/page_io.c:289 read to 0xffff91ea33eac400 of 8 bytes by task 11264 on cpu 16: swap_set_page_dirty+0x44/0x1f4 swap_set_page_dirty at mm/page_io.c:442 The write is under &si->lock, but the reads are done as lockless. Since the reads only check for a specific bit in the flag, it is harmless even if load tearing happens. Thus, just mark them as intentional data races using the data_race() macro. [cai@lca.pw: add a missing annotation] Link: http://lkml.kernel.org/r/1581612585-5812-1-git-send-email-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/20200207003601.1526-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/page_io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 454b70d8cda7..e485a6e8a6cd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -85,7 +85,7 @@ static void swap_slot_free_notify(struct page *page) return; sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) + if (data_race(!(sis->flags & SWP_BLKDEV))) return; /* @@ -302,7 +302,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -393,7 +393,7 @@ int swap_readpage(struct page *page, bool synchronous) goto out; } - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -455,7 +455,7 @@ int swap_set_page_dirty(struct page *page) { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); -- cgit From b96a3db2f37485e902794f28130f70dc834796a4 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:24 -0700 Subject: mm/swap_state: mark various intentional data races swap_cache_info.* could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in lookup_swap_cache / lookup_swap_cache write to 0xffffffff85517318 of 8 bytes by task 94138 on cpu 101: lookup_swap_cache+0x12e/0x460 lookup_swap_cache at mm/swap_state.c:322 do_swap_page+0x112/0xeb0 __handle_mm_fault+0xc7a/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffffff85517318 of 8 bytes by task 91655 on cpu 100: lookup_swap_cache+0x117/0x460 lookup_swap_cache at mm/swap_state.c:322 shmem_swapin_page+0xc7/0x9e0 shmem_getpage_gfp+0x2ca/0x16c0 shmem_fault+0xef/0x3c0 __do_fault+0x9e/0x220 do_fault+0x4a0/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 100 PID: 91655 Comm: systemd-journal Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 write to 0xffffffff8d717308 of 8 bytes by task 11365 on cpu 87: __delete_from_swap_cache+0x681/0x8b0 __delete_from_swap_cache at mm/swap_state.c:178 read to 0xffffffff8d717308 of 8 bytes by task 11275 on cpu 53: __delete_from_swap_cache+0x66e/0x8b0 __delete_from_swap_cache at mm/swap_state.c:178 Both the read and write are done as lockless. Since swap_cache_info.* are only used to print out counter information, even if any of them missed a few incremental due to data races, it will be harmless, so just mark it as an intentional data race using the data_race() macro. While at it, fix a checkpatch.pl warning, WARNING: Single statement macros should not use a do {} while (0) loop Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/20200207003715.1578-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/swap_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index d9d4a49f3241..c16eebb81d8b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -57,8 +57,8 @@ static bool enable_vma_readahead __read_mostly = true; #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) static struct { unsigned long add_total; -- cgit From e630bfac79456d3acd22c9286b50e83aafb7a07c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Aug 2020 17:31:27 -0700 Subject: mm/filemap.c: fix a data race in filemap_fault() struct file_ra_state ra.mmap_miss could be accessed concurrently during page faults as noticed by KCSAN, BUG: KCSAN: data-race in filemap_fault / filemap_map_pages write to 0xffff9b1700a2c1b4 of 4 bytes by task 3292 on cpu 30: filemap_fault+0x920/0xfc0 do_sync_mmap_readahead at mm/filemap.c:2384 (inlined by) filemap_fault at mm/filemap.c:2486 __xfs_filemap_fault+0x112/0x3e0 [xfs] xfs_filemap_fault+0x74/0x90 [xfs] __do_fault+0x9e/0x220 do_fault+0x4a0/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9b1700a2c1b4 of 4 bytes by task 3313 on cpu 32: filemap_map_pages+0xc2e/0xd80 filemap_map_pages at mm/filemap.c:2625 do_fault+0x3da/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 32 PID: 3313 Comm: systemd-udevd Tainted: G W L 5.5.0-next-20200210+ #1 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 ra.mmap_miss is used to contribute the readahead decisions, a data race could be undesirable. Both the read and write is only under non-exclusive mmap_sem, two concurrent writers could even underflow the counter. Fix the underflow by writing to a local variable before committing a final store to ra.mmap_miss given a small inaccuracy of the counter should be acceptable. Signed-off-by: Kirill A. Shutemov Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Tested-by: Qian Cai Reviewed-by: Matthew Wilcox (Oracle) Cc: Marco Elver Link: http://lkml.kernel.org/r/20200211030134.1847-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/filemap.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 653190943aa7..1aaea26556cc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2468,6 +2468,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; pgoff_t offset = vmf->pgoff; + unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ) @@ -2483,14 +2484,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) } /* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* @@ -2516,13 +2518,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; + unsigned int mmap_miss; pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_readahead(mapping, ra, file, @@ -2688,6 +2692,7 @@ void filemap_map_pages(struct vm_fault *vmf, unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2724,8 +2729,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock; - if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2745,6 +2750,7 @@ next: break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages); -- cgit From a449bf58e45abf919b27ba62a9dcbae3ad36e725 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:31 -0700 Subject: mm/swapfile: fix and annotate various data races swap_info_struct si.highest_bit, si.swap_map[offset] and si.flags could be accessed concurrently separately as noticed by KCSAN, === si.highest_bit === write to 0xffff8d5abccdc4d4 of 4 bytes by task 5353 on cpu 24: swap_range_alloc+0x81/0x130 swap_range_alloc at mm/swapfile.c:681 scan_swap_map_slots+0x371/0xb90 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff8d5abccdc4d4 of 4 bytes by task 6672 on cpu 70: scan_swap_map_slots+0x4a6/0xb90 scan_swap_map_slots at mm/swapfile.c:892 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 Reported by Kernel Concurrency Sanitizer on: CPU: 70 PID: 6672 Comm: oom01 Tainted: G W L 5.5.0-next-20200205+ #3 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 === si.swap_map[offset] === write to 0xffffbc370c29a64c of 1 bytes by task 6856 on cpu 86: __swap_entry_free_locked+0x8c/0x100 __swap_entry_free_locked at mm/swapfile.c:1209 (discriminator 4) __swap_entry_free.constprop.20+0x69/0xb0 free_swap_and_cache+0x53/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 read to 0xffffbc370c29a64c of 1 bytes by task 6855 on cpu 20: _swap_info_get+0x81/0xa0 _swap_info_get at mm/swapfile.c:1140 free_swap_and_cache+0x40/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 === si.flags === write to 0xffff956c8fc6c400 of 8 bytes by task 6087 on cpu 23: scan_swap_map_slots+0x6fe/0xb50 scan_swap_map_slots at mm/swapfile.c:887 get_swap_pages+0x39d/0x5c0 get_swap_page+0x377/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff956c8fc6c400 of 8 bytes by task 6207 on cpu 63: _swap_info_get+0x41/0xa0 __swap_info_get at mm/swapfile.c:1114 put_swap_page+0x84/0x490 __remove_mapping+0x384/0x5f0 shrink_page_list+0xff1/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 The writes are under si->lock but the reads are not. For si.highest_bit and si.swap_map[offset], data race could trigger logic bugs, so fix them by having WRITE_ONCE() for the writes and READ_ONCE() for the reads except those isolated reads where they compare against zero which a data race would cause no harm. Thus, annotate them as intentional data races using the data_race() macro. For si.flags, the readers are only interested in a single bit where a data race there would cause no issue there. [cai@lca.pw: add a missing annotation for si->flags in memory.c] Link: http://lkml.kernel.org/r/1581612647-5958-1-git-send-email-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Hugh Dickins Link: http://lkml.kernel.org/r/1581095163-12198-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/memory.c | 4 ++-- mm/swapfile.c | 31 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 228efaca75d3..dbc123535827 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3130,8 +3130,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { struct swap_info_struct *si = swp_swap_info(entry); - if (si->flags & SWP_SYNCHRONOUS_IO && - __swap_count(entry) == 1) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); diff --git a/mm/swapfile.c b/mm/swapfile.c index eb410d3c8de8..12f59e641b5e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) - si->highest_bit -= nr_entries; + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); si->inuse_pages += nr_entries; if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; @@ -705,7 +705,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, if (end > si->highest_bit) { bool was_full = !si->highest_bit; - si->highest_bit = end; + WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } @@ -870,7 +870,7 @@ checks: else goto done; } - si->swap_map[offset] = usage; + WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); @@ -929,12 +929,13 @@ done: scan: spin_unlock(&si->lock); - while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) { + while (++offset <= READ_ONCE(si->highest_bit)) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -946,11 +947,12 @@ scan: } offset = si->lowest_bit; while (offset < scan_base) { - if (!si->swap_map[offset]) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -1149,7 +1151,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) p = swp_swap_info(entry); if (!p) goto bad_nofile; - if (!(p->flags & SWP_USED)) + if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) @@ -1175,7 +1177,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry) p = __swap_info_get(entry); if (!p) goto out; - if (!p->swap_map[swp_offset(entry)]) + if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; @@ -1244,7 +1246,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + if (usage) + WRITE_ONCE(p->swap_map[offset], usage); + else + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } @@ -1296,7 +1301,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry) goto bad_nofile; rcu_read_lock(); - if (!(si->flags & SWP_VALID)) + if (data_race(!(si->flags & SWP_VALID))) goto unlock_out; offset = swp_offset(entry); if (offset >= si->max) @@ -3484,7 +3489,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) } else err = -ENOENT; /* unused swap entry */ - p->swap_map[offset] = count | has_cache; + WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); -- cgit From 6e4bd50f3888fa8fea8bc66a0ad4ad5f1c862961 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:34 -0700 Subject: mm/page_counter: fix various data races at memsw Commit 3e32cb2e0a12 ("mm: memcontrol: lockless page counters") could had memcg->memsw->watermark and memcg->memsw->failcnt been accessed concurrently as reported by KCSAN, BUG: KCSAN: data-race in page_counter_try_charge / page_counter_try_charge read to 0xffff8fb18c4cd190 of 8 bytes by task 1081 on cpu 59: page_counter_try_charge+0x4d/0x150 mm/page_counter.c:138 try_charge+0x131/0xd50 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x58/0x140 __memcg_kmem_charge+0xcc/0x280 __alloc_pages_nodemask+0x1e1/0x450 alloc_pages_current+0xa6/0x120 pte_alloc_one+0x17/0xd0 __pte_alloc+0x3a/0x1f0 copy_p4d_range+0xc36/0x1990 copy_page_range+0x21d/0x360 dup_mmap+0x5f5/0x7a0 dup_mm+0xa2/0x240 copy_process+0x1b3f/0x3460 _do_fork+0xaa/0xa20 __x64_sys_clone+0x13b/0x170 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe write to 0xffff8fb18c4cd190 of 8 bytes by task 1153 on cpu 120: page_counter_try_charge+0x5b/0x150 mm/page_counter.c:139 try_charge+0x131/0xd50 mm/memcontrol.c:2405 mem_cgroup_try_charge+0x159/0x460 mem_cgroup_try_charge_delay+0x3d/0xa0 wp_page_copy+0x14d/0x930 do_wp_page+0x107/0x7b0 __handle_mm_fault+0xce6/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 BUG: KCSAN: data-race in page_counter_try_charge / page_counter_try_charge write to 0xffff88809bbf2158 of 8 bytes by task 11782 on cpu 0: page_counter_try_charge+0x100/0x170 mm/page_counter.c:129 try_charge+0x185/0xbf0 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x4a/0xe0 mm/memcontrol.c:2837 __memcg_kmem_charge+0xcf/0x1b0 mm/memcontrol.c:2877 __alloc_pages_nodemask+0x26c/0x310 mm/page_alloc.c:4780 read to 0xffff88809bbf2158 of 8 bytes by task 11814 on cpu 1: page_counter_try_charge+0xef/0x170 mm/page_counter.c:129 try_charge+0x185/0xbf0 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x4a/0xe0 mm/memcontrol.c:2837 __memcg_kmem_charge+0xcf/0x1b0 mm/memcontrol.c:2877 __alloc_pages_nodemask+0x26c/0x310 mm/page_alloc.c:4780 Since watermark could be compared or set to garbage due to a data race which would change the code logic, fix it by adding a pair of READ_ONCE() and WRITE_ONCE() in those places. The "failcnt" counter is tolerant of some degree of inaccuracy and is only used to report stats, a data race will not be harmful, thus mark it as an intentional data race using the data_race() macro. Fixes: 3e32cb2e0a12 ("mm: memcontrol: lockless page counters") Reported-by: syzbot+f36cfe60b1006a94f9dc@syzkaller.appspotmail.com Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Tetsuo Handa Cc: Marco Elver Cc: Dmitry Vyukov Cc: Johannes Weiner Link: http://lkml.kernel.org/r/1581519682-23594-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/page_counter.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/page_counter.c b/mm/page_counter.c index b4663844c9b3..afe22ad335cc 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } } @@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter, propagate_protected_usage(c, new); /* * This is racy, but we can live with some - * inaccuracy in the failcnt. + * inaccuracy in the failcnt which is only used + * to report stats. */ - c->failcnt++; + data_race(c->failcnt++); *fail = c; goto failed; } @@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter, * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } return true; -- cgit From e0e3f42fd96cf830c61aa720f0abb4eef41c5ce3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:37 -0700 Subject: mm/memcontrol: fix a data race in scan count struct mem_cgroup_per_node mz.lru_zone_size[zone_idx][lru] could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in lruvec_lru_size / mem_cgroup_update_lru_size write to 0xffff9c804ca285f8 of 8 bytes by task 50951 on cpu 12: mem_cgroup_update_lru_size+0x11c/0x1d0 mem_cgroup_update_lru_size at mm/memcontrol.c:1266 isolate_lru_pages+0x6a9/0xf30 shrink_active_list+0x123/0xcc0 shrink_lruvec+0x8fd/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9c804ca285f8 of 8 bytes by task 50964 on cpu 95: lruvec_lru_size+0xbb/0x270 mem_cgroup_get_zone_lru_size at include/linux/memcontrol.h:536 (inlined by) lruvec_lru_size at mm/vmscan.c:326 shrink_lruvec+0x1d0/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_current+0xa6/0x120 alloc_slab_page+0x3b1/0x540 allocate_slab+0x70/0x660 new_slab+0x46/0x70 ___slab_alloc+0x4ad/0x7d0 __slab_alloc+0x43/0x70 kmem_cache_alloc+0x2c3/0x420 getname_flags+0x4c/0x230 getname+0x22/0x30 do_sys_openat2+0x205/0x3b0 do_sys_open+0x9a/0xf0 __x64_sys_openat+0x62/0x80 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported by Kernel Concurrency Sanitizer on: CPU: 95 PID: 50964 Comm: cc1 Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The write is under lru_lock, but the read is done as lockless. The scan count is used to determine how aggressively the anon and file LRU lists should be scanned. Load tearing could generate an inefficient heuristic, so fix it by adding READ_ONCE() for the read. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200206034945.2481-1-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 385237e4cb44..d0b036123c6a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -630,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); -- cgit From a1f459354a0f011a7baf52e5f16cd9ca95f1e6f2 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:41 -0700 Subject: mm/list_lru: fix a data race in list_lru_count_one struct list_lru_one l.nr_items could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in list_lru_count_one / list_lru_isolate_move write to 0xffffa102789c4510 of 8 bytes by task 823 on cpu 39: list_lru_isolate_move+0xf9/0x130 list_lru_isolate_move at mm/list_lru.c:180 inode_lru_isolate+0x12b/0x2a0 __list_lru_walk_one+0x122/0x3d0 list_lru_walk_one+0x75/0xa0 prune_icache_sb+0x8b/0xc0 super_cache_scan+0x1b8/0x250 do_shrink_slab+0x256/0x6d0 shrink_slab+0x41b/0x4a0 shrink_node+0x35c/0xd80 balance_pgdat+0x652/0xd90 kswapd+0x396/0x8d0 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 read to 0xffffa102789c4510 of 8 bytes by task 6345 on cpu 56: list_lru_count_one+0x116/0x2f0 list_lru_count_one at mm/list_lru.c:193 super_cache_count+0xe8/0x170 do_shrink_slab+0x95/0x6d0 shrink_slab+0x41b/0x4a0 shrink_node+0x35c/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 56 PID: 6345 Comm: oom01 Tainted: G W L 5.5.0-next-20200205+ #4 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 A shattered l.nr_items could affect the shrinker behaviour due to a data race. Fix it by adding READ_ONCE() for the read. Since the writes are aligned and up to word-size, assume those are safe from data races to avoid readability issues of writing WRITE_ONCE(var, var + val). Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1581114679-5488-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/list_lru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index e825804b3928..5aa6e44bc2ae 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru, rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); - count = l->nr_items; + count = READ_ONCE(l->nr_items); rcu_read_unlock(); return count; -- cgit From abe1de4209f670ca81ba0d96f64fa9285c05a5ad Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:44 -0700 Subject: mm/mempool: fix a data race in mempool_free() mempool_t pool.curr_nr could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in mempool_free / remove_element write to 0xffffffffa937638c of 4 bytes by task 6359 on cpu 113: remove_element+0x4a/0x1c0 remove_element at mm/mempool.c:132 mempool_alloc+0x102/0x210 (inlined by) mempool_alloc at mm/mempool.c:399 bio_alloc_bioset+0x106/0x2c0 get_swap_bio+0x49/0x230 __swap_writepage+0x680/0xc30 swap_writepage+0x9c/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffffffffa937638c of 4 bytes by interrupt on cpu 64: mempool_free+0x3e/0x150 mempool_free at mm/mempool.c:492 bio_free+0x192/0x280 bio_put+0x91/0xd0 end_swap_bio_write+0x1d8/0x280 bio_endio+0x2c2/0x5b0 dec_pending+0x22b/0x440 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x2c2/0x5b0 blk_update_request+0x217/0x940 scsi_end_request+0x6b/0x4d0 scsi_io_completion+0xb7/0x7e0 scsi_finish_command+0x223/0x310 scsi_softirq_done+0x1d5/0x210 blk_mq_complete_request+0x224/0x250 scsi_mq_done+0xc2/0x250 pqi_raid_io_complete+0x5a/0x70 [smartpqi] pqi_irq_handler+0x150/0x1410 [smartpqi] __handle_irq_event_percpu+0x90/0x540 handle_irq_event_percpu+0x49/0xd0 handle_irq_event+0x85/0xca handle_edge_irq+0x13f/0x3e0 do_IRQ+0x86/0x190 Since the write is under pool->lock but the read is done as lockless. Even though the commit 5b990546e334 ("mempool: fix and document synchronization and memory barrier usage") introduced the smp_wmb() and smp_rmb() pair to improve the situation, it is adequate to protect it from data races which could lead to a logic bug, so fix it by adding READ_ONCE() for the read. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Tejun Heo Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/1581446384-2131-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/mempool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempool.c b/mm/mempool.c index 85efab3da720..79bff63ecf27 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool) * ensures that there will be frees which return elements to the * pool waking up the waiters. */ - if (unlikely(pool->curr_nr < pool->min_nr)) { + if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); -- cgit From 9c1177b62a8c7f78fee5ede540825b844a1bf0c8 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:47 -0700 Subject: mm/rmap: annotate a data race at tlb_flush_batched mm->tlb_flush_batched could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in flush_tlb_batched_pending / try_to_unmap_one write to 0xffff93f754880bd0 of 1 bytes by task 822 on cpu 6: try_to_unmap_one+0x59a/0x1ab0 set_tlb_ubc_flush_pending at mm/rmap.c:635 (inlined by) try_to_unmap_one at mm/rmap.c:1538 rmap_walk_anon+0x296/0x650 rmap_walk+0xdf/0x100 try_to_unmap+0x18a/0x2f0 shrink_page_list+0xef6/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 balance_pgdat+0x652/0xd90 kswapd+0x396/0x8d0 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 read to 0xffff93f754880bd0 of 1 bytes by task 6364 on cpu 4: flush_tlb_batched_pending+0x29/0x90 flush_tlb_batched_pending at mm/rmap.c:682 change_p4d_range+0x5dd/0x1030 change_pte_range at mm/mprotect.c:44 (inlined by) change_pmd_range at mm/mprotect.c:212 (inlined by) change_pud_range at mm/mprotect.c:240 (inlined by) change_p4d_range at mm/mprotect.c:260 change_protection+0x222/0x310 change_prot_numa+0x3e/0x60 task_numa_work+0x219/0x350 task_work_run+0xed/0x140 prepare_exit_to_usermode+0x2cc/0x2e0 ret_from_intr+0x32/0x42 Reported by Kernel Concurrency Sanitizer on: CPU: 4 PID: 6364 Comm: mtest01 Tainted: G W L 5.5.0-next-20200210+ #5 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 flush_tlb_batched_pending() is under PTL but the write is not, but mm->tlb_flush_batched is only a bool type, so the value is unlikely to be shattered. Thus, mark it as an intentional data race by using the data race macro. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/1581450783-8262-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index 4ace1e32f705..83cc459edc40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - if (mm->tlb_flush_batched) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* -- cgit From 7e0cc01ea181c68f6aa013d8d7e7acbf9b49fda0 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:50 -0700 Subject: mm/swap.c: annotate data races for lru_rotate_pvecs Read to lru_add_pvec->nr could be interrupted and then write to the same variable. The write has local interrupt disabled, but the plain reads result in data races. However, it is unlikely the compilers could do much damage here given that lru_add_pvec->nr is a "unsigned char" and there is an existing compiler barrier. Thus, annotate the reads using the data_race() macro. The data races were reported by KCSAN, BUG: KCSAN: data-race in lru_add_drain_cpu / rotate_reclaimable_page write to 0xffff9291ebcb8a40 of 1 bytes by interrupt on cpu 23: rotate_reclaimable_page+0x2df/0x490 pagevec_add at include/linux/pagevec.h:81 (inlined by) rotate_reclaimable_page at mm/swap.c:259 end_page_writeback+0x1b5/0x2b0 end_swap_bio_write+0x1d0/0x280 bio_endio+0x297/0x560 dec_pending+0x218/0x430 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x297/0x560 blk_update_request+0x201/0x920 scsi_end_request+0x6b/0x4a0 scsi_io_completion+0xb7/0x7e0 scsi_finish_command+0x1ed/0x2a0 scsi_softirq_done+0x1c9/0x1d0 blk_done_softirq+0x181/0x1d0 __do_softirq+0xd9/0x57c irq_exit+0xa2/0xc0 do_IRQ+0x8b/0x190 ret_from_intr+0x0/0x42 delay_tsc+0x46/0x80 __const_udelay+0x3c/0x40 __udelay+0x10/0x20 kcsan_setup_watchpoint+0x202/0x3a0 __tsan_read1+0xc2/0x100 lru_add_drain_cpu+0xb8/0x3f0 lru_add_drain+0x25/0x40 shrink_active_list+0xe1/0xc80 shrink_lruvec+0x766/0xb70 shrink_node+0x2d6/0xca0 do_try_to_free_pages+0x1f7/0x9a0 try_to_free_pages+0x252/0x5b0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x16e/0x6f0 __handle_mm_fault+0xcd5/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9291ebcb8a40 of 1 bytes by task 37761 on cpu 23: lru_add_drain_cpu+0xb8/0x3f0 lru_add_drain_cpu at mm/swap.c:602 lru_add_drain+0x25/0x40 shrink_active_list+0xe1/0xc80 shrink_lruvec+0x766/0xb70 shrink_node+0x2d6/0xca0 do_try_to_free_pages+0x1f7/0x9a0 try_to_free_pages+0x252/0x5b0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x16e/0x6f0 __handle_mm_fault+0xcd5/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 2 locks held by oom02/37761: #0: ffff9281e5928808 (&mm->mmap_sem#2){++++}, at: do_page_fault #1: ffffffffb3ade380 (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part irq event stamp: 1949217 trace_hardirqs_on_thunk+0x1a/0x1c __do_softirq+0x2e7/0x57c __do_softirq+0x34c/0x57c irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 23 PID: 37761 Comm: oom02 Not tainted 5.6.0-rc3-next-20200226+ #6 Hardware name: HP ProLiant BL660c Gen9, BIOS I38 10/17/2018 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Link: http://lkml.kernel.org/r/20200228044018.1263-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/swap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index d26c22baf7c5..d16d65d9b4e0 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -632,7 +632,8 @@ void lru_add_drain_cpu(int cpu) __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate.pvec, cpu); - if (pagevec_count(pvec)) { + /* Disabling interrupts below acts as a compiler barrier. */ + if (data_race(pagevec_count(pvec))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -793,7 +794,7 @@ void lru_add_drain_all(void) struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) || - pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) || + data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || -- cgit From c403f6a3a792a6601185497c12b0bdf4be880439 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:53 -0700 Subject: mm: annotate a data race in page_zonenum() BUG: KCSAN: data-race in page_cpupid_xchg_last / put_page write (marked) to 0xfffffc0d48ec1a00 of 8 bytes by task 91442 on cpu 3: page_cpupid_xchg_last+0x51/0x80 page_cpupid_xchg_last at mm/mmzone.c:109 (discriminator 11) wp_page_reuse+0x3e/0xc0 wp_page_reuse at mm/memory.c:2453 do_wp_page+0x472/0x7b0 do_wp_page at mm/memory.c:2798 __handle_mm_fault+0xcb0/0xd00 handle_pte_fault at mm/memory.c:4049 (inlined by) __handle_mm_fault at mm/memory.c:4163 handle_mm_fault+0xfc/0x2f0 handle_mm_fault at mm/memory.c:4200 do_page_fault+0x263/0x6f9 do_user_addr_fault at arch/x86/mm/fault.c:1465 (inlined by) do_page_fault at arch/x86/mm/fault.c:1539 page_fault+0x34/0x40 read to 0xfffffc0d48ec1a00 of 8 bytes by task 94817 on cpu 69: put_page+0x15a/0x1f0 page_zonenum at include/linux/mm.h:923 (inlined by) is_zone_device_page at include/linux/mm.h:929 (inlined by) page_is_devmap_managed at include/linux/mm.h:948 (inlined by) put_page at include/linux/mm.h:1023 wp_page_copy+0x571/0x930 wp_page_copy at mm/memory.c:2615 do_wp_page+0x107/0x7b0 __handle_mm_fault+0xcb0/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 69 PID: 94817 Comm: systemd-udevd Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 A page never changes its zone number. The zone number happens to be stored in the same word as other bits which are modified, but the zone number bits will never be modified by any other write, so it can accept a reload of the zone bits after an intervening write and it don't need to use READ_ONCE(). Thus, annotate this data race using ASSERT_EXCLUSIVE_BITS() to also assert that there are no concurrent writes to it. Suggested-by: Marco Elver Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Paul E. McKenney Cc: David Hildenbrand Cc: Jan Kara Cc: John Hubbard Cc: Ira Weiny Cc: Dan Williams Link: http://lkml.kernel.org/r/1581619089-14472-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e7ec3ee34cf..1983e08f5906 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1067,6 +1067,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } -- cgit From 7f897acbe5d57995438c831670b7c400e9c0dc00 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Fri, 14 Aug 2020 17:31:57 -0700 Subject: include/asm-generic/vmlinux.lds.h: align ro_after_init Since the patch [1], building the kernel using a toolchain built with binutils 2.33.1 prevents booting a sh4 system under Qemu. Apply the patch provided by Alan Modra [2] that fix alignment of rodata. [1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=ebd2263ba9a9124d93bbc0ece63d7e0fae89b40e [2] https://www.sourceware.org/ml/binutils/2019-12/msg00112.html Signed-off-by: Romain Naour Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Kuninori Morimoto Cc: Rich Felker Cc: Sam Ravnborg Cc: Yoshinori Sato Cc: Arnd Bergmann Cc: Link: https://marc.info/?l=linux-sh&m=158429470221261 Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7616ff0b96ec..5430febd34be 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -394,6 +394,7 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ + . = ALIGN(8); \ __start_ro_after_init = .; \ *(.data..ro_after_init) \ JUMP_TABLE_DATA \ -- cgit From a8e3943b58ac8321d4817cfa08aadcfa59c2bf3f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 14 Aug 2020 17:32:00 -0700 Subject: sh: clkfwk: remove r8/r16/r32 SH will get below warning ${LINUX}/drivers/sh/clk/cpg.c: In function 'r8': ${LINUX}/drivers/sh/clk/cpg.c:41:17: warning: passing argument 1 of 'ioread8' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] return ioread8(addr); ^~~~ In file included from ${LINUX}/arch/sh/include/asm/io.h:21, from ${LINUX}/include/linux/io.h:13, from ${LINUX}/drivers/sh/clk/cpg.c:14: ${LINUX}/include/asm-generic/iomap.h:29:29: note: expected 'void *' but argument is of type 'const void *' extern unsigned int ioread8(void __iomem *); ^~~~~~~~~~~~~~ We don't need "const" for r8/r16/r32. And we don't need r8/r16/r32 themselvs. This patch cleanup these. Signed-off-by: Kuninori Morimoto Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Rich Felker Cc: Romain Naour Cc: Sam Ravnborg Cc: Yoshinori Sato X-MARC-Message: https://marc.info/?l=linux-renesas-soc&m=157852973916903 Signed-off-by: Linus Torvalds --- drivers/sh/clk/cpg.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index eeb028b9cdb3..a5cacfe24a42 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -36,36 +36,21 @@ static void sh_clk_write(int value, struct clk *clk) iowrite32(value, clk->mapped_reg); } -static unsigned int r8(const void __iomem *addr) -{ - return ioread8(addr); -} - -static unsigned int r16(const void __iomem *addr) -{ - return ioread16(addr); -} - -static unsigned int r32(const void __iomem *addr) -{ - return ioread32(addr); -} - static int sh_clk_mstp_enable(struct clk *clk) { sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk); if (clk->status_reg) { - unsigned int (*read)(const void __iomem *addr); + unsigned int (*read)(void __iomem *addr); int i; void __iomem *mapped_status = (phys_addr_t)clk->status_reg - (phys_addr_t)clk->enable_reg + clk->mapped_reg; if (clk->flags & CLK_ENABLE_REG_8BIT) - read = r8; + read = ioread8; else if (clk->flags & CLK_ENABLE_REG_16BIT) - read = r16; + read = ioread16; else - read = r32; + read = ioread32; for (i = 1000; (read(mapped_status) & (1 << clk->enable_bit)) && i; -- cgit From f9e7ff9c6fc758b6f25674a9a4451db30344ce1e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 14 Aug 2020 17:32:04 -0700 Subject: sh: use generic strncpy() Current SH will get below warning at strncpy() In file included from ${LINUX}/arch/sh/include/asm/string.h:3, from ${LINUX}/include/linux/string.h:20, from ${LINUX}/include/linux/bitmap.h:9, from ${LINUX}/include/linux/nodemask.h:95, from ${LINUX}/include/linux/mmzone.h:17, from ${LINUX}/include/linux/gfp.h:6, from ${LINUX}/innclude/linux/slab.h:15, from ${LINUX}/linux/drivers/mmc/host/vub300.c:38: ${LINUX}/drivers/mmc/host/vub300.c: In function 'new_system_port_status': ${LINUX}/arch/sh/include/asm/string_32.h:51:42: warning: array subscript\ 80 is above array bounds of 'char[26]' [-Warray-bounds] : "0" (__dest), "1" (__src), "r" (__src+__n) ~~~~~^~~~ In general, strncpy() should behave like below. char dest[10]; char *src = "12345"; strncpy(dest, src, 10); // dest = {'1', '2', '3', '4', '5', '\0','\0','\0','\0','\0'} But, current SH strnpy() has 2 issues. 1st is it will access to out-of-memory (= src + 10). 2nd is it needs big fixup for it, and maintenance __asm__ code is difficult. To solve these issues, this patch simply uses generic strncpy() instead of architecture specific one. Signed-off-by: Kuninori Morimoto Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Rich Felker Cc: Romain Naour Cc: Sam Ravnborg Cc: Yoshinori Sato Link: https://marc.info/?l=linux-renesas-soc&m=157664657013309 Signed-off-by: Linus Torvalds --- arch/sh/include/asm/string_32.h | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h index 3558b1d7123e..be3f9a08cbc9 100644 --- a/arch/sh/include/asm/string_32.h +++ b/arch/sh/include/asm/string_32.h @@ -28,32 +28,6 @@ static inline char *strcpy(char *__dest, const char *__src) return __xdest; } -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *__dest, const char *__src, size_t __n) -{ - register char *__xdest = __dest; - unsigned long __dummy; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - "1:\n" - "mov.b @%1+, %2\n\t" - "mov.b %2, @%0\n\t" - "cmp/eq #0, %2\n\t" - "bt/s 2f\n\t" - " cmp/eq %5,%1\n\t" - "bf/s 1b\n\t" - " add #1, %0\n" - "2:" - : "=r" (__dest), "=r" (__src), "=&z" (__dummy) - : "0" (__dest), "1" (__src), "r" (__src+__n) - : "memory", "t"); - - return __xdest; -} - #define __HAVE_ARCH_STRCMP static inline int strcmp(const char *__cs, const char *__ct) { -- cgit From 8f28ca6bd8211214faf717677bbffe375c2a6072 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:07 -0700 Subject: iomap: constify ioreadX() iomem argument (as in generic implementation) Patch series "iomap: Constify ioreadX() iomem argument", v3. The ioread8/16/32() and others have inconsistent interface among the architectures: some taking address as const, some not. It seems there is nothing really stopping all of them to take pointer to const. This patch (of 4): The ioreadX() and ioreadX_rep() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. [krzk@kernel.org: sh: clk: fix assignment from incompatible pointer type for ioreadX()] Link: http://lkml.kernel.org/r/20200723082017.24053-1-krzk@kernel.org [akpm@linux-foundation.org: fix drivers/mailbox/bcm-pdc-mailbox.c] Link: http://lkml.kernel.org/r/202007132209.Rxmv4QyS%25lkp@intel.com Suggested-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Yoshinori Sato Cc: Rich Felker Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Dave Jiang Cc: Jon Mason Cc: Allen Hubbe Cc: "Michael S. Tsirkin" Cc: Jason Wang Link: http://lkml.kernel.org/r/20200709072837.5869-1-krzk@kernel.org Link: http://lkml.kernel.org/r/20200709072837.5869-2-krzk@kernel.org Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/core_apecs.h | 6 +-- arch/alpha/include/asm/core_cia.h | 6 +-- arch/alpha/include/asm/core_lca.h | 6 +-- arch/alpha/include/asm/core_marvel.h | 4 +- arch/alpha/include/asm/core_mcpcia.h | 6 +-- arch/alpha/include/asm/core_t2.h | 2 +- arch/alpha/include/asm/io.h | 12 +++--- arch/alpha/include/asm/io_trivial.h | 16 ++++---- arch/alpha/include/asm/jensen.h | 2 +- arch/alpha/include/asm/machvec.h | 6 +-- arch/alpha/kernel/core_marvel.c | 2 +- arch/alpha/kernel/io.c | 12 +++--- arch/parisc/include/asm/io.h | 4 +- arch/parisc/lib/iomap.c | 72 +++++++++++++++++------------------ arch/powerpc/kernel/iomap.c | 28 +++++++------- arch/sh/kernel/iomap.c | 22 +++++------ drivers/mailbox/bcm-pdc-mailbox.c | 2 +- drivers/sh/clk/cpg.c | 2 +- include/asm-generic/iomap.h | 28 +++++++------- include/linux/io-64-nonatomic-hi-lo.h | 4 +- include/linux/io-64-nonatomic-lo-hi.h | 4 +- lib/iomap.c | 30 +++++++-------- 22 files changed, 138 insertions(+), 138 deletions(-) diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h index 0a07055bc0fe..2d9726fc02ef 100644 --- a/arch/alpha/include/asm/core_apecs.h +++ b/arch/alpha/include/asm/core_apecs.h @@ -384,7 +384,7 @@ struct el_apecs_procdata } \ } while (0) -__EXTERN_INLINE unsigned int apecs_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -420,7 +420,7 @@ __EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -456,7 +456,7 @@ __EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < APECS_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h index c706a7f2b061..cb22991f6761 100644 --- a/arch/alpha/include/asm/core_cia.h +++ b/arch/alpha/include/asm/core_cia.h @@ -342,7 +342,7 @@ struct el_CIA_sysdata_mcheck { #define vuip volatile unsigned int __force * #define vulp volatile unsigned long __force * -__EXTERN_INLINE unsigned int cia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -374,7 +374,7 @@ __EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -404,7 +404,7 @@ __EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < CIA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h index 84d5e5b84f4f..ec86314418cb 100644 --- a/arch/alpha/include/asm/core_lca.h +++ b/arch/alpha/include/asm/core_lca.h @@ -230,7 +230,7 @@ union el_lca { } while (0) -__EXTERN_INLINE unsigned int lca_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -266,7 +266,7 @@ __EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -302,7 +302,7 @@ __EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < LCA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h index cc6fd92d5fa9..b266e02e284b 100644 --- a/arch/alpha/include/asm/core_marvel.h +++ b/arch/alpha/include/asm/core_marvel.h @@ -332,10 +332,10 @@ struct io7 { #define vucp volatile unsigned char __force * #define vusp volatile unsigned short __force * -extern unsigned int marvel_ioread8(void __iomem *); +extern unsigned int marvel_ioread8(const void __iomem *); extern void marvel_iowrite8(u8 b, void __iomem *); -__EXTERN_INLINE unsigned int marvel_ioread16(void __iomem *addr) +__EXTERN_INLINE unsigned int marvel_ioread16(const void __iomem *addr) { return __kernel_ldwu(*(vusp)addr); } diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h index b30dc128210d..cb24d1bd6141 100644 --- a/arch/alpha/include/asm/core_mcpcia.h +++ b/arch/alpha/include/asm/core_mcpcia.h @@ -267,7 +267,7 @@ extern inline int __mcpcia_is_mmio(unsigned long addr) return (addr & 0x80000000UL) == 0; } -__EXTERN_INLINE unsigned int mcpcia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -291,7 +291,7 @@ __EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x00) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -315,7 +315,7 @@ __EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x08) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr; diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h index e0b33d09e93a..12bb7addc789 100644 --- a/arch/alpha/include/asm/core_t2.h +++ b/arch/alpha/include/asm/core_t2.h @@ -572,7 +572,7 @@ __EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr) it doesn't make sense to merge the pio and mmio routines. */ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int t2_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int t2_ioread##NS(const void __iomem *xaddr) \ { \ if (t2_is_mmio(xaddr)) \ return t2_read##OS(xaddr); \ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 640e1a2f57b4..1f6a909d1fa5 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -150,9 +150,9 @@ static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \ alpha_mv.mv_##NAME(b, addr); \ } -REMAP1(unsigned int, ioread8, /**/) -REMAP1(unsigned int, ioread16, /**/) -REMAP1(unsigned int, ioread32, /**/) +REMAP1(unsigned int, ioread8, const) +REMAP1(unsigned int, ioread16, const) +REMAP1(unsigned int, ioread32, const) REMAP1(u8, readb, const volatile) REMAP1(u16, readw, const volatile) REMAP1(u32, readl, const volatile) @@ -307,7 +307,7 @@ static inline int __is_mmio(const volatile void __iomem *addr) */ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) -extern inline unsigned int ioread8(void __iomem *addr) +extern inline unsigned int ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -316,7 +316,7 @@ extern inline unsigned int ioread8(void __iomem *addr) return ret; } -extern inline unsigned int ioread16(void __iomem *addr) +extern inline unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -359,7 +359,7 @@ extern inline void outw(u16 b, unsigned long port) #endif #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) -extern inline unsigned int ioread32(void __iomem *addr) +extern inline unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h index ba3d8f0cfe0c..a1a29cbe02fa 100644 --- a/arch/alpha/include/asm/io_trivial.h +++ b/arch/alpha/include/asm/io_trivial.h @@ -7,15 +7,15 @@ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread8)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread8)(const void __iomem *a) { - return __kernel_ldbu(*(volatile u8 __force *)a); + return __kernel_ldbu(*(const volatile u8 __force *)a); } __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread16)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread16)(const void __iomem *a) { - return __kernel_ldwu(*(volatile u16 __force *)a); + return __kernel_ldwu(*(const volatile u16 __force *)a); } __EXTERN_INLINE void @@ -33,9 +33,9 @@ IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a) #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread32)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread32)(const void __iomem *a) { - return *(volatile u32 __force *)a; + return *(const volatile u32 __force *)a; } __EXTERN_INLINE void @@ -73,14 +73,14 @@ IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) __EXTERN_INLINE u8 IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void __iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread8)(addr); } __EXTERN_INLINE u16 IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void __iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread16)(addr); } diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h index 436dc905b6ad..916895155a88 100644 --- a/arch/alpha/include/asm/jensen.h +++ b/arch/alpha/include/asm/jensen.h @@ -305,7 +305,7 @@ __EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr) that it doesn't make sense to merge them. */ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int jensen_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int jensen_ioread##NS(const void __iomem *xaddr) \ { \ if (jensen_is_mmio(xaddr)) \ return jensen_read##OS(xaddr - 0x100000000ul); \ diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index a6b73c6d10ee..a4e96e2bec74 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -46,9 +46,9 @@ struct alpha_machine_vector void (*mv_pci_tbi)(struct pci_controller *hose, dma_addr_t start, dma_addr_t end); - unsigned int (*mv_ioread8)(void __iomem *); - unsigned int (*mv_ioread16)(void __iomem *); - unsigned int (*mv_ioread32)(void __iomem *); + unsigned int (*mv_ioread8)(const void __iomem *); + unsigned int (*mv_ioread16)(const void __iomem *); + unsigned int (*mv_ioread32)(const void __iomem *); void (*mv_iowrite8)(u8, void __iomem *); void (*mv_iowrite16)(u16, void __iomem *); diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 4c80d992a659..4485b77f8658 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -806,7 +806,7 @@ void __iomem *marvel_ioportmap (unsigned long addr) } unsigned int -marvel_ioread8(void __iomem *xaddr) +marvel_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (__marvel_is_port_kbd(addr)) diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 938de13adfbf..838586abb1e0 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -14,7 +14,7 @@ "generic", which bumps through the machine vector. */ unsigned int -ioread8(void __iomem *addr) +ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -23,7 +23,7 @@ ioread8(void __iomem *addr) return ret; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -32,7 +32,7 @@ unsigned int ioread16(void __iomem *addr) return ret; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); @@ -257,7 +257,7 @@ EXPORT_SYMBOL(readq_relaxed); /* * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. */ -void ioread8_rep(void __iomem *port, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *port, void *dst, unsigned long count) { while ((unsigned long)dst & 0x3) { if (!count) @@ -300,7 +300,7 @@ EXPORT_SYMBOL(insb); * the interfaces seems to be slow: just using the inlined version * of the inw() breaks things. */ -void ioread16_rep(void __iomem *port, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { if (!count) @@ -340,7 +340,7 @@ EXPORT_SYMBOL(insw); * but the interfaces seems to be slow: just using the inlined version * of the inl() breaks things. */ -void ioread32_rep(void __iomem *port, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { while (count--) { diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 116effe26143..45e20d38dc59 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -303,8 +303,8 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define ioread64be ioread64be #define iowrite64 iowrite64 #define iowrite64be iowrite64be -extern u64 ioread64(void __iomem *addr); -extern u64 ioread64be(void __iomem *addr); +extern u64 ioread64(const void __iomem *addr); +extern u64 ioread64be(const void __iomem *addr); extern void iowrite64(u64 val, void __iomem *addr); extern void iowrite64be(u64 val, void __iomem *addr); diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 0195aec657e2..ce400417d54e 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -43,13 +43,13 @@ #endif struct iomap_ops { - unsigned int (*read8)(void __iomem *); - unsigned int (*read16)(void __iomem *); - unsigned int (*read16be)(void __iomem *); - unsigned int (*read32)(void __iomem *); - unsigned int (*read32be)(void __iomem *); - u64 (*read64)(void __iomem *); - u64 (*read64be)(void __iomem *); + unsigned int (*read8)(const void __iomem *); + unsigned int (*read16)(const void __iomem *); + unsigned int (*read16be)(const void __iomem *); + unsigned int (*read32)(const void __iomem *); + unsigned int (*read32be)(const void __iomem *); + u64 (*read64)(const void __iomem *); + u64 (*read64be)(const void __iomem *); void (*write8)(u8, void __iomem *); void (*write16)(u16, void __iomem *); void (*write16be)(u16, void __iomem *); @@ -57,9 +57,9 @@ struct iomap_ops { void (*write32be)(u32, void __iomem *); void (*write64)(u64, void __iomem *); void (*write64be)(u64, void __iomem *); - void (*read8r)(void __iomem *, void *, unsigned long); - void (*read16r)(void __iomem *, void *, unsigned long); - void (*read32r)(void __iomem *, void *, unsigned long); + void (*read8r)(const void __iomem *, void *, unsigned long); + void (*read16r)(const void __iomem *, void *, unsigned long); + void (*read32r)(const void __iomem *, void *, unsigned long); void (*write8r)(void __iomem *, const void *, unsigned long); void (*write16r)(void __iomem *, const void *, unsigned long); void (*write32r)(void __iomem *, const void *, unsigned long); @@ -69,17 +69,17 @@ struct iomap_ops { #define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff) -static unsigned int ioport_read8(void __iomem *addr) +static unsigned int ioport_read8(const void __iomem *addr) { return inb(ADDR2PORT(addr)); } -static unsigned int ioport_read16(void __iomem *addr) +static unsigned int ioport_read16(const void __iomem *addr) { return inw(ADDR2PORT(addr)); } -static unsigned int ioport_read32(void __iomem *addr) +static unsigned int ioport_read32(const void __iomem *addr) { return inl(ADDR2PORT(addr)); } @@ -99,17 +99,17 @@ static void ioport_write32(u32 datum, void __iomem *addr) outl(datum, ADDR2PORT(addr)); } -static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read8r(const void __iomem *addr, void *dst, unsigned long count) { insb(ADDR2PORT(addr), dst, count); } -static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read16r(const void __iomem *addr, void *dst, unsigned long count) { insw(ADDR2PORT(addr), dst, count); } -static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read32r(const void __iomem *addr, void *dst, unsigned long count) { insl(ADDR2PORT(addr), dst, count); } @@ -150,37 +150,37 @@ static const struct iomap_ops ioport_ops = { /* Legacy I/O memory ops */ -static unsigned int iomem_read8(void __iomem *addr) +static unsigned int iomem_read8(const void __iomem *addr) { return readb(addr); } -static unsigned int iomem_read16(void __iomem *addr) +static unsigned int iomem_read16(const void __iomem *addr) { return readw(addr); } -static unsigned int iomem_read16be(void __iomem *addr) +static unsigned int iomem_read16be(const void __iomem *addr) { return __raw_readw(addr); } -static unsigned int iomem_read32(void __iomem *addr) +static unsigned int iomem_read32(const void __iomem *addr) { return readl(addr); } -static unsigned int iomem_read32be(void __iomem *addr) +static unsigned int iomem_read32be(const void __iomem *addr) { return __raw_readl(addr); } -static u64 iomem_read64(void __iomem *addr) +static u64 iomem_read64(const void __iomem *addr) { return readq(addr); } -static u64 iomem_read64be(void __iomem *addr) +static u64 iomem_read64be(const void __iomem *addr) { return __raw_readq(addr); } @@ -220,7 +220,7 @@ static void iomem_write64be(u64 datum, void __iomem *addr) __raw_writel(datum, addr); } -static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read8r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u8 *)dst = __raw_readb(addr); @@ -228,7 +228,7 @@ static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read16r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u16 *)dst = __raw_readw(addr); @@ -236,7 +236,7 @@ static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read32r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u32 *)dst = __raw_readl(addr); @@ -297,49 +297,49 @@ static const struct iomap_ops *iomap_ops[8] = { }; -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr); return *((u8 *)addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr); return le16_to_cpup((u16 *)addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr); return *((u16 *)addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr); return le32_to_cpup((u32 *)addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr); return *((u32 *)addr); } -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64(addr); return le64_to_cpup((u64 *)addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64be(addr); @@ -411,7 +411,7 @@ void iowrite64be(u64 datum, void __iomem *addr) /* Repeating interfaces */ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count); @@ -423,7 +423,7 @@ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count); @@ -435,7 +435,7 @@ void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count); diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 5ac84efc6ede..9fe4fb3b08aa 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -15,23 +15,23 @@ * Here comes the ppc64 implementation of the IOMAP * interfaces. */ -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return readw_be(addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return readl_be(addr); } @@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); #ifdef __powerpc64__ -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { return readq(addr); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { return readq(addr); } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { return readq(addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { return readq_be(addr); } @@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * FIXME! We could make these do EEH handling if we really * wanted. Not clear if we do. */ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { readsb(addr, dst, count); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { readsw(addr, dst, count); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { readsl(addr, dst, count); } diff --git a/arch/sh/kernel/iomap.c b/arch/sh/kernel/iomap.c index ef9e2c97cbb7..0a0dff4e66de 100644 --- a/arch/sh/kernel/iomap.c +++ b/arch/sh/kernel/iomap.c @@ -8,31 +8,31 @@ #include #include -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } EXPORT_SYMBOL(ioread8); -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } EXPORT_SYMBOL(ioread16); -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return be16_to_cpu(__raw_readw(addr)); } EXPORT_SYMBOL(ioread16be); -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } EXPORT_SYMBOL(ioread32); -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return be32_to_cpu(__raw_readl(addr)); } @@ -74,7 +74,7 @@ EXPORT_SYMBOL(iowrite32be); * convert to CPU byte order. We write in "IO byte * order" (we also don't have IO barriers). */ -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -83,7 +83,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -92,7 +92,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -125,19 +125,19 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } } -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insb(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insw(addr, dst, count); } EXPORT_SYMBOL(ioread16_rep); -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insl(addr, dst, count); } diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index c10a9318a4b7..53945ca5d785 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -679,7 +679,7 @@ pdc_receive(struct pdc_state *pdcs) /* read last_rx_curr from register once */ pdcs->last_rx_curr = - (ioread32(&pdcs->rxregs_64->status0) & + (ioread32((const void __iomem *)&pdcs->rxregs_64->status0) & CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE; do { diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index a5cacfe24a42..fd72d9088bdc 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -40,7 +40,7 @@ static int sh_clk_mstp_enable(struct clk *clk) { sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk); if (clk->status_reg) { - unsigned int (*read)(void __iomem *addr); + unsigned int (*read)(const void __iomem *addr); int i; void __iomem *mapped_status = (phys_addr_t)clk->status_reg - (phys_addr_t)clk->enable_reg + clk->mapped_reg; diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 9d28a5e82f73..649224664969 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -26,14 +26,14 @@ * in the low address range. Architectures for which this is not * true can't use this generic implementation. */ -extern unsigned int ioread8(void __iomem *); -extern unsigned int ioread16(void __iomem *); -extern unsigned int ioread16be(void __iomem *); -extern unsigned int ioread32(void __iomem *); -extern unsigned int ioread32be(void __iomem *); +extern unsigned int ioread8(const void __iomem *); +extern unsigned int ioread16(const void __iomem *); +extern unsigned int ioread16be(const void __iomem *); +extern unsigned int ioread32(const void __iomem *); +extern unsigned int ioread32be(const void __iomem *); #ifdef CONFIG_64BIT -extern u64 ioread64(void __iomem *); -extern u64 ioread64be(void __iomem *); +extern u64 ioread64(const void __iomem *); +extern u64 ioread64be(const void __iomem *); #endif #ifdef readq @@ -41,10 +41,10 @@ extern u64 ioread64be(void __iomem *); #define ioread64_hi_lo ioread64_hi_lo #define ioread64be_lo_hi ioread64be_lo_hi #define ioread64be_hi_lo ioread64be_hi_lo -extern u64 ioread64_lo_hi(void __iomem *addr); -extern u64 ioread64_hi_lo(void __iomem *addr); -extern u64 ioread64be_lo_hi(void __iomem *addr); -extern u64 ioread64be_hi_lo(void __iomem *addr); +extern u64 ioread64_lo_hi(const void __iomem *addr); +extern u64 ioread64_hi_lo(const void __iomem *addr); +extern u64 ioread64be_lo_hi(const void __iomem *addr); +extern u64 ioread64be_hi_lo(const void __iomem *addr); #endif extern void iowrite8(u8, void __iomem *); @@ -79,9 +79,9 @@ extern void iowrite64be_hi_lo(u64 val, void __iomem *addr); * memory across multiple ports, use "memcpy_toio()" * and friends. */ -extern void ioread8_rep(void __iomem *port, void *buf, unsigned long count); -extern void ioread16_rep(void __iomem *port, void *buf, unsigned long count); -extern void ioread32_rep(void __iomem *port, void *buf, unsigned long count); +extern void ioread8_rep(const void __iomem *port, void *buf, unsigned long count); +extern void ioread16_rep(const void __iomem *port, void *buf, unsigned long count); +extern void ioread32_rep(const void __iomem *port, void *buf, unsigned long count); extern void iowrite8_rep(void __iomem *port, const void *buf, unsigned long count); extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count); diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index ae21b72cce85..f32522bb3aa5 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -57,7 +57,7 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_hi_lo #define ioread64_hi_lo ioread64_hi_lo -static inline u64 ioread64_hi_lo(void __iomem *addr) +static inline u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) #ifndef ioread64be_hi_lo #define ioread64be_hi_lo ioread64be_hi_lo -static inline u64 ioread64be_hi_lo(void __iomem *addr) +static inline u64 ioread64be_hi_lo(const void __iomem *addr) { u32 low, high; diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index faaa842dbdb9..448a21435dba 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -57,7 +57,7 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_lo_hi #define ioread64_lo_hi ioread64_lo_hi -static inline u64 ioread64_lo_hi(void __iomem *addr) +static inline u64 ioread64_lo_hi(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) #ifndef ioread64be_lo_hi #define ioread64be_lo_hi ioread64be_lo_hi -static inline u64 ioread64be_lo_hi(void __iomem *addr) +static inline u64 ioread64be_lo_hi(const void __iomem *addr) { u32 low, high; diff --git a/lib/iomap.c b/lib/iomap.c index e909ab71e995..fbaa3e8f19d6 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -70,27 +70,27 @@ static void bad_io_access(unsigned long port, const char *access) #define mmio_read64be(addr) swab64(readq(addr)) #endif -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { IO_COND(addr, return inb(port), return readb(addr)); return 0xff; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { IO_COND(addr, return inw(port), return readw(addr)); return 0xffff; } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); return 0xffff; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { IO_COND(addr, return inl(port), return readl(addr)); return 0xffffffff; } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); return 0xffffffff; @@ -142,26 +142,26 @@ static u64 pio_read64be_hi_lo(unsigned long port) return lo | (hi << 32); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64be_lo_hi(port), return mmio_read64be(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64be_hi_lo(port), return mmio_read64be(addr)); @@ -275,7 +275,7 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * order" (we also don't have IO barriers). */ #ifndef mmio_insb -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -283,7 +283,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) dst++; } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -291,7 +291,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) dst++; } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -325,15 +325,15 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } #endif -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count)); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count)); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count)); } -- cgit From 5ca6ad7dce2f3d05bd67a15031c750c0214b7b33 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:11 -0700 Subject: rtl818x: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Acked-by: Kalle Valo Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Dave Jiang Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-3-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h index 7948a2da195a..2ff00800d45b 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h @@ -150,17 +150,17 @@ void rtl8180_write_phy(struct ieee80211_hw *dev, u8 addr, u32 data); void rtl8180_set_anaparam(struct rtl8180_priv *priv, u32 anaparam); void rtl8180_set_anaparam2(struct rtl8180_priv *priv, u32 anaparam2); -static inline u8 rtl818x_ioread8(struct rtl8180_priv *priv, u8 __iomem *addr) +static inline u8 rtl818x_ioread8(struct rtl8180_priv *priv, const u8 __iomem *addr) { return ioread8(addr); } -static inline u16 rtl818x_ioread16(struct rtl8180_priv *priv, __le16 __iomem *addr) +static inline u16 rtl818x_ioread16(struct rtl8180_priv *priv, const __le16 __iomem *addr) { return ioread16(addr); } -static inline u32 rtl818x_ioread32(struct rtl8180_priv *priv, __le32 __iomem *addr) +static inline u32 rtl818x_ioread32(struct rtl8180_priv *priv, const __le32 __iomem *addr) { return ioread32(addr); } -- cgit From 58184e95e9f1f80dc7adca215ccbeabfee081b23 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:15 -0700 Subject: ntb: intel: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Acked-by: Dave Jiang Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Kalle Valo Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-4-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/ntb/hw/intel/ntb_hw_gen1.c | 2 +- drivers/ntb/hw/intel/ntb_hw_gen3.h | 2 +- drivers/ntb/hw/intel/ntb_hw_intel.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 423f9b8fbbcf..3185efeab487 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -1205,7 +1205,7 @@ int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, ndev->peer_reg->spad); } -static u64 xeon_db_ioread(void __iomem *mmio) +static u64 xeon_db_ioread(const void __iomem *mmio) { return (u64)ioread16(mmio); } diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.h b/drivers/ntb/hw/intel/ntb_hw_gen3.h index 2bc5d8356045..dea93989942d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.h @@ -91,7 +91,7 @@ #define GEN3_DB_TOTAL_SHIFT 33 #define GEN3_SPAD_COUNT 16 -static inline u64 gen3_db_ioread(void __iomem *mmio) +static inline u64 gen3_db_ioread(const void __iomem *mmio) { return ioread64(mmio); } diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index d61fcd91714b..05e2335c9596 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -103,7 +103,7 @@ struct intel_ntb_dev; struct intel_ntb_reg { int (*poll_link)(struct intel_ntb_dev *ndev); int (*link_is_up)(struct intel_ntb_dev *ndev); - u64 (*db_ioread)(void __iomem *mmio); + u64 (*db_ioread)(const void __iomem *mmio); void (*db_iowrite)(u64 db_bits, void __iomem *mmio); unsigned long ntb_ctl; resource_size_t db_size; -- cgit From fe0580ac5cea1e534ce32a44306161f34ddbc60a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:20 -0700 Subject: virtio: pci: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Dave Jiang Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Kalle Valo Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-5-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_pci_modern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 9bdc6f68221f..3e14e700b231 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -27,16 +27,16 @@ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses * for 16-bit fields and 8-bit accesses for 8-bit fields. */ -static inline u8 vp_ioread8(u8 __iomem *addr) +static inline u8 vp_ioread8(const u8 __iomem *addr) { return ioread8(addr); } -static inline u16 vp_ioread16 (__le16 __iomem *addr) +static inline u16 vp_ioread16 (const __le16 __iomem *addr) { return ioread16(addr); } -static inline u32 vp_ioread32(__le32 __iomem *addr) +static inline u32 vp_ioread32(const __le32 __iomem *addr) { return ioread32(addr); } -- cgit From d4e7cd36a90e38e0276d6ce0c20f5ccef17ec38c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 15 Aug 2020 11:44:50 -0700 Subject: io_uring: sanitize double poll handling There's a bit of confusion on the matching pairs of poll vs double poll, depending on if the request is a pure poll (IORING_OP_POLL_ADD) or poll driven retry. Add io_poll_get_double() that returns the double poll waitqueue, if any, and io_poll_get_single() that returns the original poll waitqueue. With that, remove the argument to io_poll_remove_double(). Finally ensure that wait->private is cleared once the double poll handler has run, so that remove knows it's already been seen. Cc: stable@vger.kernel.org # v5.8 Reported-by: syzbot+7f617d4a9369028b8a2c@syzkaller.appspotmail.com Fixes: 18bceab101ad ("io_uring: allow POLL_ADD with double poll_wait() users") Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7dd6df15bc49..cb030912bf5e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4649,9 +4649,24 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) return false; } -static void io_poll_remove_double(struct io_kiocb *req, void *data) +static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) { - struct io_poll_iocb *poll = data; + /* pure poll stashes this in ->io, poll driven retry elsewhere */ + if (req->opcode == IORING_OP_POLL_ADD) + return (struct io_poll_iocb *) req->io; + return req->apoll->double_poll; +} + +static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) +{ + if (req->opcode == IORING_OP_POLL_ADD) + return &req->poll; + return &req->apoll->poll; +} + +static void io_poll_remove_double(struct io_kiocb *req) +{ + struct io_poll_iocb *poll = io_poll_get_double(req); lockdep_assert_held(&req->ctx->completion_lock); @@ -4671,7 +4686,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error) { struct io_ring_ctx *ctx = req->ctx; - io_poll_remove_double(req, req->io); + io_poll_remove_double(req); req->poll.done = true; io_cqring_fill_event(req, error ? error : mangle_poll(mask)); io_commit_cqring(ctx); @@ -4711,7 +4726,7 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = req->apoll->double_poll; + struct io_poll_iocb *poll = io_poll_get_single(req); __poll_t mask = key_to_poll(key); /* for instances that support it check for an event match first: */ @@ -4725,6 +4740,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, done = list_empty(&poll->wait.entry); if (!done) list_del_init(&poll->wait.entry); + /* make sure double remove sees this as being gone */ + wait->private = NULL; spin_unlock(&poll->head->lock); if (!done) __io_async_wake(req, poll, mask, io_poll_task_func); @@ -4808,7 +4825,7 @@ static void io_async_task_func(struct callback_head *cb) if (hash_hashed(&req->hash_node)) hash_del(&req->hash_node); - io_poll_remove_double(req, apoll->double_poll); + io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (!READ_ONCE(apoll->poll.canceled)) @@ -4919,7 +4936,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret || ipt.error) { - io_poll_remove_double(req, apoll->double_poll); + io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); kfree(apoll->double_poll); kfree(apoll); @@ -4951,14 +4968,13 @@ static bool io_poll_remove_one(struct io_kiocb *req) { bool do_complete; + io_poll_remove_double(req); + if (req->opcode == IORING_OP_POLL_ADD) { - io_poll_remove_double(req, req->io); do_complete = __io_poll_remove_one(req, &req->poll); } else { struct async_poll *apoll = req->apoll; - io_poll_remove_double(req, apoll->double_poll); - /* non-poll requests have submit ref still */ do_complete = __io_poll_remove_one(req, &apoll->poll); if (do_complete) { -- cgit From f91daf565b0e272a33bd3fcd19eaebd331c5cffd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 15 Aug 2020 15:58:42 -0700 Subject: io_uring: short circuit -EAGAIN for blocking read attempt One case was missed in the short IO retry handling, and that's hitting -EAGAIN on a blocking attempt read (eg from io-wq context). This is a problem on sockets that are marked as non-blocking when created, they don't carry any REQ_F_NOWAIT information to help us terminate them instead of perpetually retrying. Fixes: 227c0c9673d8 ("io_uring: internally retry short reads") Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb030912bf5e..dc506b75659c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3186,6 +3186,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = 0; goto out_free; } else if (ret == -EAGAIN) { + if (!force_nonblock) + goto done; ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (ret) goto out_free; @@ -3195,7 +3197,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, } /* read it all, or we did blocking attempt. no retry. */ - if (!iov_iter_count(iter) || !force_nonblock) + if (!iov_iter_count(iter) || !force_nonblock || + (req->file->f_flags & O_NONBLOCK)) goto done; io_size -= ret; -- cgit From 6f6aea7e966cda5a817d091e938c2d9b52209893 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 16 Aug 2020 17:24:03 +0300 Subject: parisc: fix PMD pages allocation by restoring pmd_alloc_one() Commit 1355c31eeb7e ("asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()") converted parisc to use generic version of pmd_alloc_one() but it missed the fact that parisc uses order-1 pages for PMD. Restore the original version of pmd_alloc_one() for parisc, just use GFP_PGTABLE_KERNEL that implies __GFP_ZERO instead of GFP_KERNEL and memset. Fixes: 1355c31eeb7e ("asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()") Reported-by: Meelis Roos Signed-off-by: Mike Rapoport Tested-by: Meelis Roos Reviewed-by: Matthew Wilcox (Oracle) Link: https://lkml.kernel.org/r/9f2b5ebd-e4a4-0fa1-6cd3-4b9f6892d1ad@linux.ee Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index cc7ecc2ef55d..a6482b2ce0ea 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -10,6 +10,7 @@ #include +#define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PMD_FREE #define __HAVE_ARCH_PGD_FREE #include @@ -67,6 +68,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT))); } +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER); +} + static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) { -- cgit From 9123e3a74ec7b934a4a099e98af6a61c2f80bbf5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Aug 2020 13:04:57 -0700 Subject: Linux 5.9-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 254e80a96b23..9cac6fde3479 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 8 +PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit From 4473171db68fe9e3de3f2bc68a00527f23852ad8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:14 +0530 Subject: firmware: ti_sci: Drop the device id to resource type translation With ABI 3.0, sysfw deprecated special resource types used for AM65x SoC. Instead started using device id as resource type similar to the convention used in J721E SOC. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-2-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 53cee17d0115..81e4d7797ac0 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -3355,16 +3355,6 @@ static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { .rm_type_map = NULL, }; -static struct ti_sci_rm_type_map ti_sci_am654_rm_type_map[] = { - {.dev_id = 56, .type = 0x00b}, /* GIC_IRQ */ - {.dev_id = 179, .type = 0x000}, /* MAIN_NAV_UDMASS_IA0 */ - {.dev_id = 187, .type = 0x009}, /* MAIN_NAV_RA */ - {.dev_id = 188, .type = 0x006}, /* MAIN_NAV_UDMAP */ - {.dev_id = 194, .type = 0x007}, /* MCU_NAV_UDMAP */ - {.dev_id = 195, .type = 0x00a}, /* MCU_NAV_RA */ - {.dev_id = 0, .type = 0x000}, /* end of table */ -}; - /* Description for AM654 */ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { .default_host_id = 12, @@ -3373,7 +3363,7 @@ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 60, - .rm_type_map = ti_sci_am654_rm_type_map, + .rm_type_map = NULL, }; static const struct of_device_id ti_sci_of_match[] = { -- cgit From 9b98e02a3d369c5d0875338ea0717030471b5210 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:15 +0530 Subject: firmware: ti_sci: Drop unused structure ti_sci_rm_type_map struct ti_sci_rm_type_map is no longer used. Drop its definition and its declarations. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-3-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 56 +---------------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 81e4d7797ac0..03bd01ba5fe7 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -64,22 +64,6 @@ struct ti_sci_xfers_info { spinlock_t xfer_lock; }; -/** - * struct ti_sci_rm_type_map - Structure representing TISCI Resource - * management representation of dev_ids. - * @dev_id: TISCI device ID - * @type: Corresponding id as identified by TISCI RM. - * - * Note: This is used only as a work around for using RM range apis - * for AM654 SoC. For future SoCs dev_id will be used as type - * for RM range APIs. In order to maintain ABI backward compatibility - * type is not being changed for AM654 SoC. - */ -struct ti_sci_rm_type_map { - u32 dev_id; - u16 type; -}; - /** * struct ti_sci_desc - Description of SoC integration * @default_host_id: Host identifier representing the compute entity @@ -87,14 +71,12 @@ struct ti_sci_rm_type_map { * @max_msgs: Maximum number of messages that can be pending * simultaneously in the system * @max_msg_size: Maximum size of data per message that can be handled. - * @rm_type_map: RM resource type mapping structure. */ struct ti_sci_desc { u8 default_host_id; int max_rx_timeout_ms; int max_msgs; int max_msg_size; - struct ti_sci_rm_type_map *rm_type_map; }; /** @@ -1710,33 +1692,6 @@ fail: return ret; } -static int ti_sci_get_resource_type(struct ti_sci_info *info, u16 dev_id, - u16 *type) -{ - struct ti_sci_rm_type_map *rm_type_map = info->desc->rm_type_map; - bool found = false; - int i; - - /* If map is not provided then assume dev_id is used as type */ - if (!rm_type_map) { - *type = dev_id; - return 0; - } - - for (i = 0; rm_type_map[i].dev_id; i++) { - if (rm_type_map[i].dev_id == dev_id) { - *type = rm_type_map[i].type; - found = true; - break; - } - } - - if (!found) - return -EINVAL; - - return 0; -} - /** * ti_sci_get_resource_range - Helper to get a range of resources assigned * to a host. Resource is uniquely identified by @@ -1760,7 +1715,6 @@ static int ti_sci_get_resource_range(const struct ti_sci_handle *handle, struct ti_sci_xfer *xfer; struct ti_sci_info *info; struct device *dev; - u16 type; int ret = 0; if (IS_ERR(handle)) @@ -1780,15 +1734,9 @@ static int ti_sci_get_resource_range(const struct ti_sci_handle *handle, return ret; } - ret = ti_sci_get_resource_type(info, dev_id, &type); - if (ret) { - dev_err(dev, "rm type lookup failed for %u\n", dev_id); - goto fail; - } - req = (struct ti_sci_msg_req_get_resource_range *)xfer->xfer_buf; req->secondary_host = s_host; - req->type = type & MSG_RM_RESOURCE_TYPE_MASK; + req->type = dev_id & MSG_RM_RESOURCE_TYPE_MASK; req->subtype = subtype & MSG_RM_RESOURCE_SUBTYPE_MASK; ret = ti_sci_do_xfer(info, xfer); @@ -3352,7 +3300,6 @@ static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 64, - .rm_type_map = NULL, }; /* Description for AM654 */ @@ -3363,7 +3310,6 @@ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 60, - .rm_type_map = NULL, }; static const struct of_device_id ti_sci_of_match[] = { -- cgit From 53bf2b0e4e4c1ff0a957474237f9dcd20036ca54 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:16 +0530 Subject: firmware: ti_sci: Add support for getting resource with subtype With SYSFW ABI 3.0 changes, interrupts coming out of an interrupt controller is identified by a type and it is consistent across SoCs. Similarly global events for Interrupt aggregator. So add an API to get resource range using a resource type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-4-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 89 +++++++++++++++++++++++++--------- include/linux/soc/ti/ti_sci_protocol.h | 13 +++++ 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 03bd01ba5fe7..722af9ee53d6 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -3208,61 +3208,50 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res) EXPORT_SYMBOL_GPL(ti_sci_get_num_resources); /** - * devm_ti_sci_get_of_resource() - Get a TISCI resource assigned to a device + * devm_ti_sci_get_resource_sets() - Get a TISCI resources assigned to a device * @handle: TISCI handle * @dev: Device pointer to which the resource is assigned * @dev_id: TISCI device id to which the resource is assigned - * @of_prop: property name by which the resource are represented + * @sub_types: Array of sub_types assigned corresponding to device + * @sets: Number of sub_types * * Return: Pointer to ti_sci_resource if all went well else appropriate * error pointer. */ -struct ti_sci_resource * -devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, - struct device *dev, u32 dev_id, char *of_prop) +static struct ti_sci_resource * +devm_ti_sci_get_resource_sets(const struct ti_sci_handle *handle, + struct device *dev, u32 dev_id, u32 *sub_types, + u32 sets) { struct ti_sci_resource *res; bool valid_set = false; - u32 resource_subtype; int i, ret; res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) return ERR_PTR(-ENOMEM); - ret = of_property_count_elems_of_size(dev_of_node(dev), of_prop, - sizeof(u32)); - if (ret < 0) { - dev_err(dev, "%s resource type ids not available\n", of_prop); - return ERR_PTR(ret); - } - res->sets = ret; - + res->sets = sets; res->desc = devm_kcalloc(dev, res->sets, sizeof(*res->desc), GFP_KERNEL); if (!res->desc) return ERR_PTR(-ENOMEM); for (i = 0; i < res->sets; i++) { - ret = of_property_read_u32_index(dev_of_node(dev), of_prop, i, - &resource_subtype); - if (ret) - return ERR_PTR(-EINVAL); - ret = handle->ops.rm_core_ops.get_range(handle, dev_id, - resource_subtype, + sub_types[i], &res->desc[i].start, &res->desc[i].num); if (ret) { dev_dbg(dev, "dev = %d subtype %d not allocated for this host\n", - dev_id, resource_subtype); + dev_id, sub_types[i]); res->desc[i].start = 0; res->desc[i].num = 0; continue; } dev_dbg(dev, "dev = %d, subtype = %d, start = %d, num = %d\n", - dev_id, resource_subtype, res->desc[i].start, + dev_id, sub_types[i], res->desc[i].start, res->desc[i].num); valid_set = true; @@ -3280,6 +3269,62 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, return ERR_PTR(-EINVAL); } +/** + * devm_ti_sci_get_of_resource() - Get a TISCI resource assigned to a device + * @handle: TISCI handle + * @dev: Device pointer to which the resource is assigned + * @dev_id: TISCI device id to which the resource is assigned + * @of_prop: property name by which the resource are represented + * + * Return: Pointer to ti_sci_resource if all went well else appropriate + * error pointer. + */ +struct ti_sci_resource * +devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, + struct device *dev, u32 dev_id, char *of_prop) +{ + struct ti_sci_resource *res; + u32 *sub_types; + int sets; + + sets = of_property_count_elems_of_size(dev_of_node(dev), of_prop, + sizeof(u32)); + if (sets < 0) { + dev_err(dev, "%s resource type ids not available\n", of_prop); + return ERR_PTR(sets); + } + + sub_types = kcalloc(sets, sizeof(*sub_types), GFP_KERNEL); + if (!sub_types) + return ERR_PTR(-ENOMEM); + + of_property_read_u32_array(dev_of_node(dev), of_prop, sub_types, sets); + res = devm_ti_sci_get_resource_sets(handle, dev, dev_id, sub_types, + sets); + + kfree(sub_types); + return res; +} +EXPORT_SYMBOL_GPL(devm_ti_sci_get_of_resource); + +/** + * devm_ti_sci_get_resource() - Get a resource range assigned to the device + * @handle: TISCI handle + * @dev: Device pointer to which the resource is assigned + * @dev_id: TISCI device id to which the resource is assigned + * @suub_type: TISCI resource subytpe representing the resource. + * + * Return: Pointer to ti_sci_resource if all went well else appropriate + * error pointer. + */ +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type) +{ + return devm_ti_sci_get_resource_sets(handle, dev, dev_id, &sub_type, 1); +} +EXPORT_SYMBOL_GPL(devm_ti_sci_get_resource); + static int tisci_reboot_handler(struct notifier_block *nb, unsigned long mode, void *cmd) { diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 49c5d29cd33c..cf27b080e148 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -220,6 +220,9 @@ struct ti_sci_rm_core_ops { u16 *range_start, u16 *range_num); }; +#define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 +#define TI_SCI_RESASG_SUBTYPE_IA_VINT 0xa +#define TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT 0xd /** * struct ti_sci_rm_irq_ops: IRQ management operations * @set_irq: Set an IRQ route between the requested source @@ -556,6 +559,9 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res); struct ti_sci_resource * devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, struct device *dev, u32 dev_id, char *of_prop); +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); #else /* CONFIG_TI_SCI_PROTOCOL */ @@ -609,6 +615,13 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, { return ERR_PTR(-EINVAL); } + +static inline struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TI_SCI_PROTOCOL */ #endif /* __TISCI_PROTOCOL_H */ -- cgit From 9a8e2ae71f3553f1b6cd4e3681f04e5d0f147387 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:17 +0530 Subject: dt-bindings: irqchip: ti, sci-intr: Update bindings to drop the usage of gic as parent Drop the firmware related dt-bindings and use the hardware specified interrupt numbers within Interrupt Router. This ensures interrupt router DT node need not assume any interrupt parent type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-5-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-intr.txt | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt index 178fca08278f..c7046f3da201 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt @@ -44,15 +44,17 @@ Required Properties: 4: If intr supports level triggered interrupts. - interrupt-controller: Identifies the node as an interrupt controller - #interrupt-cells: Specifies the number of cells needed to encode an - interrupt source. The value should be 2. - First cell should contain the TISCI device ID of source - Second cell should contain the interrupt source offset - within the device. + interrupt source. The value should be 1. + First cell should contain interrupt router input number + as specified by hardware. - ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dst-id: TISCI device ID of the destination IRQ controller. -- ti,sci-rm-range-girq: Array of TISCI subtype ids representing the host irqs - assigned to this interrupt router. Each subtype id - corresponds to a range of host irqs. +- ti,sci-dev-id: TISCI device id of interrupt controller. +- ti,interrupt-ranges: Set of triplets containing ranges that convert + the INTR output interrupt numbers to parent's + interrupt number. Each triplet has following entries: + - First entry specifies the base for intr output irq + - Second entry specifies the base for parent irqs + - Third entry specifies the limit For more details on TISCI IRQ resource management refer: https://downloads.ti.com/tisci/esd/latest/2_tisci_msgs/rm/rm_irq.html @@ -62,21 +64,20 @@ Example: The following example demonstrates both interrupt router node and the consumer node(main gpio) on the AM654 SoC: -main_intr: interrupt-controller0 { +main_gpio_intr: interrupt-controller0 { compatible = "ti,sci-intr"; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <0 360 32>; }; main_gpio0: gpio@600000 { ... - interrupt-parent = <&main_intr>; - interrupts = <57 256>, <57 257>, <57 258>, - <57 259>, <57 260>, <57 261>; + interrupt-parent = <&main_gpio_intr>; + interrupts = <192>, <193>, <194>, <195>, <196>, <197>; ... }; -- cgit From b8713af858997c3df5bc5b48e66ac1f1bfe19779 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:18 +0530 Subject: dt-bindings: irqchip: Convert ti, sci-intr bindings to yaml In order to automate the verification of DT nodes convert ti,sci-intr.txt ti,sci-intr.yaml. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-6-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-intr.txt | 83 ----------------- .../bindings/interrupt-controller/ti,sci-intr.yaml | 102 +++++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 103 insertions(+), 84 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt deleted file mode 100644 index c7046f3da201..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +++ /dev/null @@ -1,83 +0,0 @@ -Texas Instruments K3 Interrupt Router -===================================== - -The Interrupt Router (INTR) module provides a mechanism to mux M -interrupt inputs to N interrupt outputs, where all M inputs are selectable -to be driven per N output. An Interrupt Router can either handle edge triggered -or level triggered interrupts and that is fixed in hardware. - - Interrupt Router - +----------------------+ - | Inputs Outputs | - +-------+ | +------+ +-----+ | - | GPIO |----------->| | irq0 | | 0 | | Host IRQ - +-------+ | +------+ +-----+ | controller - | . . | +-------+ - +-------+ | . . |----->| IRQ | - | INTA |----------->| . . | +-------+ - +-------+ | . +-----+ | - | +------+ | N | | - | | irqM | +-----+ | - | +------+ | - | | - +----------------------+ - -There is one register per output (MUXCNTL_N) that controls the selection. -Configuration of these MUXCNTL_N registers is done by a system controller -(like the Device Memory and Security Controller on K3 AM654 SoC). System -controller will keep track of the used and unused registers within the Router. -Driver should request the system controller to get the range of GIC IRQs -assigned to the requesting hosts. It is the drivers responsibility to keep -track of Host IRQs. - -Communication between the host processor running an OS and the system -controller happens through a protocol called TI System Control Interface -(TISCI protocol). For more details refer: -Documentation/devicetree/bindings/arm/keystone/ti,sci.txt - -TISCI Interrupt Router Node: ----------------------------- -Required Properties: -- compatible: Must be "ti,sci-intr". -- ti,intr-trigger-type: Should be one of the following: - 1: If intr supports edge triggered interrupts. - 4: If intr supports level triggered interrupts. -- interrupt-controller: Identifies the node as an interrupt controller -- #interrupt-cells: Specifies the number of cells needed to encode an - interrupt source. The value should be 1. - First cell should contain interrupt router input number - as specified by hardware. -- ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device id of interrupt controller. -- ti,interrupt-ranges: Set of triplets containing ranges that convert - the INTR output interrupt numbers to parent's - interrupt number. Each triplet has following entries: - - First entry specifies the base for intr output irq - - Second entry specifies the base for parent irqs - - Third entry specifies the limit - -For more details on TISCI IRQ resource management refer: -https://downloads.ti.com/tisci/esd/latest/2_tisci_msgs/rm/rm_irq.html - -Example: --------- -The following example demonstrates both interrupt router node and the consumer -node(main gpio) on the AM654 SoC: - -main_gpio_intr: interrupt-controller0 { - compatible = "ti,sci-intr"; - ti,intr-trigger-type = <1>; - interrupt-controller; - interrupt-parent = <&gic500>; - #interrupt-cells = <1>; - ti,sci = <&dmsc>; - ti,sci-dev-id = <131>; - ti,interrupt-ranges = <0 360 32>; -}; - -main_gpio0: gpio@600000 { - ... - interrupt-parent = <&main_gpio_intr>; - interrupts = <192>, <193>, <194>, <195>, <196>, <197>; - ... -}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml new file mode 100644 index 000000000000..cff6a956afb4 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/ti,sci-intr.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments K3 Interrupt Router + +maintainers: + - Lokesh Vutla + +allOf: + - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# + +description: | + The Interrupt Router (INTR) module provides a mechanism to mux M + interrupt inputs to N interrupt outputs, where all M inputs are selectable + to be driven per N output. An Interrupt Router can either handle edge + triggered or level triggered interrupts and that is fixed in hardware. + + Interrupt Router + +----------------------+ + | Inputs Outputs | + +-------+ | +------+ +-----+ | + | GPIO |----------->| | irq0 | | 0 | | Host IRQ + +-------+ | +------+ +-----+ | controller + | . . | +-------+ + +-------+ | . . |----->| IRQ | + | INTA |----------->| . . | +-------+ + +-------+ | . +-----+ | + | +------+ | N | | + | | irqM | +-----+ | + | +------+ | + | | + +----------------------+ + + There is one register per output (MUXCNTL_N) that controls the selection. + Configuration of these MUXCNTL_N registers is done by a system controller + (like the Device Memory and Security Controller on K3 AM654 SoC). System + controller will keep track of the used and unused registers within the Router. + Driver should request the system controller to get the range of GIC IRQs + assigned to the requesting hosts. It is the drivers responsibility to keep + track of Host IRQs. + + Communication between the host processor running an OS and the system + controller happens through a protocol called TI System Control Interface + (TISCI protocol). + +properties: + compatible: + const: ti,sci-intr + + ti,intr-trigger-type: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 4] + description: | + Should be one of the following. + 1 = If intr supports edge triggered interrupts. + 4 = If intr supports level triggered interrupts. + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + description: | + The 1st cell should contain interrupt router input hw number. + + ti,interrupt-ranges: + $ref: /schemas/types.yaml#/definitions/uint32-matrix + description: | + Interrupt ranges that converts the INTR output hw irq numbers + to parents's input interrupt numbers. + items: + items: + - description: | + "output_irq" specifies the base for intr output irq + - description: | + "parent's input irq" specifies the base for parent irq + - description: | + "limit" specifies the limit for translation + +required: + - compatible + - ti,intr-trigger-type + - interrupt-controller + - '#interrupt-cells' + - ti,sci + - ti,sci-dev-id + - ti,interrupt-ranges + +examples: + - | + main_gpio_intr: interrupt-controller0 { + compatible = "ti,sci-intr"; + ti,intr-trigger-type = <1>; + interrupt-controller; + interrupt-parent = <&gic500>; + #interrupt-cells = <1>; + ti,sci = <&dmsc>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <0 360 32>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..e08405c2c22e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17117,7 +17117,7 @@ F: Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml F: Documentation/devicetree/bindings/arm/keystone/ti,sci.txt F: Documentation/devicetree/bindings/clock/ti,sci-clk.txt F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt -F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml F: Documentation/devicetree/bindings/reset/ti,sci-reset.txt F: Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt F: drivers/clk/keystone/sci-clk.c -- cgit From a5b659bd4bc7518a8e45fda5256c5e5e8d3b7c49 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:19 +0530 Subject: irqchip/ti-sci-intr: Add support for INTR being a parent to INTR Driver assumes that Interrupt parent to Interrupt router is always GIC. This is not true always and an Interrupt Router can be a parent to Interrupt Router. Update the driver to detect the parent and request the parent irqs accordingly. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200806074826.24607-7-lokeshvutla@ti.com --- drivers/irqchip/irq-ti-sci-intr.c | 152 +++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 59 deletions(-) diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c index 5ea148faf2ab..cbc1758228d9 100644 --- a/drivers/irqchip/irq-ti-sci-intr.c +++ b/drivers/irqchip/irq-ti-sci-intr.c @@ -17,29 +17,20 @@ #include #include -#define TI_SCI_DEV_ID_MASK 0xffff -#define TI_SCI_DEV_ID_SHIFT 16 -#define TI_SCI_IRQ_ID_MASK 0xffff -#define TI_SCI_IRQ_ID_SHIFT 0 -#define HWIRQ_TO_DEVID(hwirq) (((hwirq) >> (TI_SCI_DEV_ID_SHIFT)) & \ - (TI_SCI_DEV_ID_MASK)) -#define HWIRQ_TO_IRQID(hwirq) ((hwirq) & (TI_SCI_IRQ_ID_MASK)) -#define TO_HWIRQ(dev, index) ((((dev) & TI_SCI_DEV_ID_MASK) << \ - TI_SCI_DEV_ID_SHIFT) | \ - ((index) & TI_SCI_IRQ_ID_MASK)) - /** * struct ti_sci_intr_irq_domain - Structure representing a TISCI based * Interrupt Router IRQ domain. * @sci: Pointer to TISCI handle - * @dst_irq: TISCI resource pointer representing GIC irq controller. - * @dst_id: TISCI device ID of the GIC irq controller. + * @out_irqs: TISCI resource pointer representing INTR irqs. + * @dev: Struct device pointer. + * @ti_sci_id: TI-SCI device identifier * @type: Specifies the trigger type supported by this Interrupt Router */ struct ti_sci_intr_irq_domain { const struct ti_sci_handle *sci; - struct ti_sci_resource *dst_irq; - u32 dst_id; + struct ti_sci_resource *out_irqs; + struct device *dev; + u32 ti_sci_id; u32 type; }; @@ -70,15 +61,44 @@ static int ti_sci_intr_irq_domain_translate(struct irq_domain *domain, { struct ti_sci_intr_irq_domain *intr = domain->host_data; - if (fwspec->param_count != 2) + if (fwspec->param_count != 1) return -EINVAL; - *hwirq = TO_HWIRQ(fwspec->param[0], fwspec->param[1]); + *hwirq = fwspec->param[0]; *type = intr->type; return 0; } +/** + * ti_sci_intr_xlate_irq() - Translate hwirq to parent's hwirq. + * @intr: IRQ domain corresponding to Interrupt Router + * @irq: Hardware irq corresponding to the above irq domain + * + * Return parent irq number if translation is available else -ENOENT. + */ +static int ti_sci_intr_xlate_irq(struct ti_sci_intr_irq_domain *intr, u32 irq) +{ + struct device_node *np = dev_of_node(intr->dev); + u32 base, pbase, size, len; + const __be32 *range; + + range = of_get_property(np, "ti,interrupt-ranges", &len); + if (!range) + return irq; + + for (len /= sizeof(*range); len >= 3; len -= 3) { + base = be32_to_cpu(*range++); + pbase = be32_to_cpu(*range++); + size = be32_to_cpu(*range++); + + if (base <= irq && irq < base + size) + return irq - base + pbase; + } + + return -ENOENT; +} + /** * ti_sci_intr_irq_domain_free() - Free the specified IRQs from the domain. * @domain: Domain to which the irqs belong @@ -89,66 +109,76 @@ static void ti_sci_intr_irq_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct ti_sci_intr_irq_domain *intr = domain->host_data; - struct irq_data *data, *parent_data; - u16 dev_id, irq_index; + struct irq_data *data; + int out_irq; - parent_data = irq_domain_get_irq_data(domain->parent, virq); data = irq_domain_get_irq_data(domain, virq); - irq_index = HWIRQ_TO_IRQID(data->hwirq); - dev_id = HWIRQ_TO_DEVID(data->hwirq); + out_irq = (uintptr_t)data->chip_data; - intr->sci->ops.rm_irq_ops.free_irq(intr->sci, dev_id, irq_index, - intr->dst_id, parent_data->hwirq); - ti_sci_release_resource(intr->dst_irq, parent_data->hwirq); + intr->sci->ops.rm_irq_ops.free_irq(intr->sci, + intr->ti_sci_id, data->hwirq, + intr->ti_sci_id, out_irq); + ti_sci_release_resource(intr->out_irqs, out_irq); irq_domain_free_irqs_parent(domain, virq, 1); irq_domain_reset_irq_data(data); } /** - * ti_sci_intr_alloc_gic_irq() - Allocate GIC specific IRQ + * ti_sci_intr_alloc_parent_irq() - Allocate parent IRQ * @domain: Pointer to the interrupt router IRQ domain * @virq: Corresponding Linux virtual IRQ number * @hwirq: Corresponding hwirq for the IRQ within this IRQ domain * - * Returns 0 if all went well else appropriate error pointer. + * Returns parent irq if all went well else appropriate error pointer. */ -static int ti_sci_intr_alloc_gic_irq(struct irq_domain *domain, - unsigned int virq, u32 hwirq) +static int ti_sci_intr_alloc_parent_irq(struct irq_domain *domain, + unsigned int virq, u32 hwirq) { struct ti_sci_intr_irq_domain *intr = domain->host_data; + struct device_node *parent_node; struct irq_fwspec fwspec; - u16 dev_id, irq_index; - u16 dst_irq; - int err; - - dev_id = HWIRQ_TO_DEVID(hwirq); - irq_index = HWIRQ_TO_IRQID(hwirq); + u16 out_irq, p_hwirq; + int err = 0; - dst_irq = ti_sci_get_free_resource(intr->dst_irq); - if (dst_irq == TI_SCI_RESOURCE_NULL) + out_irq = ti_sci_get_free_resource(intr->out_irqs); + if (out_irq == TI_SCI_RESOURCE_NULL) return -EINVAL; - fwspec.fwnode = domain->parent->fwnode; - fwspec.param_count = 3; - fwspec.param[0] = 0; /* SPI */ - fwspec.param[1] = dst_irq - 32; /* SPI offset */ - fwspec.param[2] = intr->type; + p_hwirq = ti_sci_intr_xlate_irq(intr, out_irq); + if (p_hwirq < 0) + goto err_irqs; + + parent_node = of_irq_find_parent(dev_of_node(intr->dev)); + fwspec.fwnode = of_node_to_fwnode(parent_node); + + if (of_device_is_compatible(parent_node, "arm,gic-v3")) { + /* Parent is GIC */ + fwspec.param_count = 3; + fwspec.param[0] = 0; /* SPI */ + fwspec.param[1] = p_hwirq - 32; /* SPI offset */ + fwspec.param[2] = intr->type; + } else { + /* Parent is Interrupt Router */ + fwspec.param_count = 1; + fwspec.param[0] = p_hwirq; + } err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); if (err) goto err_irqs; - err = intr->sci->ops.rm_irq_ops.set_irq(intr->sci, dev_id, irq_index, - intr->dst_id, dst_irq); + err = intr->sci->ops.rm_irq_ops.set_irq(intr->sci, + intr->ti_sci_id, hwirq, + intr->ti_sci_id, out_irq); if (err) goto err_msg; - return 0; + return p_hwirq; err_msg: irq_domain_free_irqs_parent(domain, virq, 1); err_irqs: - ti_sci_release_resource(intr->dst_irq, dst_irq); + ti_sci_release_resource(intr->out_irqs, out_irq); return err; } @@ -168,18 +198,19 @@ static int ti_sci_intr_irq_domain_alloc(struct irq_domain *domain, struct irq_fwspec *fwspec = data; unsigned long hwirq; unsigned int flags; - int err; + int err, p_hwirq; err = ti_sci_intr_irq_domain_translate(domain, fwspec, &hwirq, &flags); if (err) return err; - err = ti_sci_intr_alloc_gic_irq(domain, virq, hwirq); - if (err) - return err; + p_hwirq = ti_sci_intr_alloc_parent_irq(domain, virq, hwirq); + if (p_hwirq < 0) + return p_hwirq; irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &ti_sci_intr_irq_chip, NULL); + &ti_sci_intr_irq_chip, + (void *)(uintptr_t)p_hwirq); return 0; } @@ -214,6 +245,7 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) if (!intr) return -ENOMEM; + intr->dev = dev; ret = of_property_read_u32(dev_of_node(dev), "ti,intr-trigger-type", &intr->type); if (ret) { @@ -230,19 +262,19 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) return ret; } - ret = of_property_read_u32(dev_of_node(dev), "ti,sci-dst-id", - &intr->dst_id); + ret = of_property_read_u32(dev_of_node(dev), "ti,sci-dev-id", + &intr->ti_sci_id); if (ret) { - dev_err(dev, "missing 'ti,sci-dst-id' property\n"); + dev_err(dev, "missing 'ti,sci-dev-id' property\n"); return -EINVAL; } - intr->dst_irq = devm_ti_sci_get_of_resource(intr->sci, dev, - intr->dst_id, - "ti,sci-rm-range-girq"); - if (IS_ERR(intr->dst_irq)) { + intr->out_irqs = devm_ti_sci_get_resource(intr->sci, dev, + intr->ti_sci_id, + TI_SCI_RESASG_SUBTYPE_IR_OUTPUT); + if (IS_ERR(intr->out_irqs)) { dev_err(dev, "Destination irq resource allocation failed\n"); - return PTR_ERR(intr->dst_irq); + return PTR_ERR(intr->out_irqs); } domain = irq_domain_add_hierarchy(parent_domain, 0, 0, dev_of_node(dev), @@ -252,6 +284,8 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) return -ENOMEM; } + dev_info(dev, "Interrupt Router %d domain created\n", intr->ti_sci_id); + return 0; } -- cgit From 6dde29dc31aa265a79d9e6b3571987cfa4b179cc Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:20 +0530 Subject: dt-bindings: irqchip: ti, sci-inta: Update docs to support different parent. Drop the firmware related interrupt ranges and use the hardware specified interrupt numbers within Interrupt Aggregator. This ensures interrupt aggregator DT node need not assume any interrupt parent type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-8-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-inta.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt index 7841cb099e13..5fd3ee0f7167 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt @@ -43,13 +43,14 @@ TISCI Interrupt Aggregator Node: - msi-controller: Identifies the node as an MSI controller. - interrupt-parent: phandle of irq parent. - ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device ID of the Interrupt Aggregator. -- ti,sci-rm-range-vint: Array of TISCI subtype ids representing vints(inta - outputs) range within this INTA, assigned to the - requesting host context. -- ti,sci-rm-range-global-event: Array of TISCI subtype ids representing the - global events range reaching this IA and are assigned - to the requesting host context. +- ti,sci-dev-id: TISCI device id of interrupt controller. +- ti,interrupt-ranges: Set of triplets containing ranges that convert + the INTA output interrupt numbers to parent's + interrupt number. Each triplet has following entries: + - First entry specifies the base for vint + - Second entry specifies the base for parent irqs + - Third entry specifies the limit + Example: -------- @@ -61,6 +62,5 @@ main_udmass_inta: interrupt-controller@33d00000 { interrupt-parent = <&main_navss_intr>; ti,sci = <&dmsc>; ti,sci-dev-id = <179>; - ti,sci-rm-range-vint = <0x0>; - ti,sci-rm-range-global-event = <0x1>; + ti,interrupt-ranges = <0 0 256>; }; -- cgit From c4dff06e79d99691f18dfc8a61a1cb17c602a025 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:21 +0530 Subject: dt-bindings: irqchip: Convert ti, sci-inta bindings to yaml In order to automate the verification of DT nodes convert ti,sci-inta.txt ti,sci-inta.yaml. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-9-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-inta.txt | 66 --------------- .../bindings/interrupt-controller/ti,sci-inta.yaml | 98 ++++++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 99 insertions(+), 67 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt deleted file mode 100644 index 5fd3ee0f7167..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt +++ /dev/null @@ -1,66 +0,0 @@ -Texas Instruments K3 Interrupt Aggregator -========================================= - -The Interrupt Aggregator (INTA) provides a centralized machine -which handles the termination of system events to that they can -be coherently processed by the host(s) in the system. A maximum -of 64 events can be mapped to a single interrupt. - - - Interrupt Aggregator - +-----------------------------------------+ - | Intmap VINT | - | +--------------+ +------------+ | - m ------>| | vint | bit | | 0 |.....|63| vint0 | - . | +--------------+ +------------+ | +------+ - . | . . | | HOST | -Globalevents ------>| . . |------>| IRQ | - . | . . | | CTRL | - . | . . | +------+ - n ------>| +--------------+ +------------+ | - | | vint | bit | | 0 |.....|63| vintx | - | +--------------+ +------------+ | - | | - +-----------------------------------------+ - -Configuration of these Intmap registers that maps global events to vint is done -by a system controller (like the Device Memory and Security Controller on K3 -AM654 SoC). Driver should request the system controller to get the range -of global events and vints assigned to the requesting host. Management -of these requested resources should be handled by driver and requests -system controller to map specific global event to vint, bit pair. - -Communication between the host processor running an OS and the system -controller happens through a protocol called TI System Control Interface -(TISCI protocol). For more details refer: -Documentation/devicetree/bindings/arm/keystone/ti,sci.txt - -TISCI Interrupt Aggregator Node: -------------------------------- -- compatible: Must be "ti,sci-inta". -- reg: Should contain registers location and length. -- interrupt-controller: Identifies the node as an interrupt controller -- msi-controller: Identifies the node as an MSI controller. -- interrupt-parent: phandle of irq parent. -- ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device id of interrupt controller. -- ti,interrupt-ranges: Set of triplets containing ranges that convert - the INTA output interrupt numbers to parent's - interrupt number. Each triplet has following entries: - - First entry specifies the base for vint - - Second entry specifies the base for parent irqs - - Third entry specifies the limit - - -Example: --------- -main_udmass_inta: interrupt-controller@33d00000 { - compatible = "ti,sci-inta"; - reg = <0x0 0x33d00000 0x0 0x100000>; - interrupt-controller; - msi-controller; - interrupt-parent = <&main_navss_intr>; - ti,sci = <&dmsc>; - ti,sci-dev-id = <179>; - ti,interrupt-ranges = <0 0 256>; -}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml new file mode 100644 index 000000000000..c7cd05656a3e --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/ti,sci-inta.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments K3 Interrupt Aggregator + +maintainers: + - Lokesh Vutla + +allOf: + - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# + +description: | + The Interrupt Aggregator (INTA) provides a centralized machine + which handles the termination of system events to that they can + be coherently processed by the host(s) in the system. A maximum + of 64 events can be mapped to a single interrupt. + + Interrupt Aggregator + +-----------------------------------------+ + | Intmap VINT | + | +--------------+ +------------+ | + m ------>| | vint | bit | | 0 |.....|63| vint0 | + . | +--------------+ +------------+ | +------+ + . | . . | | HOST | + Globalevents ------>| . . |----->| IRQ | + . | . . | | CTRL | + . | . . | +------+ + n ------>| +--------------+ +------------+ | + | | vint | bit | | 0 |.....|63| vintx | + | +--------------+ +------------+ | + | | + +-----------------------------------------+ + + Configuration of these Intmap registers that maps global events to vint is + done by a system controller (like the Device Memory and Security Controller + on AM654 SoC). Driver should request the system controller to get the range + of global events and vints assigned to the requesting host. Management + of these requested resources should be handled by driver and requests + system controller to map specific global event to vint, bit pair. + + Communication between the host processor running an OS and the system + controller happens through a protocol called TI System Control Interface + (TISCI protocol). + +properties: + compatible: + const: ti,sci-inta + + reg: + maxItems: 1 + + interrupt-controller: true + + msi-controller: true + + ti,interrupt-ranges: + $ref: /schemas/types.yaml#/definitions/uint32-matrix + description: | + Interrupt ranges that converts the INTA output hw irq numbers + to parents's input interrupt numbers. + items: + items: + - description: | + "output_irq" specifies the base for inta output irq + - description: | + "parent's input irq" specifies the base for parent irq + - description: | + "limit" specifies the limit for translation + +required: + - compatible + - reg + - interrupt-controller + - msi-controller + - ti,sci + - ti,sci-dev-id + - ti,interrupt-ranges + +examples: + - | + bus { + #address-cells = <2>; + #size-cells = <2>; + + main_udmass_inta: msi-controller@33d00000 { + compatible = "ti,sci-inta"; + reg = <0x0 0x33d00000 0x0 0x100000>; + interrupt-controller; + msi-controller; + interrupt-parent = <&main_navss_intr>; + ti,sci = <&dmsc>; + ti,sci-dev-id = <179>; + ti,interrupt-ranges = <0 0 256>; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index e08405c2c22e..e3f1cdb69c84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17116,7 +17116,7 @@ S: Maintained F: Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml F: Documentation/devicetree/bindings/arm/keystone/ti,sci.txt F: Documentation/devicetree/bindings/clock/ti,sci-clk.txt -F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt +F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml F: Documentation/devicetree/bindings/reset/ti,sci-reset.txt F: Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt -- cgit From 7206f3149b8198c65a0ca8c01bfa1d8ace27bf91 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:22 +0530 Subject: irqchip/ti-sci-inta: Do not store TISCI device id in platform device id field Even though DT doesn't make active use of id field in platform_device, we cannot hijack it to store TISCI device id. So create a field in struct ti_sci_inta for storing TISCI id and drop usage of id field in platform_device. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200806074826.24607-10-lokeshvutla@ti.com --- drivers/irqchip/irq-ti-sci-inta.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index b7cc5d6580d8..fbfa1f4f521e 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -83,6 +83,7 @@ struct ti_sci_inta_vint_desc { * @vint_mutex: Mutex to protect vint_list * @base: Base address of the memory mapped IO registers * @pdev: Pointer to platform device. + * @ti_sci_id: TI-SCI device identifier */ struct ti_sci_inta_irq_domain { const struct ti_sci_handle *sci; @@ -93,6 +94,7 @@ struct ti_sci_inta_irq_domain { struct mutex vint_mutex; void __iomem *base; struct platform_device *pdev; + u32 ti_sci_id; }; #define to_vint_desc(e, i) container_of(e, struct ti_sci_inta_vint_desc, \ @@ -156,7 +158,7 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom parent_fwspec.fwnode = of_node_to_fwnode(of_irq_find_parent(dev_of_node(&inta->pdev->dev))); parent_fwspec.param_count = 2; - parent_fwspec.param[0] = inta->pdev->id; + parent_fwspec.param[0] = inta->ti_sci_id; parent_fwspec.param[1] = vint_desc->vint_id; parent_virq = irq_create_fwspec_mapping(&parent_fwspec); @@ -202,7 +204,7 @@ static struct ti_sci_inta_event_desc *ti_sci_inta_alloc_event(struct ti_sci_inta err = inta->sci->ops.rm_irq_ops.set_event_map(inta->sci, dev_id, dev_index, - inta->pdev->id, + inta->ti_sci_id, vint_desc->vint_id, event_desc->global_event, free_bit); @@ -299,7 +301,7 @@ static void ti_sci_inta_free_irq(struct ti_sci_inta_event_desc *event_desc, inta->sci->ops.rm_irq_ops.free_event_map(inta->sci, HWIRQ_TO_DEVID(hwirq), HWIRQ_TO_IRQID(hwirq), - inta->pdev->id, + inta->ti_sci_id, vint_desc->vint_id, event_desc->global_event, event_desc->vint_bit); @@ -547,21 +549,21 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) return ret; } - ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id", &pdev->id); + ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id", &inta->ti_sci_id); if (ret) { dev_err(dev, "missing 'ti,sci-dev-id' property\n"); return -EINVAL; } - inta->vint = devm_ti_sci_get_of_resource(inta->sci, dev, pdev->id, + inta->vint = devm_ti_sci_get_of_resource(inta->sci, dev, inta->ti_sci_id, "ti,sci-rm-range-vint"); if (IS_ERR(inta->vint)) { dev_err(dev, "VINT resource allocation failed\n"); return PTR_ERR(inta->vint); } - inta->global_event = devm_ti_sci_get_of_resource(inta->sci, dev, pdev->id, - "ti,sci-rm-range-global-event"); + inta->global_event = devm_ti_sci_get_of_resource(inta->sci, dev, inta->ti_sci_id, + "ti,sci-rm-range-global-event"); if (IS_ERR(inta->global_event)) { dev_err(dev, "Global event resource allocation failed\n"); return PTR_ERR(inta->global_event); -- cgit From 5c4b585d29102c7e6a6217112bbf1be774795cd7 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:23 +0530 Subject: irqchip/ti-sci-inta: Add support for INTA directly connecting to GIC Driver assumes that Interrupt parent to Interrupt Aggregator is always Interrupt router. This is not true always and GIC can be a parent to Interrupt Aggregator. Update the driver to detect the parent and request the parent irqs accordingly. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200806074826.24607-11-lokeshvutla@ti.com --- drivers/irqchip/irq-ti-sci-inta.c | 87 +++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index fbfa1f4f521e..d4e97605456b 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -130,6 +131,37 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc) chained_irq_exit(irq_desc_get_chip(desc), desc); } +/** + * ti_sci_inta_xlate_irq() - Translate hwirq to parent's hwirq. + * @inta: IRQ domain corresponding to Interrupt Aggregator + * @irq: Hardware irq corresponding to the above irq domain + * + * Return parent irq number if translation is available else -ENOENT. + */ +static int ti_sci_inta_xlate_irq(struct ti_sci_inta_irq_domain *inta, + u16 vint_id) +{ + struct device_node *np = dev_of_node(&inta->pdev->dev); + u32 base, parent_base, size; + const __be32 *range; + int len; + + range = of_get_property(np, "ti,interrupt-ranges", &len); + if (!range) + return vint_id; + + for (len /= sizeof(*range); len >= 3; len -= 3) { + base = be32_to_cpu(*range++); + parent_base = be32_to_cpu(*range++); + size = be32_to_cpu(*range++); + + if (base <= vint_id && vint_id < base + size) + return vint_id - base + parent_base; + } + + return -ENOENT; +} + /** * ti_sci_inta_alloc_parent_irq() - Allocate parent irq to Interrupt aggregator * @domain: IRQ domain corresponding to Interrupt Aggregator @@ -141,30 +173,52 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom struct ti_sci_inta_irq_domain *inta = domain->host_data; struct ti_sci_inta_vint_desc *vint_desc; struct irq_fwspec parent_fwspec; + struct device_node *parent_node; unsigned int parent_virq; - u16 vint_id; + u16 vint_id, p_hwirq; + int ret; vint_id = ti_sci_get_free_resource(inta->vint); if (vint_id == TI_SCI_RESOURCE_NULL) return ERR_PTR(-EINVAL); + p_hwirq = ti_sci_inta_xlate_irq(inta, vint_id); + if (p_hwirq < 0) { + ret = p_hwirq; + goto free_vint; + } + vint_desc = kzalloc(sizeof(*vint_desc), GFP_KERNEL); - if (!vint_desc) - return ERR_PTR(-ENOMEM); + if (!vint_desc) { + ret = -ENOMEM; + goto free_vint; + } vint_desc->domain = domain; vint_desc->vint_id = vint_id; INIT_LIST_HEAD(&vint_desc->list); - parent_fwspec.fwnode = of_node_to_fwnode(of_irq_find_parent(dev_of_node(&inta->pdev->dev))); - parent_fwspec.param_count = 2; - parent_fwspec.param[0] = inta->ti_sci_id; - parent_fwspec.param[1] = vint_desc->vint_id; + parent_node = of_irq_find_parent(dev_of_node(&inta->pdev->dev)); + parent_fwspec.fwnode = of_node_to_fwnode(parent_node); + + if (of_device_is_compatible(parent_node, "arm,gic-v3")) { + /* Parent is GIC */ + parent_fwspec.param_count = 3; + parent_fwspec.param[0] = 0; + parent_fwspec.param[1] = p_hwirq - 32; + parent_fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH; + } else { + /* Parent is Interrupt Router */ + parent_fwspec.param_count = 1; + parent_fwspec.param[0] = p_hwirq; + } parent_virq = irq_create_fwspec_mapping(&parent_fwspec); if (parent_virq == 0) { - kfree(vint_desc); - return ERR_PTR(-EINVAL); + dev_err(&inta->pdev->dev, "Parent IRQ allocation failed\n"); + ret = -EINVAL; + goto free_vint_desc; + } vint_desc->parent_virq = parent_virq; @@ -173,6 +227,11 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom ti_sci_inta_irq_handler, vint_desc); return vint_desc; +free_vint_desc: + kfree(vint_desc); +free_vint: + ti_sci_release_resource(inta->vint, vint_id); + return ERR_PTR(ret); } /** @@ -555,15 +614,15 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) return -EINVAL; } - inta->vint = devm_ti_sci_get_of_resource(inta->sci, dev, inta->ti_sci_id, - "ti,sci-rm-range-vint"); + inta->vint = devm_ti_sci_get_resource(inta->sci, dev, inta->ti_sci_id, + TI_SCI_RESASG_SUBTYPE_IA_VINT); if (IS_ERR(inta->vint)) { dev_err(dev, "VINT resource allocation failed\n"); return PTR_ERR(inta->vint); } - inta->global_event = devm_ti_sci_get_of_resource(inta->sci, dev, inta->ti_sci_id, - "ti,sci-rm-range-global-event"); + inta->global_event = devm_ti_sci_get_resource(inta->sci, dev, inta->ti_sci_id, + TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT); if (IS_ERR(inta->global_event)) { dev_err(dev, "Global event resource allocation failed\n"); return PTR_ERR(inta->global_event); @@ -594,6 +653,8 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) INIT_LIST_HEAD(&inta->vint_list); mutex_init(&inta->vint_mutex); + dev_info(dev, "Interrupt Aggregator domain %d created\n", pdev->id); + return 0; } -- cgit From 8d523f096da53598c271b5b69fcacb48779884a3 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:24 +0530 Subject: arm64: dts: k3-j721e: ti-sci-inta/intr: Update to latest bindings Update the INTA and INTR dt nodes to the latest DT bindings. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-12-lokeshvutla@ti.com --- .../boot/dts/ti/k3-j721e-common-proc-board.dts | 10 ++--- arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 43 +++++++++++----------- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 11 +++--- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts index 8bc1e6ecc50e..e8fc01d97ada 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts @@ -287,7 +287,7 @@ }; &mailbox0_cluster0 { - interrupts = <214 0>; + interrupts = <436>; mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 { ti,mbox-rx = <0 0 0>; @@ -301,7 +301,7 @@ }; &mailbox0_cluster1 { - interrupts = <215 0>; + interrupts = <432>; mbox_main_r5fss0_core0: mbox-main-r5fss0-core0 { ti,mbox-rx = <0 0 0>; @@ -315,7 +315,7 @@ }; &mailbox0_cluster2 { - interrupts = <216 0>; + interrupts = <428>; mbox_main_r5fss1_core0: mbox-main-r5fss1-core0 { ti,mbox-rx = <0 0 0>; @@ -329,7 +329,7 @@ }; &mailbox0_cluster3 { - interrupts = <217 0>; + interrupts = <424>; mbox_c66_0: mbox-c66-0 { ti,mbox-rx = <0 0 0>; @@ -343,7 +343,7 @@ }; &mailbox0_cluster4 { - interrupts = <218 0>; + interrupts = <420>; mbox_c71_0: mbox-c71-0 { ti,mbox-rx = <0 0 0>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index d14060207f00..12ceea9b3c9a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -80,10 +80,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <8 392 56>; }; main_navss { @@ -101,10 +101,12 @@ ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; - ti,sci-rm-range-girq = <0>, <2>; + ti,sci-dev-id = <213>; + ti,interrupt-ranges = <0 64 64>, + <64 448 64>, + <128 672 64>; }; main_udmass_inta: interrupt-controller@33d00000 { @@ -115,8 +117,7 @@ msi-controller; ti,sci = <&dmsc>; ti,sci-dev-id = <209>; - ti,sci-rm-range-vint = <0xa>; - ti,sci-rm-range-global-event = <0xd>; + ti,interrupt-ranges = <0 0 256>; }; secure_proxy_main: mailbox@32c00000 { @@ -296,7 +297,7 @@ reg-names = "cpts"; clocks = <&k3_clks 201 1>; clock-names = "cpts"; - interrupts-extended = <&main_navss_intr 201 0>; + interrupts-extended = <&main_navss_intr 391>; interrupt-names = "cpts"; ti,cpts-periodic-outputs = <6>; ti,cpts-ext-ts-inputs = <8>; @@ -688,8 +689,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <105 0>, <105 1>, <105 2>, <105 3>, - <105 4>, <105 5>, <105 6>, <105 7>; + interrupts = <256>, <257>, <258>, <259>, + <260>, <261>, <262>, <263>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -705,7 +706,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <106 0>, <106 1>, <106 2>; + interrupts = <288>, <289>, <290>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -721,8 +722,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <107 0>, <107 1>, <107 2>, <107 3>, - <107 4>, <107 5>, <107 6>, <107 7>; + interrupts = <264>, <265>, <266>, <267>, + <268>, <269>, <270>, <271>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -738,7 +739,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <108 0>, <108 1>, <108 2>; + interrupts = <292>, <293>, <294>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -754,8 +755,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <109 0>, <109 1>, <109 2>, <109 3>, - <109 4>, <109 5>, <109 6>, <109 7>; + interrupts = <272>, <273>, <274>, <275>, + <276>, <277>, <278>, <279>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -771,7 +772,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <110 0>, <110 1>, <110 2>; + interrupts = <296>, <297>, <298>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -787,8 +788,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <111 0>, <111 1>, <111 2>, <111 3>, - <111 4>, <111 5>, <111 6>, <111 7>; + interrupts = <280>, <281>, <282>, <283>, + <284>, <285>, <286>, <287>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -804,7 +805,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <112 0>, <112 1>, <112 2>; + interrupts = <300>, <301>, <302>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index 30a735bcd0c8..e00cf2a08e6d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -101,9 +101,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; + ti,sci-dev-id = <137>; + ti,interrupt-ranges = <16 960 16>; ti,sci-rm-range-girq = <0x5>; }; @@ -113,8 +114,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&wkup_gpio_intr>; - interrupts = <113 0>, <113 1>, <113 2>, - <113 3>, <113 4>, <113 5>; + interrupts = <103>, <104>, <105>, <106>, <107>, <108>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <84>; @@ -130,8 +130,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&wkup_gpio_intr>; - interrupts = <114 0>, <114 1>, <114 2>, - <114 3>, <114 4>, <114 5>; + interrupts = <112>, <113>, <114>, <115>, <116>, <117>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <84>; -- cgit From fef845122f6c3c92178e4e4a1f85cce84dca06fe Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:25 +0530 Subject: arm64: dts: k3-am65: ti-sci-inta/intr: Update to latest bindings Update the INTA and INTR dt nodes to the latest DT bindings. Signed-off-by: Lokesh Vutla Signed-off-by: Suman Anna Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-13-lokeshvutla@ti.com --- arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 24 +++++++++++------------- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 8 ++++---- arch/arm64/boot/dts/ti/k3-am654-base-board.dts | 4 ++-- arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 1 - 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 9edfae5944f7..996a82ff983e 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -417,10 +417,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <100>; + ti,interrupt-ranges = <0 392 32>; }; main_navss { @@ -438,10 +438,11 @@ ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x0>, <0x2>; + ti,sci-dev-id = <182>; + ti,interrupt-ranges = <0 64 64>, + <64 448 64>; }; inta_main_udmass: interrupt-controller@33d00000 { @@ -452,8 +453,7 @@ msi-controller; ti,sci = <&dmsc>; ti,sci-dev-id = <179>; - ti,sci-rm-range-vint = <0x0>; - ti,sci-rm-range-global-event = <0x1>; + ti,interrupt-ranges = <0 0 256>; }; secure_proxy_main: mailbox@32c00000 { @@ -622,7 +622,7 @@ reg-names = "cpts"; clocks = <&main_cpts_mux>; clock-names = "cpts"; - interrupts-extended = <&intr_main_navss 163 0>; + interrupts-extended = <&intr_main_navss 391>; interrupt-names = "cpts"; ti,cpts-periodic-outputs = <6>; ti,cpts-ext-ts-inputs = <8>; @@ -645,8 +645,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_main_gpio>; - interrupts = <57 256>, <57 257>, <57 258>, <57 259>, <57 260>, - <57 261>; + interrupts = <192>, <193>, <194>, <195>, <196>, <197>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <96>; @@ -661,8 +660,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_main_gpio>; - interrupts = <58 256>, <58 257>, <58 258>, <58 259>, <58 260>, - <58 261>; + interrupts = <200>, <201>, <202>, <203>, <204>, <205>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <90>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 5f55b9e82cf1..a1ffe88d9664 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -74,10 +74,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x4>; + ti,sci-dev-id = <156>; + ti,interrupt-ranges = <0 712 16>; }; wkup_gpio0: wkup_gpio0@42110000 { @@ -86,7 +86,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_wkup_gpio>; - interrupts = <59 128>, <59 129>, <59 130>, <59 131>; + interrupts = <60>, <61>, <62>, <63>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <56>; diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts index 611e66207010..b8a8a0fcb8af 100644 --- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts @@ -384,7 +384,7 @@ }; &mailbox0_cluster0 { - interrupts = <164 0>; + interrupts = <436>; mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 { ti,mbox-tx = <1 0 0>; @@ -393,7 +393,7 @@ }; &mailbox0_cluster1 { - interrupts = <165 0>; + interrupts = <432>; mbox_mcu_r5fss0_core1: mbox-mcu-r5fss0-core1 { ti,mbox-tx = <1 0 0>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index e00cf2a08e6d..c4a48e8d420a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -105,7 +105,6 @@ ti,sci = <&dmsc>; ti,sci-dev-id = <137>; ti,interrupt-ranges = <16 960 16>; - ti,sci-rm-range-girq = <0x5>; }; wkup_gpio0: gpio@42110000 { -- cgit From 6da45875fa174d9db238ea0d6846061e68b41b6d Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:26 +0530 Subject: arm64: dts: k3-am65: Update the RM resource types Update the ringacc and udma dt nodes to use the latest RM resource types similar to the ones used in k3-j721e dt nodes. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-14-lokeshvutla@ti.com --- arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 12 ++++++------ arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 996a82ff983e..24ef18fe77df 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -589,7 +589,7 @@ <0x0 0x33000000 0x0 0x40000>; reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target"; ti,num-rings = <818>; - ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */ + ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */ ti,dma-ring-reset-quirk; ti,sci = <&dmsc>; ti,sci-dev-id = <187>; @@ -609,11 +609,11 @@ ti,sci-dev-id = <188>; ti,ringacc = <&ringacc>; - ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */ - <0x2>; /* TX_CHAN */ - ti,sci-rm-range-rchan = <0x4>, /* RX_HCHAN */ - <0x5>; /* RX_CHAN */ - ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */ + ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */ + <0xd>; /* TX_CHAN */ + ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */ + <0xa>; /* RX_CHAN */ + ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */ }; cpts@310d0000 { diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 8c1abcfe0860..51ca4b4d4c21 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -134,7 +134,7 @@ <0x0 0x2a500000 0x0 0x40000>; reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target"; ti,num-rings = <286>; - ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */ + ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */ ti,dma-ring-reset-quirk; ti,sci = <&dmsc>; ti,sci-dev-id = <195>; @@ -154,11 +154,11 @@ ti,sci-dev-id = <194>; ti,ringacc = <&mcu_ringacc>; - ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */ - <0x2>; /* TX_CHAN */ - ti,sci-rm-range-rchan = <0x3>, /* RX_HCHAN */ - <0x4>; /* RX_CHAN */ - ti,sci-rm-range-rflow = <0x5>; /* GP RFLOW */ + ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */ + <0xd>; /* TX_CHAN */ + ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */ + <0xa>; /* RX_CHAN */ + ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */ }; }; -- cgit From 7828a3ef8646fb2e69ed45616c8453a037ca7867 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 6 Aug 2020 10:57:45 +0100 Subject: irqchip: Fix probing deferal when using IRQCHIP_PLATFORM_DRIVER helpers When probing an interrupt controller that is behind a parent, we try to check whether the parent domain is available as an indication that we can actually try to probe. Unfortunately, we are checking this with the firmware node of the about to be probed device, not the parent. This is obviously bound to fail. Instead, use the parent node. Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros") Reported-by: John Stultz Tested-by: John Stultz Signed-off-by: Marc Zyngier --- drivers/irqchip/irqchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index 1bb0e36c2bf3..d2341153e181 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -52,7 +52,7 @@ int platform_irqchip_probe(struct platform_device *pdev) * interrupt controller. The actual initialization callback of this * interrupt controller can check for specific domains as necessary. */ - if (par_np && !irq_find_matching_host(np, DOMAIN_BUS_ANY)) + if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) return -EPROBE_DEFER; return irq_init_cb(np, par_np); -- cgit From a150dac5a8fb711fdc378c23f44bee4546f04246 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Aug 2020 10:38:39 +0100 Subject: irqchip: Revert modular support for drivers using IRQCHIP_PLATFORM_DRIVER helperse It has become obvious that switching a number of irqchip drivers to being platform drivers without considering the platform was a mistake. We have multiple reports of end-point drivers not probing because the irqchip driver isn't there yet, breaking the expectations of the users. This patch reverts: 920ecb8c35cb ("irqchip/mtk-cirq: Convert to a platform driver") f97dbf48ca43 ("irqchip/mtk-sysirq: Convert to a platform driver") 5be57099d445 ("irqchip/qcom-pdc: Switch to using IRQCHIP_PLATFORM_DRIVER helper macros") 95bf9305d2e3 ("irqchip/qcom-pdc: Allow QCOM_PDC to be loadable as a permanent module") and leave QCOM PDC, MTK sysrq and cirq drivers as built-in, special purpose drivers for the time being until we have worked out a better solution. Reported-by: Enric Balletbo i Serra Reported-by: Frank Wunderlich Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/93debe6a0308b66d3f307af67ba7ec2c@kernel.org --- drivers/irqchip/Kconfig | 2 +- drivers/irqchip/irq-mtk-cirq.c | 4 +--- drivers/irqchip/irq-mtk-sysirq.c | 4 +--- drivers/irqchip/qcom-pdc.c | 8 +------- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bb70b7177f94..bfc9719dbcdc 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -425,7 +425,7 @@ config GOLDFISH_PIC for Goldfish based virtual platforms. config QCOM_PDC - tristate "QCOM PDC" + bool "QCOM PDC" depends on ARCH_QCOM select IRQ_DOMAIN_HIERARCHY help diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c index 62a61275aaa3..69ba8ce3c178 100644 --- a/drivers/irqchip/irq-mtk-cirq.c +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -295,6 +295,4 @@ out_free: return ret; } -IRQCHIP_PLATFORM_DRIVER_BEGIN(mtk_cirq) -IRQCHIP_MATCH("mediatek,mtk-cirq", mtk_cirq_of_init) -IRQCHIP_PLATFORM_DRIVER_END(mtk_cirq) +IRQCHIP_DECLARE(mtk_cirq, "mediatek,mtk-cirq", mtk_cirq_of_init); diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c index 7299c5ab4d10..6ff98b87e5c0 100644 --- a/drivers/irqchip/irq-mtk-sysirq.c +++ b/drivers/irqchip/irq-mtk-sysirq.c @@ -231,6 +231,4 @@ out_free_chip: kfree(chip_data); return ret; } -IRQCHIP_PLATFORM_DRIVER_BEGIN(mtk_sysirq) -IRQCHIP_MATCH("mediatek,mt6577-sysirq", mtk_sysirq_of_init) -IRQCHIP_PLATFORM_DRIVER_END(mtk_sysirq) +IRQCHIP_DECLARE(mtk_sysirq, "mediatek,mt6577-sysirq", mtk_sysirq_of_init); diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index c1c5dfad57cc..6ae9e1f0819d 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -11,11 +11,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -432,8 +430,4 @@ fail: return ret; } -IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_pdc) -IRQCHIP_MATCH("qcom,pdc", qcom_pdc_init) -IRQCHIP_PLATFORM_DRIVER_END(qcom_pdc) -MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Power Domain Controller"); -MODULE_LICENSE("GPL v2"); +IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init); -- cgit From e579076ac0a3bebb440fab101aef3c42c9f4c709 Mon Sep 17 00:00:00 2001 From: qiuguorui1 Date: Thu, 20 Aug 2020 11:16:29 +0800 Subject: irqchip/stm32-exti: Avoid losing interrupts due to clearing pending bits by mistake In the current code, when the eoi callback of the exti clears the pending bit of the current interrupt, it will first read the values of fpr and rpr, then logically OR the corresponding bit of the interrupt number, and finally write back to fpr and rpr. We found through experiments that if two exti interrupts, we call them int1/int2, arrive almost at the same time. in our scenario, the time difference is 30 microseconds, assuming int1 is triggered first. there will be an extreme scenario: both int's pending bit are set to 1, the irq handle of int1 is executed first, and eoi handle is then executed, at this moment, all pending bits are cleared, but the int 2 has not finally been reported to the cpu yet, which eventually lost int2. According to stm32's TRM description about rpr and fpr: Writing a 1 to this bit will trigger a rising edge event on event x, Writing 0 has no effect. Therefore, when clearing the pending bit, we only need to clear the pending bit of the irq. Fixes: 927abfc4461e7 ("irqchip/stm32: Add stm32mp1 support with hierarchy domain") Signed-off-by: qiuguorui1 Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # v4.18+ Link: https://lore.kernel.org/r/20200820031629.15582-1-qiuguorui1@huawei.com --- drivers/irqchip/irq-stm32-exti.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 03a36be757d8..0c2c61db26b4 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -416,6 +416,16 @@ static void stm32_irq_ack(struct irq_data *d) irq_gc_unlock(gc); } +/* directly set the target bit without reading first. */ +static inline void stm32_exti_write_bit(struct irq_data *d, u32 reg) +{ + struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d); + void __iomem *base = chip_data->host_data->base; + u32 val = BIT(d->hwirq % IRQS_PER_BANK); + + writel_relaxed(val, base + reg); +} + static inline u32 stm32_exti_set_bit(struct irq_data *d, u32 reg) { struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d); @@ -449,9 +459,9 @@ static void stm32_exti_h_eoi(struct irq_data *d) raw_spin_lock(&chip_data->rlock); - stm32_exti_set_bit(d, stm32_bank->rpr_ofst); + stm32_exti_write_bit(d, stm32_bank->rpr_ofst); if (stm32_bank->fpr_ofst != UNDEF_REG) - stm32_exti_set_bit(d, stm32_bank->fpr_ofst); + stm32_exti_write_bit(d, stm32_bank->fpr_ofst); raw_spin_unlock(&chip_data->rlock); -- cgit From 821fc9e261f3af235752f46e59084467cfd440c4 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 19 Aug 2020 20:06:02 +0200 Subject: irqchip/ingenic: Leave parent IRQ unmasked on suspend All the wakeup sources we possibly want will go through the interrupt controller, so the parent IRQ must not be masked during suspend, or there won't be any way to wake up the system. Signed-off-by: Paul Cercueil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200819180602.136969-1-paul@crapouillou.net --- drivers/irqchip/irq-ingenic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c index 9f3da4260ca6..b61a8901ef72 100644 --- a/drivers/irqchip/irq-ingenic.c +++ b/drivers/irqchip/irq-ingenic.c @@ -125,7 +125,7 @@ static int __init ingenic_intc_of_init(struct device_node *node, irq_reg_writel(gc, IRQ_MSK(32), JZ_REG_INTC_SET_MASK); } - if (request_irq(parent_irq, intc_cascade, 0, + if (request_irq(parent_irq, intc_cascade, IRQF_NO_SUSPEND, "SoC intc cascade interrupt", NULL)) pr_err("Failed to register SoC intc cascade interrupt\n"); return 0; -- cgit