From c20c6704bf2dafaba0d90c8310ef9e919fe4d2e2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Nov 2017 04:36:51 +0000 Subject: ASoC: rcar: revert IOMMU support so far commit 4821d914fe74 ("ASoC: rsnd: use dma_sync_single_for_xxx() for IOMMU") had supported IOMMU, but it breaks normal sound "recorde" and both PulseAudio's "playback/recorde". The sound will be noisy. That commit was using dma_sync_single_for_xxx(), and driver should make sure memory is protected during CPU or Device are using it. But if driver returns current "residue" data size correctly on pointer function, player/recorder will access to protected memory. IOMMU feature should be supported, but I don't know how to handle it without memory cache problem at this point. Thus, this patch simply revert it to avoid current noisy sound. Tested-by: Hiroyuki Yokoyama Tested-by: Ryo Kodama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 4 +-- sound/soc/sh/rcar/dma.c | 86 ++++-------------------------------------------- 2 files changed, 8 insertions(+), 82 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index c70eb2097816..f12a88a21dfa 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd) return snd_pcm_lib_preallocate_pages_for_all( rtd->pcm, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), + SNDRV_DMA_TYPE_DEV, + rtd->card->snd_card->dev, PREALLOC_BUFFER, PREALLOC_BUFFER_MAX); } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index fd557abfe390..4d750bdf8e24 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -26,10 +26,7 @@ struct rsnd_dmaen { struct dma_chan *chan; dma_cookie_t cookie; - dma_addr_t dma_buf; unsigned int dma_len; - unsigned int dma_period; - unsigned int dma_cnt; }; struct rsnd_dmapp { @@ -71,38 +68,10 @@ static struct rsnd_mod mem = { /* * Audio DMAC */ -#define rsnd_dmaen_sync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 1) -#define rsnd_dmaen_unsync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 0) -static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io, - int i, int sync) -{ - struct device *dev = dmaen->chan->device->dev; - enum dma_data_direction dir; - int is_play = rsnd_io_is_play(io); - dma_addr_t buf; - int len, max; - size_t period; - - len = dmaen->dma_len; - period = dmaen->dma_period; - max = len / period; - i = i % max; - buf = dmaen->dma_buf + (period * i); - - dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE; - - if (sync) - dma_sync_single_for_device(dev, buf, period, dir); - else - dma_sync_single_for_cpu(dev, buf, period, dir); -} - static void __rsnd_dmaen_complete(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct rsnd_dma *dma = rsnd_mod_to_dma(mod); - struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); bool elapsed = false; unsigned long flags; @@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod, */ spin_lock_irqsave(&priv->lock, flags); - if (rsnd_io_is_working(io)) { - rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt); - - /* - * Next period is already started. - * Let's sync Next Next period - * see - * rsnd_dmaen_start() - */ - rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2); - + if (rsnd_io_is_working(io)) elapsed = true; - dmaen->dma_cnt++; - } - spin_unlock_irqrestore(&priv->lock, flags); if (elapsed) @@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); - if (dmaen->chan) { - int is_play = rsnd_io_is_play(io); - + if (dmaen->chan) dmaengine_terminate_all(dmaen->chan); - dma_unmap_single(dmaen->chan->device->dev, - dmaen->dma_buf, dmaen->dma_len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - } return 0; } @@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, struct device *dev = rsnd_priv_to_dev(priv); struct dma_async_tx_descriptor *desc; struct dma_slave_config cfg = {}; - dma_addr_t buf; - size_t len; - size_t period; int is_play = rsnd_io_is_play(io); - int i; int ret; cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; @@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, if (ret < 0) return ret; - len = snd_pcm_lib_buffer_bytes(substream); - period = snd_pcm_lib_period_bytes(substream); - buf = dma_map_single(dmaen->chan->device->dev, - substream->runtime->dma_area, - len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (dma_mapping_error(dmaen->chan->device->dev, buf)) { - dev_err(dev, "dma map failed\n"); - return -EIO; - } - desc = dmaengine_prep_dma_cyclic(dmaen->chan, - buf, len, period, + substream->runtime->dma_addr, + snd_pcm_lib_buffer_bytes(substream), + snd_pcm_lib_period_bytes(substream), is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, desc->callback = rsnd_dmaen_complete; desc->callback_param = rsnd_mod_get(dma); - dmaen->dma_buf = buf; - dmaen->dma_len = len; - dmaen->dma_period = period; - dmaen->dma_cnt = 0; - - /* - * synchronize this and next period - * see - * __rsnd_dmaen_complete() - */ - for (i = 0; i < 2; i++) - rsnd_dmaen_sync(dmaen, io, i); + dmaen->dma_len = snd_pcm_lib_buffer_bytes(substream); dmaen->cookie = dmaengine_submit(desc); if (dmaen->cookie < 0) { -- cgit From 4c761ebfcb2d04ee36783c4c8c45ae00caf59d36 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Fri, 3 Nov 2017 19:15:02 +0530 Subject: ASoC: Intel: kbl: Modify map for Headset Playback to fix pop-noise Patch fixes wrong path in commit 0b06122fc8d0 ("ASoC: Intel: kbl: Add map for new DAIs for Multi-Playback & Echo Ref") which resulted in pop noise. Current topology for Headset results in unwanted pop noise, while switching from spk->hs at the start of Headset Playback. Hence re-introduced mixin-mixout dsp module in topology for headset playback pipe to fix the regression. And the corresponding modification for headset route is updated here. Fixes: 0b06122fc8d0 ("ASoC: Intel: kbl: Add map for new DAIs for Multi-Playback & Echo Ref") Signed-off-by: Naveen Manohar Signed-off-by: Sathya Prakash M R Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_max98927.c | 2 +- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 6f9a8bcf20f3..6dcad0a8a0d0 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 6072164f2d43..271ae3c2c535 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, -- cgit From bc6476d6c1edcb9b97621b5131bd169aa81f27db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:55 +0100 Subject: ASoC: da7218: fix fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed. Fixes: 4d50934abd22 ("ASoC: da7218: Add da7218 codec driver") Signed-off-by: Johan Hovold Acked-by: Adam Thomson Signed-off-by: Mark Brown Cc: stable --- sound/soc/codecs/da7218.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index b2d42ec1dcd9..56564ce90cb6 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; -- cgit From 15f8c5f2415bfac73f33a14bcd83422bcbfb5298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:56 +0100 Subject: ASoC: twl4030: fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed, while the child node was leaked. Fixes: 2d6d649a2e0f ("ASoC: twl4030: Support for DT booted kernel") Signed-off-by: Johan Hovold Signed-off-by: Mark Brown Cc: stable --- sound/soc/codecs/twl4030.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c482b2e7a7d2..cfe72b9d4356 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; -- cgit From 56986b07d17b4a19416e248aaca9367c241a824b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 22 Nov 2017 13:59:19 +0800 Subject: ASoC: rt5645: reset RT5645_AD_DA_MIXER at probe RT5645_AD_DA_MIXER (0x29) register will not be reset to default after SW reset. So we have to write it to its default value in i2c_probe. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 5f24df4fae8e..fcd02c2c76f1 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, regmap_read(regmap, RT5645_VENDOR_ID, &val); rt5645->v_id = val & 0xff; + regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080); + ret = regmap_register_patch(rt5645->regmap, init_list, ARRAY_SIZE(init_list)); if (ret != 0) -- cgit From 254beff97b4714bac4ec8add5a6888c1adc1ad8f Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Fri, 24 Nov 2017 16:11:22 +0800 Subject: ASoC: rt5514: Make sure the DMIC delay will be happened after normal SUPPLY widgets power on The patch makes sure the DMIC delay will be happened after normal SUPPLY widgets power on. If there are some platforms that provide the MCLK using the SUPPLY widget, it will make sure the delay time is helpful. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2a5b5d74e697..2dd6e9f990a4 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = { SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0, rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU), SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1, -- cgit From fdaa451107ce543d345a339b4d5e20e8e4bac396 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Nov 2017 20:27:56 -0800 Subject: ASoC: amd: Add error checking to probe function The acp_audio_dma does not perform sufficient error checking in its probe function. This can result in crashes if a critical error path is encountered. Fixes: 7c31335a03b6a ("ASoC: AMD: add AMD ASoC ACP 2.x DMA driver") Cc: Alex Deucher Cc: Dominik Behr Cc: Daniel Kurtz Signed-off-by: Guenter Roeck Reviewed-by: Alex Deucher Signed-off-by: Mark Brown --- sound/soc/amd/acp-pcm-dma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 9f521a55d610..b5e41df6bb3a 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev) struct resource *res; const u32 *pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + return -ENODEV; + } + audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data), GFP_KERNEL); if (audio_drv_data == NULL) @@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(audio_drv_data->acp_mmio)) + return PTR_ERR(audio_drv_data->acp_mmio); /* The following members gets populated in device 'open' * function. Till then interrupts are disabled in 'acp_init' -- cgit From 695b78b548d8a26288f041e907ff17758df9e1d5 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:14:55 +0100 Subject: ASoC: fsl_ssi: AC'97 ops need regmap, clock and cleaning up on failure AC'97 ops (register read / write) need SSI regmap and clock, so they have to be set after them. We also need to set these ops back to NULL if we fail the probe. Signed-off-by: Maciej S. Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/fsl/fsl_ssi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index f2f51e06e22c..c3a83ed0297e 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1458,12 +1458,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1574,6 +1568,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1657,6 +1659,10 @@ error_sound_card: fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); -- cgit From b880b8056b31288323745a13930bc45cf4c86e9d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:16:07 +0100 Subject: ASoC: fsl_ssi: serialize AC'97 register access operations AC'97 register access operations (both read and write) on SSI use a one, shared set of SSI registers for AC'97 register address and data. This means that only one such access is possible at a time and so all these operations need to be serialized. Since an AC'97 register access operation in this driver takes 100us+ let's use a mutex for this. Use this opportunity to also change a default value returned from AC'97 register read function from -1 to 0, since that's what AC'97 specs require to be returned when unknown / undefined registers are read. Signed-off-by: Maciej S. Szmigiero Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index c3a83ed0297e..424bafaf51ef 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,8 @@ struct fsl_ssi_private { u32 fifo_watermark; u32 dma_maxburst; + + struct mutex ac97_reg_lock; }; /* @@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, if (reg > 0x7f) return; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 write clk_prepare_enable failed: %d\n", ret); - return; + goto ret_unlock; } lreg = reg << 12; @@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); + +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); } static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, @@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, { struct regmap *regs = fsl_ac97_data->regs; - unsigned short val = -1; + unsigned short val = 0; u32 reg_val; unsigned int lreg; int ret; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 read clk_prepare_enable failed: %d\n", ret); - return -1; + goto ret_unlock; } lreg = (reg & 0x7f) << 12; @@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, clk_disable_unprepare(fsl_ac97_data->clk); +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); return val; } @@ -1569,6 +1581,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) } if (fsl_ssi_is_ac97(ssi_private)) { + mutex_init(&ssi_private->ac97_reg_lock); ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); if (ret) { dev_err(&pdev->dev, "could not set AC'97 ops\n"); @@ -1663,6 +1676,9 @@ error_asoc_register: snd_soc_set_ac97_ops(NULL); error_ac97_ops: + if (fsl_ssi_is_ac97(ssi_private)) + mutex_destroy(&ssi_private->ac97_reg_lock); + if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); @@ -1681,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev) if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi_private)) { snd_soc_set_ac97_ops(NULL); + mutex_destroy(&ssi_private->ac97_reg_lock); + } return 0; } -- cgit From 346cccf88319344c9f513bd85df6ae2258e8a8ea Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Mon, 20 Nov 2017 18:23:19 +0800 Subject: ASoC: rt5514: Add the sanity check for the driver_data in the resume function If the rt5514 spi driver is loaded, but the snd_soc_platform_driver is not loaded by the correct DAI settings, the NULL pointer will be gotten by snd_soc_platform_get_drvdata in the resume function. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514-spi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 2df91db765ac..ca6a90d8fc39 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev) if (device_may_wakeup(dev)) disable_irq_wake(irq); - if (rt5514_dsp->substream) { - rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf)); - if (buf[0] & RT5514_IRQ_STATUS_BIT) - rt5514_schedule_copy(rt5514_dsp); + if (rt5514_dsp) { + if (rt5514_dsp->substream) { + rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, + sizeof(buf)); + if (buf[0] & RT5514_IRQ_STATUS_BIT) + rt5514_schedule_copy(rt5514_dsp); + } } return 0; -- cgit From d3b0535216f04e7e149eaebe8e967c46bdf88dc3 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Fri, 17 Nov 2017 15:09:27 +0000 Subject: ASoC: da7219: Correct IRQ level in DT binding example Current DT binding documentation shows an example where the IRQ for the device is chosen to be ACTIVE_HIGH. This is incorrect as the device only supports ACTIVE_LOW, so this commit fixes that discrepancy. Signed-off-by: Adam Thomson Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/da7219.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt index cf61681826b6..5b54d2d045c3 100644 --- a/Documentation/devicetree/bindings/sound/da7219.txt +++ b/Documentation/devicetree/bindings/sound/da7219.txt @@ -77,7 +77,7 @@ Example: reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; VDD-supply = <®_audio>; VDDMIC-supply = <®_audio>; -- cgit From b7926c464d6479fc62a4297ca4f48a5da5fb0988 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Fri, 17 Nov 2017 15:09:28 +0000 Subject: ASoC: da7218: Correct IRQ level in DT binding example Current DT binding documentation shows an example where the IRQ for the device is chosen to be ACTIVE_HIGH. This is incorrect as the device only supports ACTIVE_LOW, so this commit fixes that discrepancy. Signed-off-by: Adam Thomson Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/da7218.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt index 5ca5a709b6aa..3ab9dfef38d1 100644 --- a/Documentation/devicetree/bindings/sound/da7218.txt +++ b/Documentation/devicetree/bindings/sound/da7218.txt @@ -73,7 +73,7 @@ Example: compatible = "dlg,da7218"; reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; wakeup-source; VDD-supply = <®_audio>; -- cgit From a91d7fb97092d6b840af5899ded3b389603fd7f1 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 28 Nov 2017 16:05:13 +0900 Subject: ASoC: rsnd: ssiu: clear SSI_MODE for non TDM Extended modes register SSI_MODE is set when SSI works in TDM Extended, but it isn't reset when SSI starts to work in other modes, thus causes issues. This patch clearss SSI_MODE register when SSI works in modes other than TDM Extended. Fixes: 186fadc132f0 ("ASoC: rsnd: add TDM Extend Mode support") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssiu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300..6ff8a36c2c82 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, { int hdmi = rsnd_ssi_hdmi_port(io); int ret; + u32 mode = 0; ret = rsnd_ssiu_init(mod, io, priv); if (ret < 0) @@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, * see * rsnd_ssi_config_init() */ - rsnd_mod_write(mod, SSI_MODE, 0x1); + mode = 0x1; } + rsnd_mod_write(mod, SSI_MODE, mode); + if (rsnd_ssi_use_busif(io)) { rsnd_mod_write(mod, SSI_BUSIF_ADINR, rsnd_get_adinr_bit(mod, io) | -- cgit From 6a53b7593233ab9e4f96873ebacc0f653a55c3e1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 27 Nov 2017 11:15:16 -0800 Subject: xfrm: check id proto in validate_tmpl() syzbot reported a kernel warning in xfrm_state_fini(), which indicates that we have entries left in the list net->xfrm.state_all whose proto is zero. And xfrm_id_proto_match() doesn't consider them as a match with IPSEC_PROTO_ANY in this case. Proto with value 0 is probably not a valid value, at least verify_newsa_info() doesn't consider it valid either. This patch fixes it by checking the proto value in validate_tmpl() and rejecting invalid ones, like what iproute2 does in xfrm_xfrmproto_getbyname(). Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 983b0233767b..c2cfcc6fdb34 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1445,6 +1445,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) default: return -EINVAL; } + + switch (ut[i].id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + case IPSEC_PROTO_ANY: + break; + default: + return -EINVAL; + } + } return 0; -- cgit From b89b6925bb9d48926d7ba713d3f13b14fc35c544 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 16 Nov 2017 11:55:18 -0800 Subject: ASoC: fsl_asrc: Fix typo in a field define ASRFSTi_IAEi has an 11-bit offset as its _SHIFT macro defines. So this patch just fixes that. Reported-by: Laurent Charpentier Signed-off-by: Nicolin Chen Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h index 0f163abe4ba3..52c27a358933 100644 --- a/sound/soc/fsl/fsl_asrc.h +++ b/sound/soc/fsl/fsl_asrc.h @@ -260,8 +260,8 @@ #define ASRFSTi_OUTPUT_FIFO_SHIFT 12 #define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT) #define ASRFSTi_IAEi_SHIFT 11 -#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_OAFi_SHIFT) -#define ASRFSTi_IAEi (1 << ASRFSTi_OAFi_SHIFT) +#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_IAEi_SHIFT) +#define ASRFSTi_IAEi (1 << ASRFSTi_IAEi_SHIFT) #define ASRFSTi_INPUT_FIFO_WIDTH 7 #define ASRFSTi_INPUT_FIFO_SHIFT 0 #define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1) -- cgit From 51f493ae71adc2c49a317a13c38e54e1cdf46005 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 30 Nov 2017 10:15:02 +0000 Subject: ASoC: codecs: msm8916-wcd: Fix supported formats This codec is configurable for only 16 bit and 32 bit samples, so reflect this in the supported formats also remove 24bit sample from supported list. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/msm8916-wcd-analog.c | 2 +- sound/soc/codecs/msm8916-wcd-digital.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 5f3c42c4f74a..066ea2f4ce7b 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -267,7 +267,7 @@ #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000) #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4; diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index a10a724eb448..13354d6304a8 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -194,7 +194,7 @@ SNDRV_PCM_RATE_32000 | \ SNDRV_PCM_RATE_48000) #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) struct msm8916_wcd_digital_priv { struct clk *ahbclk, *mclk; @@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream, RX_I2S_CTL_RX_I2S_MODE_MASK, RX_I2S_CTL_RX_I2S_MODE_16); break; - case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S32_LE: snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL, TX_I2S_CTL_TX_I2S_MODE_MASK, TX_I2S_CTL_TX_I2S_MODE_32); -- cgit From 737e0b7b67bdfe24090fab2852044bb283282fc5 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:46 -0600 Subject: ASoC: tlv320aic31xx: Fix GPIO1 register definition GPIO1 control register is number 51, fix this here. Fixes: bafcbfe429eb ("ASoC: tlv320aic31xx: Make the register values human readable") Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/tlv320aic31xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 730fb2058869..1ff3edb7bbb6 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -116,7 +116,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ -- cgit From e719135881f00c01ca400abb8a5dadaf297a24f9 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Wed, 29 Nov 2017 18:23:56 +0100 Subject: xfrm: fix XFRMA_OUTPUT_MARK policy entry This seems to be an obvious typo, NLA_U32 is type of the attribute, not its (minimal) length. Fixes: 077fbac405bf ("net: xfrm: support setting an output mark.") Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c2cfcc6fdb34..ff58c37469d6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2485,7 +2485,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, - [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, + [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { -- cgit From 4ce3dbe397d7b6b15f272ae757c78c35e9e4b61d Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 28 Nov 2017 19:55:40 +0200 Subject: xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0) Code path when (encap_type < 0) does not verify the state is valid before progressing. This will result in a crash if, for instance, x->km.state == XFRM_STATE_ACQ. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Aviv Heller Signed-off-by: Yevgeny Kliteynik Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 347ab31574d5..da6447389ffb 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -207,7 +207,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) xfrm_address_t *daddr; struct xfrm_mode *inner_mode; u32 mark = skb->mark; - unsigned int family; + unsigned int family = AF_UNSPEC; int decaps = 0; int async = 0; bool xfrm_gro = false; @@ -216,6 +216,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { x = xfrm_input_state(skb); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + if (x->km.state == XFRM_STATE_ACQ) + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + else + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINSTATEINVALID); + goto drop; + } + family = x->outer_mode->afinfo->family; /* An encap_type of -1 indicates async resumption. */ -- cgit From ddc47e4404b58f03e98345398fb12d38fe291512 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Nov 2017 06:53:55 +0100 Subject: xfrm: Fix stack-out-of-bounds read on socket policy lookup. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by catching address family missmatches of the policy and the flow already before we do the lookup. Reported-by: syzbot Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9542975eb2f9..038ec68f6901 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, family); + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; -- cgit From adf6895754e2503d994a765535fd1813f8834674 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Nov 2017 19:42:52 -0800 Subject: acpi, nfit: fix health event notification Integration testing with a BIOS that generates injected health event notifications fails to communicate those events to userspace. The nfit driver neglects to link the ACPI DIMM device with the necessary driver data so acpi_nvdimm_notify() fails this lookup: nfit_mem = dev_get_drvdata(dev); if (nfit_mem && nfit_mem->flags_attr) sysfs_notify_dirent(nfit_mem->flags_attr); Add the necessary linkage when installing the notification handler and clean it up when the nfit driver instance is torn down. Cc: Cc: Toshi Kani Cc: Vishal Verma Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Reported-by: Daniel Osawa Tested-by: Daniel Osawa Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ff2580e7611d..abeb4df4f22e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } -- cgit From 588fb54b0cc5be5fd2e12bb04810534ffc3d49cc Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 30 Nov 2017 13:14:51 +0100 Subject: clk: Manage proper runtime PM state in clk_change_rate() clk_change_rate() propagates rate change down to all its children. Such operation requires managing proper runtime PM state of each child, what was missing. Add needed calls to clk_pm_runtime*() to ensure that set_rate() clock callback is called on runtime active clock. This fixes following issue found on Exynos5433 TM2 board with devfreq enabled: Synchronous External Abort: synchronous external abort (0x96000210) at 0xffffff80093f5600 Internal error: : 96000210 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 5 Comm: kworker/u16:0 Not tainted 4.15.0-rc1-next-20171129+ #4 Hardware name: Samsung TM2 board (DT) Workqueue: devfreq_wq devfreq_monitor task: ffffffc0ca96b600 task.stack: ffffff80093a8000 pstate: a0000085 (NzCv daIf -PAN -UAO) pc : clk_divider_set_rate+0x54/0x118 lr : clk_divider_set_rate+0x44/0x118 ... Process kworker/u16:0 (pid: 5, stack limit = 0xffffff80093a8000) Call trace: clk_divider_set_rate+0x54/0x118 clk_change_rate+0xfc/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_core_set_rate_nolock+0x138/0x148 clk_set_rate+0x28/0x50 exynos_bus_passive_target+0x6c/0x11c update_devfreq_passive+0x58/0xb4 devfreq_passive_notifier_call+0x50/0x5c notifier_call_chain+0x4c/0x88 __srcu_notifier_call_chain+0x54/0x80 srcu_notifier_call_chain+0x14/0x1c update_devfreq+0x100/0x1b4 devfreq_monitor+0x2c/0x88 process_one_work+0x148/0x3d8 worker_thread+0x13c/0x3f8 kthread+0x100/0x12c ret_from_fork+0x10/0x18 Reported-by: Chanwoo Choi Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Marek Szyprowski Reviewed-by: Ulf Hansson Tested-by: Chanwoo Choi Reviewed-by: Chanwoo Choi Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 647d056df88c..8a1860a36c77 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core) best_parent_rate = core->parent->rate; } + if (clk_pm_runtime_get(core)) + return; + if (core->flags & CLK_SET_RATE_UNGATE) { unsigned long flags; @@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core) /* handle the new child who might not be in core->children yet */ if (core->new_child) clk_change_rate(core->new_child); + + clk_pm_runtime_put(core); } static int clk_core_set_rate_nolock(struct clk_core *core, -- cgit From 975b820b6836b6b6c42fb84cd2e772e2b41bca67 Mon Sep 17 00:00:00 2001 From: Cai Li Date: Tue, 21 Nov 2017 17:24:38 +0800 Subject: clk: fix a panic error caused by accessing NULL pointer In some cases the clock parent would be set NULL when doing re-parent, it will cause a NULL pointer accessing if clk_set trace event is enabled. This patch sets the parent as "none" if the input parameter is NULL. Fixes: dfc202ead312 (clk: Add tracepoints for hardware operations) Signed-off-by: Cai Li Signed-off-by: Chunyan Zhang Signed-off-by: Stephen Boyd --- include/trace/events/clk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h index 758607226bfd..2cd449328aee 100644 --- a/include/trace/events/clk.h +++ b/include/trace/events/clk.h @@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent, TP_STRUCT__entry( __string( name, core->name ) - __string( pname, parent->name ) + __string( pname, parent ? parent->name : "none" ) ), TP_fast_assign( __assign_str(name, core->name); - __assign_str(pname, parent->name); + __assign_str(pname, parent ? parent->name : "none"); ), TP_printk("%s %s", __get_str(name), __get_str(pname)) -- cgit From c7b92172a61b91936be985cb9bc499a4ebc6489b Mon Sep 17 00:00:00 2001 From: Stefan Potyra Date: Wed, 6 Dec 2017 16:03:24 +0100 Subject: ASoC: rockchip: disable clock on error Disable the clocks in rk_spdif_probe when an error occurs after one of the clocks has been enabled previously. Found by Linux Driver Verification project (linuxtesting.org). Fixes: f874b80e1571 ASoC: rockchip: Add rockchip SPDIF transceiver driver Signed-off-by: Stefan Potyra Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_spdif.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c index ee5055d47d13..a89fe9b6463b 100644 --- a/sound/soc/rockchip/rockchip_spdif.c +++ b/sound/soc/rockchip/rockchip_spdif.c @@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev) spdif->mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(spdif->mclk)) { dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n"); - return PTR_ERR(spdif->mclk); + ret = PTR_ERR(spdif->mclk); + goto err_disable_hclk; } ret = clk_prepare_enable(spdif->mclk); if (ret) { dev_err(spdif->dev, "clock enable failed %d\n", ret); - return ret; + goto err_disable_clocks; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_disable_clocks; + } spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs, &rk_spdif_regmap_config); if (IS_ERR(spdif->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(spdif->regmap); + ret = PTR_ERR(spdif->regmap); + goto err_disable_clocks; } spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR; @@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev) err_pm_runtime: pm_runtime_disable(&pdev->dev); +err_disable_clocks: + clk_disable_unprepare(spdif->mclk); +err_disable_hclk: + clk_disable_unprepare(spdif->hclk); return ret; } -- cgit From e02b03303f13b6a571f01b4d84b69440696d2dde Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Wed, 6 Dec 2017 16:34:04 +0530 Subject: ASoC: Intel: Skylake: Do not check dev_type for dmic link type Some BIOS have inconsistent dev_type value for DMIC link type. Since there is only one device type for DMIC link type, remove device type check if link type is NHLT_LINK_DMIC. Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-nhlt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index d14c50a60289..3eaac41090ca 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, if ((epnt->virtual_bus_id == instance_id) && (epnt->linktype == link_type) && - (epnt->direction == dirn) && - (epnt->device_type == dev_type)) - return true; - else - return false; + (epnt->direction == dirn)) { + /* do not check dev_type for DMIC link type */ + if (epnt->linktype == NHLT_LINK_DMIC) + return true; + + if (epnt->device_type == dev_type) + return true; + } + + return false; } struct nhlt_specific_cfg -- cgit From 75bf50f4aaa1c78d769d854ab3d975884909e4fb Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Thu, 7 Dec 2017 21:54:27 +0100 Subject: xfrm: fix xfrm_do_migrate() with AEAD e.g(AES-GCM) copy geniv when cloning the xfrm state. x->geniv was not copied to the new state and migration would fail. xfrm_do_migrate .. xfrm_state_clone() .. .. esp_init_aead() crypto_alloc_aead() crypto_alloc_tfm() crypto_find_alg() return EAGAIN and failed Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1f5cee2269af..88d0a563e141 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1344,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, if (orig->aead) { x->aead = xfrm_algo_aead_clone(orig->aead); + x->geniv = orig->geniv; if (!x->aead) goto error; } -- cgit From 732706afe1cc46ef48493b3d2b69c98f36314ae4 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 8 Dec 2017 08:07:25 +0100 Subject: xfrm: Fix stack-out-of-bounds with misconfigured transport mode policies. On policies with a transport mode template, we pass the addresses from the flowi to xfrm_state_find(), assuming that the IP addresses (and address family) don't change during transformation. Unfortunately our policy template validation is not strict enough. It is possible to configure policies with transport mode template where the address family of the template does not match the selectors address family. This lead to stack-out-of-bound reads because we compare arddesses of the wrong family. Fix this by refusing such a configuration, address family can not change on transport mode. We use the assumption that, on transport mode, the first templates address family must match the address family of the policy selector. Subsequent transport mode templates must mach the address family of the previous template. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ff58c37469d6..bdb48e5dba04 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) { + u16 prev_family; int i; if (nr > XFRM_MAX_DEPTH) return -EINVAL; + prev_family = family; + for (i = 0; i < nr; i++) { /* We never validated the ut->family value, so many * applications simply leave it at zero. The check was @@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; + if ((ut[i].mode == XFRM_MODE_TRANSPORT) && + (ut[i].family != prev_family)) + return -EINVAL; + + prev_family = ut[i].family; + switch (ut[i].family) { case AF_INET: break; -- cgit From 50dd2ea8ef67a1617e0c0658bcbec4b9fb03b936 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Dec 2017 16:15:20 +0000 Subject: ASoC: wm_adsp: Fix validation of firmware and coeff lengths The checks for whether another region/block header could be present are subtracting the size from the current offset. Obviously we should instead subtract the offset from the size. The checks for whether the region/block data fit in the file are adding the data size to the current offset and header size, without checking for integer overflow. Rearrange these so that overflow is impossible. Signed-off-by: Ben Hutchings Acked-by: Charles Keepax Tested-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 65c059b5ffd7..66e32f5d2917 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, -- cgit From 0f0be40ba59c2d5fdfea48e3ff93f6165d616440 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 8 Dec 2017 15:18:53 +0100 Subject: ASoC: atmel-classd: select correct Kconfig symbol SND_ATMEL_SOC_CLASSD selects SND_ATMEL_SOC_DMA but the driver itself handles its own DMA operations and doesn't need anything from atmel-pcm-dma.c or atmel_ssc_dai.c. Replace SND_ATMEL_SOC_DMA by SND_SOC_GENERIC_DMAENGINE_PCM which is the only one actually required. This may end up in a configuration leading to a link error: sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_set_audio': atmel_ssc_dai.c:(.text+0x79c): undefined reference to `atmel_pcm_dma_platform_register' atmel_ssc_dai.c:(.text+0x79c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `atmel_pcm_dma_platform_register' sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_put_audio': atmel_ssc_dai.c:(.text+0xf24): undefined reference to `atmel_pcm_dma_platform_unregister' atmel_ssc_dai.c:(.text+0xf24): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `atmel_pcm_dma_platform_unregister' Tested on sama5d2 xplained with the following configuration where nothing selects SND_ATMEL_SOC_DMA: CONFIG_SND_ATMEL_SOC=y CONFIG_SND_ATMEL_SOC_CLASSD=y Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Fixes: e0a25b6d1862 ("ASoC: atmel-classd: add the Audio Class D Amplifier") Signed-off-by: Alexandre Belloni Signed-off-by: Mark Brown --- sound/soc/atmel/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..dcee145dd179 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731 config SND_ATMEL_SOC_CLASSD tristate "Atmel ASoC driver for boards using CLASSD" depends on ARCH_AT91 || COMPILE_TEST - select SND_ATMEL_SOC_DMA + select SND_SOC_GENERIC_DMAENGINE_PCM select REGMAP_MMIO help Say Y if you want to add support for Atmel ASoC driver for boards using -- cgit From 4362934a75ff2a399fd0bcd75937907115770020 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Fri, 8 Dec 2017 09:30:18 +0530 Subject: ASoC: Intel: Change kern log level to avoid unwanted messages patch suppresses the warning message "control load not supported" as this is a debug information to help debug issues in topology. Signed-off-by: Naveen Manohar Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index a072bcf209d2..81923da18ac2 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt, break; default: - dev_warn(bus->dev, "Control load not supported %d:%d:%d\n", + dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n", hdr->ops.get, hdr->ops.put, hdr->ops.info); break; } -- cgit From 33f801366bdf3f8b67dfe325b84f4051a090d01e Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Dec 2017 22:15:38 -0800 Subject: ASoC: rsnd: ssi: fix race condition in rsnd_ssi_pointer_update Currently there is race condition between set of byte_pos and wrap it around when new buffer starts. If .pointer is called in-between it will result in inconsistent pointer position be returned from .pointer callback. This patch increments buffer pointer atomically to avoid this issue. Signed-off-by: Jiada Wang Reviewed-by: Takashi Sakamoto Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssi.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index fece1e5f582f..cbf3bf312d23 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, int byte) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + bool ret = false; + int byte_pos; - ssi->byte_pos += byte; + byte_pos = ssi->byte_pos + byte; - if (ssi->byte_pos >= ssi->next_period_byte) { + if (byte_pos >= ssi->next_period_byte) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); ssi->period_pos++; ssi->next_period_byte += ssi->byte_per_period; if (ssi->period_pos >= runtime->periods) { - ssi->byte_pos = 0; + byte_pos = 0; ssi->period_pos = 0; ssi->next_period_byte = ssi->byte_per_period; } - return true; + ret = true; } - return false; + WRITE_ONCE(ssi->byte_pos, byte_pos); + + return ret; } /* @@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod, struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - *pointer = bytes_to_frames(runtime, ssi->byte_pos); + *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos)); return 0; } -- cgit From f5f00e7dcc4161f07b76ff1a854e8b1ea7a1ed41 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Tue, 5 Dec 2017 14:45:32 +0800 Subject: drm/i915/gvt: Fix pipe A enable as default for vgpu observed igt drv_module_reload test case failure on 4.15.0 rc2 kernel with panic due to no active pipe available. the gpu will reset during unload/load and make pipe config reg lost which can cause kernel panic issue happen. this patch is to move pipe enabling to emulate_mointor_status_chagne to handle vgpu reset case as well. Fixes: 7e6059020894 ("drm/i915/gvt: enabled pipe A default on creating vgpu") Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 355120865efd..309f3fa6794a 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) /* Clear host CRT status, so guest couldn't detect this host CRT. */ if (IS_BROADWELL(dev_priv)) vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); - vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; + return 0; } -- cgit From 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 01:18:57 -0800 Subject: crypto: skcipher - set walk.iv for zero-length inputs All the ChaCha20 algorithms as well as the ARM bit-sliced AES-XTS algorithms call skcipher_walk_virt(), then access the IV (walk.iv) before checking whether any bytes need to be processed (walk.nbytes). But if the input is empty, then skcipher_walk_virt() doesn't set the IV, and the algorithms crash trying to use the uninitialized IV pointer. Fix it by setting the IV earlier in skcipher_walk_virt(). Also fix it for the AEAD walk functions. This isn't a perfect solution because we can't actually align the IV to ->cra_alignmask unless there are bytes to process, for one because the temporary buffer for the aligned IV is freed by skcipher_walk_done(), which is only called when there are bytes to process. Thus, algorithms that require aligned IVs will still need to avoid accessing the IV when walk.nbytes == 0. Still, many algorithms/architectures are fine with IVs having any alignment, and even for those that aren't, a misaligned pointer bug is much less severe than an uninitialized pointer bug. This change also matches the behavior of the older blkcipher_walk API. Fixes: 0cabf2af6f5a ("crypto: skcipher - Fix crash on zero-length input") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/skcipher.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 778e0ff42bfa..11af5fd6a443 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, walk->total = req->cryptlen; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->iv = req->iv; - walk->oiv = req->iv; - walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; @@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, int err; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); - walk->iv = req->iv; - walk->oiv = req->iv; - if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else -- cgit From 11edb555966ed2c66c533d17c604f9d7e580a829 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 29 Nov 2017 12:02:23 +0100 Subject: crypto: af_alg - wait for data at beginning of recvmsg The wait for data is a non-atomic operation that can sleep and therefore potentially release the socket lock. The release of the socket lock allows another thread to modify the context data structure. The waiting operation for new data therefore must be called at the beginning of recvmsg. This prevents a race condition where checks of the members of the context data structure are performed by recvmsg while there is a potential for modification of these values. Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 6 ------ crypto/algif_aead.c | 6 ++++++ crypto/algif_skcipher.c | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 358749c38894..f1a2caf1b59b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1137,12 +1137,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, if (!af_alg_readable(sk)) break; - if (!ctx->used) { - err = af_alg_wait_for_data(sk, flags); - if (err) - return err; - } - seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 805f485ddf1b..c8a32bef208a 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t usedpages = 0; /* [in] RX bufs to be used from user */ size_t processed = 0; /* [in] TX bufs to be consumed */ + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* * Data length provided by caller via sendmsg/sendpage that has not * yet been processed. diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 30cff827dd8f..6fb595cd63ac 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, int err = 0; size_t len = 0; + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* Allocate cipher request for current operation. */ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + crypto_skcipher_reqsize(tfm)); -- cgit From 9abffc6f2efe46c3564c04312e52e07622d40e51 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 30 Nov 2017 13:39:27 +0100 Subject: crypto: mcryptd - protect the per-CPU queue with a lock mcryptd_enqueue_request() grabs the per-CPU queue struct and protects access to it with disabled preemption. Then it schedules a worker on the same CPU. The worker in mcryptd_queue_worker() guards access to the same per-CPU variable with disabled preemption. If we take CPU-hotplug into account then it is possible that between queue_work_on() and the actual invocation of the worker the CPU goes down and the worker will be scheduled on _another_ CPU. And here the preempt_disable() protection does not work anymore. The easiest thing is to add a spin_lock() to guard access to the list. Another detail: mcryptd_queue_worker() is not processing more than MCRYPTD_BATCH invocation in a row. If there are still items left, then it will invoke queue_work() to proceed with more later. *I* would suggest to simply drop that check because it does not use a system workqueue and the workqueue is already marked as "CPU_INTENSIVE". And if preemption is required then the scheduler should do it. However if queue_work() is used then the work item is marked as CPU unbound. That means it will try to run on the local CPU but it may run on another CPU as well. Especially with CONFIG_DEBUG_WQ_FORCE_RR_CPU=y. Again, the preempt_disable() won't work here but lock which was introduced will help. In order to keep work-item on the local CPU (and avoid RR) I changed it to queue_work_on(). Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu --- crypto/mcryptd.c | 23 ++++++++++------------- include/crypto/mcryptd.h | 1 + 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 4e6472658852..eca04d3729b3 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index cceafa01f907..b67404fc4b34 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; -- cgit From d53c5135792319e095bb126bc43b2ee98586f7fe Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 8 Dec 2017 11:50:37 +0100 Subject: crypto: af_alg - fix race accessing cipher request When invoking an asynchronous cipher operation, the invocation of the callback may be performed before the subsequent operations in the initial code path are invoked. The callback deletes the cipher request data structure which implies that after the invocation of the asynchronous cipher operation, this data structure must not be accessed any more. The setting of the return code size with the request data structure must therefore be moved before the invocation of the asynchronous cipher operation. Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Stephan Mueller Acked-by: Jonathan Cameron Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 10 +++++----- crypto/algif_skcipher.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index c8a32bef208a..b73db2b27656 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -291,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = outlen; + aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); @@ -298,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, crypto_aead_decrypt(&areq->cra_u.aead_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = outlen; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6fb595cd63ac..baef9bfccdda 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -125,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = len; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, af_alg_async_cb, areq); @@ -133,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = len; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { -- cgit From d2950278d2d04ff5314abeb38d9c59c4e7c0ee53 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Dec 2017 18:23:09 +0100 Subject: xfrm: put policies when reusing pcpu xdst entry We need to put the policies when re-using the pcpu xdst entry, else this leaks the reference. Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 038ec68f6901..70aa5cb0c659 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1839,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + xfrm_pols_put(pols, num_pols); while (err > 0) xfrm_state_put(xfrm[--err]); return xdst; -- cgit From d2b3c353595a855794f8b9df5b5bdbe8deb0c413 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 4 Dec 2017 12:11:02 +0300 Subject: pinctrl: cherryview: Mask all interrupts on Intel_Strago based systems Guenter Roeck reported an interrupt storm on a prototype system which is based on Cyan Chromebook. The root cause turned out to be a incorrectly configured pin that triggers spurious interrupts. This will be fixed in coreboot but currently we need to prevent the interrupt storm from happening by masking all interrupts (but not GPEs) on those systems. Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953 Fixes: bcb48cca23ec ("pinctrl: cherryview: Do not mask all interrupts in probe") Reported-and-tested-by: Guenter Roeck Reported-by: Dmitry Torokhov Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index bdedb6325c72..4471fd94e1fe 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq.valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); -- cgit From a782fc8cc6bf6909daf3b65630079e2afec316ef Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 1 Dec 2017 18:21:34 +0800 Subject: drm/ttm: fix incorrect calculate on shrink_pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shrink_pages is in unit of Order after ttm_page_pool_free, but it is used by nr_free in next round so need change it into native page unit Signed-off-by: Monk Liu Reviewed-by: Roger He Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 44343a2bf55c..71945ccaf012 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed += (nr_free_pool - shrink_pages) << pool->order; if (freed >= sc->nr_to_scan) break; + shrink_pages <<= pool->order; } mutex_unlock(&lock); return freed; -- cgit From 13d3fc69a03721d972460fe2bff9b479f7999221 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 1 Dec 2017 18:23:56 +0800 Subject: drm/ttm: max_cpages is in unit of native page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix calculation. Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 71945ccaf012..b5ba6441489f 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -544,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, int r = 0; unsigned i, j, cpages; unsigned npages = 1 << order; - unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); + unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC); /* allocate array for page caching change */ caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); -- cgit From 0507f438ea19d4280006467ba02956f6a693deca Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 23 Nov 2017 18:38:59 +0800 Subject: drm/amdgpu: fix MAP_QUEUES paramter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should be 0. Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da43813d67a4..5aeb5f8816f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); -- cgit From f41d84dddc66b164ac16acf3f584c276146f1c48 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 12 Dec 2017 17:59:15 +0530 Subject: powerpc/perf: Dereference BHRB entries safely It's theoretically possible that branch instructions recorded in BHRB (Branch History Rolling Buffer) entries have already been unmapped before they are processed by the kernel. Hence, trying to dereference such memory location will result in a crash. eg: Unable to handle kernel paging request for data at address 0xd000000019c41764 Faulting instruction address: 0xc000000000084a14 NIP [c000000000084a14] branch_target+0x4/0x70 LR [c0000000000eb828] record_and_restart+0x568/0x5c0 Call Trace: [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable) [c0000000000ec378] perf_event_interrupt+0x298/0x460 [c000000000027964] performance_monitor_exception+0x54/0x70 [c000000000009ba4] performance_monitor_common+0x114/0x120 Fix it by deferefencing the addresses safely. Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB") Cc: stable@vger.kernel.org # v3.10+ Suggested-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Reviewed-by: Naveen N. Rao [mpe: Use probe_kernel_read() which is clearer, tweak change log] Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 153812966365..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); -- cgit From ad2b6e01024ef23bddc3ce0bcb115ecd8c520b7e Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Tue, 5 Dec 2017 11:00:38 +0530 Subject: powerpc/perf/imc: Fix nest-imc cpuhotplug callback failure Oops is observed during boot: Faulting instruction address: 0xc000000000248340 cpu 0x0: Vector: 380 (Data Access Out of Range) at [c000000ff66fb850] pc: c000000000248340: event_function_call+0x50/0x1f0 lr: c00000000024878c: perf_remove_from_context+0x3c/0x100 sp: c000000ff66fbad0 msr: 9000000000009033 dar: 7d20e2a6f92d03c0 pid = 14, comm = cpuhp/0 While registering the cpuhotplug callbacks for nest-imc, if we fail in the cpuhotplug online path for any random node in a multi node system (because the opal call to stop nest-imc counters fails for that node), ppc_nest_imc_cpu_offline() will get invoked for other nodes who successfully returned from cpuhotplug online path. This call trace is generated since in the ppc_nest_imc_cpu_offline() path we are trying to migrate the event context, when nest-imc counters are not even initialized. Patch to add a check to ensure that nest-imc is registered before migrating the event context. Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..f1b940714d65 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) return 0; + /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. -- cgit From 110df8bd3e418b3476cae80babe8add48a8ea523 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 7 Dec 2017 22:53:27 +0530 Subject: powerpc/perf: Fix kfree memory allocated for nest pmus imc_common_cpuhp_mem_free() is the common function for all IMC (In-memory Collection counters) domains to unregister cpuhotplug callback and free memory. Since kfree of memory allocated for nest-imc (per_nest_pmu_arr) is in the common code, all domains (core/nest/thread) can do the kfree in the failure case. This could potentially create a call trace as shown below, where core(/thread/nest) imc pmu initialization fails and in the failure path imc_common_cpuhp_mem_free() free the memory(per_nest_pmu_arr), which is allocated by successfully registered nest units. The call trace is generated in a scenario where core-imc initialization is made to fail and a cpuhotplug is performed in a p9 system. During cpuhotplug ppc_nest_imc_cpu_offline() tries to access per_nest_pmu_arr, which is already freed by core-imc. NIP [c000000000cb6a94] mutex_lock+0x34/0x90 LR [c000000000cb6a88] mutex_lock+0x28/0x90 Call Trace: mutex_lock+0x28/0x90 (unreliable) perf_pmu_migrate_context+0x90/0x3a0 ppc_nest_imc_cpu_offline+0x190/0x1f0 cpuhp_invoke_callback+0x160/0x820 cpuhp_thread_fun+0x1bc/0x270 smpboot_thread_fn+0x250/0x290 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x74 To address this scenario do the kfree(per_nest_pmu_arr) only in case of nest-imc initialization failure, and when there is no other nest units registered. Fixes: 73ce9aec65b1 ("powerpc/perf: Fix IMC_MAX_PMU macro") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index f1b940714d65..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1184,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1208,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1322,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } -- cgit From 2797c4a11f373b2545c2398ccb02e362ee66a142 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Dec 2017 13:25:13 +0000 Subject: drm/i915: Flush pending GTT writes before unbinding From the shrinker paths, we want to relinquish the GPU and GGTT access to the object, releasing the backing storage back to the system for swapout. As a part of that process we would unpin the pages, marking them for access by the CPU (for the swapout/swapin). However, if that process was interrupted after unbind the vma, we missed a flush of the inflight GGTT writes before we made that GTT space available again for reuse, with the prospect that we would redirect them to another page. The bug dates back to the introduction of multiple GGTT vma, but the code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()"). Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()") Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171204132513.7303-1-chris@chris-wilson.co.uk (cherry picked from commit 5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad4050f7ab3b..18de6569d04a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { -- cgit From 2b3a2e9f400acff4a4a9a2316e3e13b36b76b0e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Dec 2017 22:00:25 +0000 Subject: drm/i915: Drop fb reference on load_detect_pipe failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When intel_modeset_setup_plane_state() fails drop the local framebuffer reference before jumping to the error, otherwise we leak the framebuffer. Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Fixes: edde361711ef ("drm/i915: Use atomic state to obtain load detection crtc, v3.") Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171207220025.22698-1-chris@chris-wilson.co.uk (cherry picked from commit 3e72be177cf19ab3d62b3084d424dce7e71d847f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e8ccf89cb17b..ff9397030092 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9944,11 +9944,10 @@ found: } ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + drm_framebuffer_put(fb); if (ret) goto fail; - drm_framebuffer_put(fb); - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; -- cgit From 74c7b0782b15bc2478f557cea34b3fe34d452dc6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 12:10:33 +0000 Subject: drm/i915: Stop listening to request resubmission from the signaler kthread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent here was that we would be listening to i915_gem_request_unsubmit in order to cancel the signaler quickly and release the reference on the request. Cancelling the signaler is done directly via intel_engine_cancel_signaling (called from unsubmit), but that does not directly wake up the signaling thread, and neither does setting the request->global_seqno back to zero wake up listeners to the request->execute waitqueue. So the only time that listening to the request->execute waitqueue would wake up the signaling kthread would be on the request resubmission, during which time we would already receive wake ups from rejoining the global breadcrumbs wait rbtree. Trying to wake up to release the request remains an issue. If the signaling was cancelled and no other request required signaling, then it is possible for us to shutdown with the reference on the request still held. To ensure that we do not try to shutdown, leaking that request, we kick the signaling threads whenever we disarm the breadcrumbs, i.e. on parking the engine when idle. v2: We do need to be sure to release the last reference on stopping the kthread; asserting that it has been dropped already is insufficient. Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution queue") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: MichaÅ‚ Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171208121033.5236-1-chris@chris-wilson.co.uk Acked-by: Daniel Vetter Reviewed-by: Tvrtko Ursulin (cherry picked from commit 776bc27fd8ab67a675cb0041d3af361af5d0e290) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5f8b9f1f40f1..bcbc7abe6693 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n, *first; if (!b->irq_armed) - return; + goto wakeup_signaler; /* We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request @@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. + */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); -- cgit From 2cf654db8d7eafb973d28eb3cddf043d353e1345 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 09:48:02 +0000 Subject: drm/i915/fence: Use rcu to defer freeing of irq_work It is illegal to perform an immediate free of the struct irq_work from inside the irq_work callback (as irq_work_run_list modifies work->flags after execution of the work->func()). As we use the irq_work to coordinate the freeing of the callback from two different softirq paths, we need to defer the kfree from inside our irq_work callback, for which we can use kfree_rcu. Fixes: 81c0ed21aa91 ("drm/i915/fence: Avoid del_timer_sync() from inside a timer") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171213094802.28243-1-chris@chris-wilson.co.uk (cherry picked from commit 7d622351c94172a42bfe9b13bdb0fdc2be90ed3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_sw_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..ac236b88c99c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, -- cgit From 958d022e326810fd762505bd02007aced79ffcbc Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Thu, 14 Dec 2017 09:54:07 +0800 Subject: ASoC: rt5663: Fix the wrong result of the first jack detection In the first jack detection while booting, the result will always show as headset, even we insert the headphone. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5663.c | 4 ++++ sound/soc/codecs/rt5663.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index b036c9dc0c8c..d329bf719d80 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert) RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN); snd_soc_update_bits(codec, RT5663_IRQ_1, RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_RST); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_NOR); while (true) { regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val); diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h index c5a9b69579ad..03adc8004ba9 100644 --- a/sound/soc/codecs/rt5663.h +++ b/sound/soc/codecs/rt5663.h @@ -1029,6 +1029,10 @@ #define RT5663_POL_EXT_JD_SHIFT 10 #define RT5663_POL_EXT_JD_EN (0x1 << 10) #define RT5663_POL_EXT_JD_DIS (0x0 << 10) +#define RT5663_EM_JD_MASK (0x1 << 7) +#define RT5663_EM_JD_SHIFT 7 +#define RT5663_EM_JD_NOR (0x1 << 7) +#define RT5663_EM_JD_RST (0x0 << 7) /* DACREF LDO Control (0x0112)*/ #define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9) -- cgit From b7b2846fe26f2c0d7f317c874a13d3ecf22670ff Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:02 -0800 Subject: xfs: add the ability to join a held buffer to a defer_ops In certain cases, defer_ops callers will lock a buffer and want to hold the lock across transaction rolls. Similar to ijoined inodes, we want to dirty & join the buffer with each transaction roll in defer_finish so that afterwards the caller still owns the buffer lock and we haven't inadvertently pinned the log. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_defer.c | 39 ++++++++++++++++++++++++++++++++++++--- fs/xfs/libxfs/xfs_defer.h | 5 ++++- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. */ struct xfs_defer_op_type { -- cgit From 6e643cd094de3bd0f97edcc1db0089afa24d909f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:02 -0800 Subject: xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute The new attribute leaf buffer is not held locked across the transaction roll between the shortform->leaf modification and the addition of the new entry. As a result, the attribute buffer modification being made is not atomic from an operational perspective. Hence the AIL push can grab it in the transient state of "just created" after the initial transaction is rolled, because the buffer has been released. This leads to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating this as in-memory corruption, and shutting down the filesystem. Darrick ported the original patch to 4.15 and reworked it use the xfs_defer_bjoin helper and hold/join the buffer correctly across the second transaction roll. Signed-off-by: Alex Lyakas Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 20 +++++++++++++++----- fs/xfs/libxfs/xfs_attr_leaf.c | 9 ++++++--- fs/xfs/libxfs/xfs_attr_leaf.h | 3 ++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..a76914db72ef 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..601eaa36f1ad 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. */ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..894124efb421 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); -- cgit From 8c57b88637d78a723e0854fc3d06c6d4c31a1e0c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:53 -0800 Subject: xfs: account for null transactions in bunmapi In e1a4e37cc7b665 ("xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent"), we try to constrain the amount of real extents we unmap from the data fork in a given call so that we don't blow out transaction reservations. However, not all bunmapi operations require a transaction -- if we're only removing a delalloc extent, no transaction is needed, so we have to code against that. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1210f684d3c2..1bddbba6b80c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5136,7 +5136,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; -- cgit From c54854a437a447a6bb1dcb11f60dd01cef3fa597 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:54 -0800 Subject: xfs: move xfs_iext_insert tracepoint to report useful information Move the tracepoint in xfs_iext_insert to after the point where we've inserted the extent because otherwise we report stale extent data in the ftrace output. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_iext_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } -- cgit From 5c989a0ee06eb77a44baffd1779a5dbb9a7e873f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:54 -0800 Subject: xfs: remove dest file's post-eof preallocations before reflinking If we try to reflink into a file with post-eof preallocations at an offset well past the preallocations, we increase i_size as one would expect. However, those allocations do not have page cache backing them, so they won't get cleaned out on their own. This leads to asserts in the collapse/insert range code and xfs_destroy_inode when they encounter delalloc extents they weren't expecting to find. Since there are plenty of other places where we dump those post-eof blocks, do the same to the reflink destination file before we start remapping extents. This was found by adding clonerange support to fsstress and running it in write-only mode. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cf7c8f81bebb..e13f5ad57a03 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1291,6 +1291,17 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* + * Clear out post-eof preallocations because we don't have page cache + * backing the delayed allocations and they'll never get freed on + * their own. + */ + if (xfs_can_free_eofblocks(dest, true)) { + ret = xfs_free_eofblocks(dest); + if (ret) + goto out_unlock; + } + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) -- cgit From 73353f486c9b5b2407ec32be1004174dbbaf6c18 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:55 -0800 Subject: xfs: relax is_reflink_inode assert in xfs_reflink_find_cow_mapping We don't hold the ilock through the entire sequence of xfs_writepage_map -> xfs_map_cow -> xfs_reflink_find_cow_mapping. This means that we can race with another thread that is trying to clear the inode reflink flag, with the result that the flag is set for the xfs_map_cow check but cleared before we get to the assert in find_cow_mapping. When this happens, we blow the assert even though everything is fine. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e13f5ad57a03..99c5852f9fe7 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -490,8 +490,9 @@ xfs_reflink_find_cow_mapping( struct xfs_iext_cursor icur; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_is_reflink_inode(ip)) + return false; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return false; -- cgit From 9d40fba8b2056773b9744a95df9ddd6cc33a4f83 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:55 -0800 Subject: xfs: avoid infinite loop when cancelling CoW blocks after writeback failure When we're cancelling a cow range, we don't always delete each extent that we iterate, so we have to move icur backwards in the list to avoid an infinite loop. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 99c5852f9fe7..6931b0c79cac 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -611,6 +611,9 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + } else { + /* Didn't do anything, push cursor back. */ + xfs_iext_prev(ifp, &icur); } next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) -- cgit From a192de265b26c525672884630d5376c405e83b2a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:56 -0800 Subject: xfs: allow CoW remap transactions to use reserve blocks Since we as yet have no way of holding on to the indlen blocks that are reserved as part of CoW fork delalloc reservations, let the CoW remap transaction dip into the reserves so that we avoid failing writes. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6931b0c79cac..e49e6db415f7 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -729,7 +729,7 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, 0, &tp); + resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; -- cgit From caea4f384858ee7861367920df36995e7acfe160 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 15 Dec 2017 16:21:50 +0100 Subject: drm/sun4i: validate modes for HDMI When I connected my cubieboard running 4.15-rc1 to my 4k display I got no picture. Some digging found that there is no check against the upper pixelclock limit of the HDMI output, so X selects a 4kp60 format at 594 MHz, which obviously won't work. The patch below adds a check for the upper bound of what this hardware can do, and it checks if the requested tmds clock can be obtained. It also allows for the +/- 0.5% pixel clock variation that the HDMI spec permits. That code is based on commit 22d0be2a557e ("drm: arcpgu: Allow some clock deviation in crtc->mode_valid() callback") from Jose Abreu for drm/arc. Signed-off-by: Hans Verkuil Thanks-to: Jose Abreu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/162854cb-c7bd-d9ce-9fa0-9a6cd89c621b@xs4all.nl --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index dda904ec0534..c12f9bd12904 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -208,8 +208,27 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector) return ret; } +static int sun4i_hdmi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); + long rate = mode->clock * 1000; + long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (max(rounded_rate, rate) - min(rounded_rate, rate) < diff && + rounded_rate > 0) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = { .get_modes = sun4i_hdmi_get_modes, + .mode_valid = sun4i_hdmi_mode_valid, }; static enum drm_connector_status -- cgit From b2162117171864ef48d43cf5d888f3e8012c6c06 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 24 Nov 2017 17:26:28 -0500 Subject: drm/amd/display: add pipe locking before front end programing Add pipe locking/unlocking before we program the front end Signed-off-by: Bhawanpreet Lakha Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/dc/dce110/dce110_hw_sequencer.c | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 07ff8d2faf3f..d844fadcd56f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface( int num_planes, struct dc_state *context) { - int i, be_idx; + int i; if (num_planes == 0) return; - be_idx = -1; for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (stream == context->res_ctx.pipe_ctx[i].stream) { - be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; - break; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (stream == pipe_ctx->stream) { + if (!pipe_ctx->top_pipe && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); } } @@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface( context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); + + dc->hwss.update_plane_addr(dc, pipe_ctx); + program_surface_visibility(dc, pipe_ctx); } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if ((stream == pipe_ctx->stream) && + (!pipe_ctx->top_pipe) && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); + } } static void dce110_power_down_fe(struct dc *dc, int fe_idx) -- cgit From 56a9b95c4d3386a98f69f641dd6018886ed2e9d6 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 13 Nov 2017 17:03:53 -0500 Subject: drm/amd/display: set chroma taps to 1 when not scaling Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 9 ++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..b142629a1058 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -900,6 +900,15 @@ bool dcn_validate_bandwidth( v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; + /* + * Spreadsheet doesn't handle taps_c is one properly, + * need to force Chroma to always be scaled to pass + * bandwidth validation. + */ + if (v->override_hta_pschroma[input_idx] == 1) + v->override_hta_pschroma[input_idx] = 2; + if (v->override_vta_pschroma[input_idx] == 1) + v->override_vta_pschroma[input_idx] = 2; v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; } if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 74e7c82bdc76..a9d55d0dd69e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps( scl_data->taps.h_taps = 1; if (IDENTITY_RATIO(scl_data->ratios.vert)) scl_data->taps.v_taps = 1; - /* - * Spreadsheet doesn't handle taps_c is one properly, - * need to force Chroma to always be scaled to pass - * bandwidth validation. - */ + if (IDENTITY_RATIO(scl_data->ratios.horz_c)) + scl_data->taps.h_taps_c = 1; + if (IDENTITY_RATIO(scl_data->ratios.vert_c)) + scl_data->taps.v_taps_c = 1; } return true; -- cgit From 78288503199d0a33b69b972a44a4cf15df989899 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 10 Nov 2017 10:44:24 -0500 Subject: drm/amd/display: fix missing pixel clock adjustment for dongle Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Reviewed-by: Andrew Jiang Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e27ed4a45265..42a111b9505d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } -bool dp_active_dongle_validate_timing( +static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dc_dongle_caps *dongle_caps) { @@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) required_pix_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + required_pix_clk = required_pix_clk * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: -- cgit From becd0875f4393a992afbf57aa323f7bf1a71c3ff Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 1 Dec 2017 13:26:05 -0500 Subject: drm/amd/display: Fix rehook MST display not light back on Original applied dm_restore_drm_connector_state() has got removed. Set link status to BAD before hotplug() event could trigger another modeset from userspace. The fix "Fix MST daisy chain SST not light up" commit makes so it is trying to create a stream prior to dc_sink. That makes dc_sink is not present in create_stream_for_sink(). Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 + .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 51 ++++++++++++++++++++++ .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.h | 1 + 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f71fe6d2ddda..bb5fa895fb64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct dm_connector_state *dm_state) { struct drm_display_mode *preferred_mode = NULL; - const struct drm_connector *drm_connector; + struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!aconnector->dc_sink) { /* - * Exclude MST from creating fake_sink - * TODO: need to enable MST into fake_sink feature + * Create dc_sink when necessary to MST + * Don't apply fake_sink to MST */ - if (aconnector->mst_port) - goto stream_create_fail; + if (aconnector->mst_port) { + dm_dp_mst_dc_sink_create(drm_connector); + goto mst_dc_sink_create_done; + } if (create_fake_sink(aconnector)) goto stream_create_fail; @@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream_create_fail: dm_state_null: drm_connector_null: +mst_dc_sink_create_done: return stream; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 117521c6a6ed..0230250a1164 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -189,6 +189,8 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; + + bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index f8efb98b1fa7..638c2c2b5cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector, return ret; } +void dm_dp_mst_dc_sink_create(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct edid *edid; + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); + + if (!edid) { + drm_mode_connector_update_edid_property( + &aconnector->base, + NULL); + return; + } + + aconnector->edid = edid; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + (uint8_t *)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, + &init_params); + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + + amdgpu_dm_add_sink_to_freesync_module( + connector, aconnector->edid); + + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_set_path_property(connector, pathprop); drm_connector_list_iter_end(&conn_iter); + aconnector->mst_connected = true; return &aconnector->base; } } @@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); + aconnector->mst_connected = true; + DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_update_edid_property( &aconnector->base, NULL); + + aconnector->mst_connected = false; } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } +static void dm_dp_mst_link_status_reset(struct drm_connector *connector) +{ + mutex_lock(&connector->dev->mode_config.mutex); + drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); +} + static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); @@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) drm_connector_register(connector); + if (aconnector->mst_connected) + dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 2da851b40042..8cf51da26657 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,5 +31,6 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); +void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif -- cgit From 34112bf4935dabe3c1d1fd42842ed771e279bf61 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 6 Nov 2017 16:20:33 +0100 Subject: drm/nouveau/fbcon: fix NULL pointer access in nouveau_fbcon_destroy When the fbcon object is initialized, but nouveau_fbcon_create is not called, we run into a NULL pointer access within nouveau_fbcon_create when unloading nouveau. The call to drm_fb_helper_funcs.fb_probe is deferred until there is a display for real since 4.14, that's why fbcon->helper.fb is still not set. Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index c533d8e04afc..be7357bf2246 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_unregister_fbi(&fbcon->helper); drm_fb_helper_fini(&fbcon->helper); - if (nouveau_fb->nvbo) { + if (nouveau_fb && nouveau_fb->nvbo) { nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); -- cgit From f60707a69a225f2dd87f42628b44e24ceb219d28 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 10:49:35 +1000 Subject: drm/nouveau/bios/dp: support DP Info Table 2.0 Reported-by: Hans de Goede Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 972370ed36f0..7c7efa4ea0d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) if (data) { *ver = nvbios_rd08(bios, data + 0x00); switch (*ver) { + case 0x20: case 0x21: case 0x30: case 0x40: @@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx, if (data && idx < *cnt) { u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); switch (*ver * !!outp) { + case 0x20: case 0x21: case 0x30: *hdr = nvbios_rd08(bios, data + 0x04); @@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx, info->type = nvbios_rd16(bios, data + 0x00); info->mask = nvbios_rd16(bios, data + 0x02); switch (*ver) { + case 0x20: + info->mask |= 0x00c0; /* match any link */ + /* fall-through */ case 0x21: case 0x30: info->flags = nvbios_rd08(bios, data + 0x05); info->script[0] = nvbios_rd16(bios, data + 0x06); info->script[1] = nvbios_rd16(bios, data + 0x08); - info->lnkcmp = nvbios_rd16(bios, data + 0x0a); + if (*len >= 0x0c) + info->lnkcmp = nvbios_rd16(bios, data + 0x0a); if (*len >= 0x0f) { info->script[2] = nvbios_rd16(bios, data + 0x0c); info->script[3] = nvbios_rd16(bios, data + 0x0e); @@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, memset(info, 0x00, sizeof(*info)); if (data) { switch (*ver) { + case 0x20: case 0x21: info->dc = nvbios_rd08(bios, data + 0x02); info->pe = nvbios_rd08(bios, data + 0x03); -- cgit From 81a24b9ae8eea95b74337c253059da761043ed06 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 11:08:52 +1000 Subject: drm/nouveau/imem/nv50: fix refcount_t warning Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 1ba7289684aa..db48a1daca0c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; - refcount_inc(&iobj->maps); + refcount_set(&iobj->maps, 1); } mutex_unlock(&imem->subdev.mutex); -- cgit From a121027d2747168df0aac0c3da35509eea39f61c Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 24 Nov 2017 03:56:26 +0100 Subject: drm/nouveau/pci: do a msi rearm on init On my GP107 when I load nouveau after unloading it, for some reason the GPU stopped sending or the CPU stopped receiving interrupts if MSI was enabled. Doing a rearm once before getting any interrupts fixes this. Signed-off-by: Karol Herbst Reviewed-by: Thierry Reding Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index b1b1f3626b96..deb96de54b00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev) return ret; pci->irq = pdev->irq; + + /* Ensure MSI interrupts are armed, for the case where there are + * already interrupts pending (for whatever reason) at load time. + */ + if (pci->msi) + pci->func->msi_rearm(pci); + return ret; } -- cgit From 6cb0f2a39d3b7ccdd7269af4ddadb38e78aee744 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 15:04:32 +1000 Subject: drm/nouveau/mmu/gp10b: use correct implementation Reported-by: Mikko Perttunen Fixes: 6359c98224 ("drm/nouveau/mmu/gp10b: fork from gf100") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e14643615698..00eeaaffeae5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2369,7 +2369,7 @@ nv13b_chipset = { .imem = gk20a_instmem_new, .ltc = gp100_ltc_new, .mc = gp10b_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, .pmu = gm20b_pmu_new, .timer = gk20a_timer_new, -- cgit From f29f18eb952bc3e71deedf8bd8fc902f66853c48 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 15:25:14 +1000 Subject: drm/nouveau: avoid GPU page sizes > PAGE_SIZE for buffer objects in host memory While the Tegra (GK20A, GM20B, GP10B) MMUs support large pages in host memory, we're currently lacking IOMMU support for merging system pages into large enough chunks to be mapped as such by the GPU. The core VMM code actually supports automatically determining the best page size to map with, which is intended for these situations, but for various complicated reasons the DRM is currently forcing the page size selection on a per-BO basis. This should fix breakage reported on Tegra GPUs in the meantime, until one or both of the above issues are resolved properly. Reported-by: Mikko Perttunen Fixes: 7dc6a446da7c ("drm/nouveau: improve selection of GPU page size") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2615912430cc..42c1827bbb8e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) continue; - if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + if ((flags & TTM_PL_FLAG_TT) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; /* Select this page size if it's the first that supports -- cgit From 74a39954a4900a7dea7010e3063e2bf16b23934b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 14 Dec 2017 11:19:27 +1000 Subject: drm/nouveau: use alternate memory type for system-memory buffers with kind != 0 Fixes bug on Tegra where we'd strip kind information from system memory (ie. all) buffers, resulting in misrendering. Behaviour on dGPU should be unchanged. Reported-by: Thierry Reding Fixes: d7722134b8 ("drm/nouveau: switch over to new memory and vmm interfaces") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 11 ++++++++-- drivers/gpu/drm/nouveau/nouveau_mem.c | 6 +++--- drivers/gpu/drm/nouveau/nouveau_ttm.c | 39 ++++++++++++++++++++++++++--------- 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 42c1827bbb8e..435ff8662cfa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, /* Determine if we can get a cache-coherent map, forcing * uncached mapping if we can't. */ - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) nvbo->force_coherent = true; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e86b8220a4bb..6a1b1debe5b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -156,8 +156,8 @@ struct nouveau_drm { struct nvif_object copy; int mtrr; int type_vram; - int type_host; - int type_ncoh; + int type_host[2]; + int type_ncoh[2]; } ttm; /* GEM interface support */ @@ -216,6 +216,13 @@ nouveau_drm(struct drm_device *dev) return dev->dev_private; } +static inline bool +nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm) +{ + struct nvif_mmu *mmu = &drm->client.mmu; + return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED); +} + int nouveau_pmops_suspend(struct device *); int nouveau_pmops_resume(struct device *); bool nouveau_pmops_runtime(void); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 589a9621db76..c002f8968507 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) u8 type; int ret; - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) - type = drm->ttm.type_ncoh; + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) + type = drm->ttm.type_ncoh[!!mem->kind]; else - type = drm->ttm.type_host; + type = drm->ttm.type_host[0]; if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) mem->comp = mem->kind = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 08b974b30482..dff51a0ee028 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm) drm->ttm.mem_global_ref.release = NULL; } -int -nouveau_ttm_init(struct nouveau_drm *drm) +static int +nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind) { - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_pci *pci = device->pci; struct nvif_mmu *mmu = &drm->client.mmu; - struct drm_device *dev = drm->dev; - int typei, ret; + int typei; typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | - NVIF_MEM_COHERENT); + kind | NVIF_MEM_COHERENT); if (typei < 0) return -ENOSYS; - drm->ttm.type_host = typei; + drm->ttm.type_host[!!kind] = typei; - typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind); if (typei < 0) return -ENOSYS; - drm->ttm.type_ncoh = typei; + drm->ttm.type_ncoh[!!kind] = typei; + return 0; +} + +int +nouveau_ttm_init(struct nouveau_drm *drm) +{ + struct nvkm_device *device = nvxx_device(&drm->client.device); + struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; + struct drm_device *dev = drm->dev; + int typei, ret; + + ret = nouveau_ttm_init_host(drm, 0); + if (ret) + return ret; + + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && + drm->client.device.info.chipset != 0x50) { + ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND); + if (ret) + return ret; + } if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { -- cgit From 182dc9c7f217146d69d9c0b75c150c0314b9b170 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Dec 2017 16:33:36 +1100 Subject: powerpc/kernel: Print actual address of regs when oopsing When we oops or otherwise call show_regs() we print the address of the regs structure. Being able to see the address is fairly useful, firstly to verify that the regs pointer is not completely bogus, and secondly it allows you to dump the regs and surrounding memory with a debugger if you have one. In the normal case the regs will be located somewhere on the stack, so printing their location discloses no further information than printing the stack pointer does already. So switch to %px and print the actual address, not the hashed value. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); -- cgit From acf568ee859f098279eadf551612f103afdacb4e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 Dec 2017 16:40:44 +1100 Subject: xfrm: Reinject transport-mode packets through tasklet This is an old bugbear of mine: https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html By crafting special packets, it is possible to cause recursion in our kernel when processing transport-mode packets at levels that are only limited by packet size. The easiest one is with DNAT, but an even worse one is where UDP encapsulation is used in which case you just have to insert an UDP encapsulation header in between each level of recursion. This patch avoids this problem by reinjecting tranport-mode packets through a tasklet. Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_input.c | 12 ++++++++++- net/ipv6/xfrm6_input.c | 10 ++++++++- net/xfrm/xfrm_input.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dc28a98ce97c..ae35991b5877 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e50b7fea57ee..bcfc00e88756 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } +static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, iph->tos, skb->dev)) goto drop; } - return dst_input(skb); + + if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2)) + goto drop; + + return 0; drop: kfree_skb(skb); return NET_RX_DROP; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index fe04e23af986..841f4a07438e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, } EXPORT_SYMBOL(xfrm6_rcv_spi); +static int xfrm6_transport_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + if (xfrm_trans_queue(skb, ip6_rcv_finish)) + __kfree_skb(skb); + return -1; +} + int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); @@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_rcv_finish); + xfrm6_transport_finish2); return -1; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index da6447389ffb..3f6f6f8c9fa5 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -8,15 +8,29 @@ * */ +#include +#include #include #include #include +#include #include #include #include #include #include +struct xfrm_trans_tasklet { + struct tasklet_struct tasklet; + struct sk_buff_head queue; +}; + +struct xfrm_trans_cb { + int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); +}; + +#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) + static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); @@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; +static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet); + int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; @@ -477,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) } EXPORT_SYMBOL(xfrm_input_resume); +static void xfrm_trans_reinject(unsigned long data) +{ + struct xfrm_trans_tasklet *trans = (void *)data; + struct sk_buff_head queue; + struct sk_buff *skb; + + __skb_queue_head_init(&queue); + skb_queue_splice_init(&trans->queue, &queue); + + while ((skb = __skb_dequeue(&queue))) + XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); +} + +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct xfrm_trans_tasklet *trans; + + trans = this_cpu_ptr(&xfrm_trans_tasklet); + + if (skb_queue_len(&trans->queue) >= netdev_max_backlog) + return -ENOBUFS; + + XFRM_TRANS_SKB_CB(skb)->finish = finish; + skb_queue_tail(&trans->queue, skb); + tasklet_schedule(&trans->tasklet); + return 0; +} +EXPORT_SYMBOL(xfrm_trans_queue); + void __init xfrm_input_init(void) { int err; + int i; init_dummy_netdev(&xfrm_napi_dev); err = gro_cells_init(&gro_cells, &xfrm_napi_dev); @@ -490,4 +538,13 @@ void __init xfrm_input_init(void) sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + + for_each_possible_cpu(i) { + struct xfrm_trans_tasklet *trans; + + trans = &per_cpu(xfrm_trans_tasklet, i); + __skb_queue_head_init(&trans->queue); + tasklet_init(&trans->tasklet, xfrm_trans_reinject, + (unsigned long)trans); + } } -- cgit From 92411f6d7f1afcc95e54295d40e96a75385212ec Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Dec 2017 16:58:50 +0100 Subject: drm/sun4i: Fix error path handling The commit 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence") moved a bunch of logic around, but forgot to update the gotos after the introduction of the err_free_dotclock label. It means that if we fail later that the one introduced in that commit, we'll just to the old label which isn't free the clock we created. This will result in a breakage as soon as someone tries to do something with that clock, since its resources will have been long reclaimed. Cc: Fixes: 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence") Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/f83c1cebc731f0b4251f5ddd7b38c718cd79bb0b.1512662253.git-series.maxime.ripard@free-electrons.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index e122f5b2a395..f4284b51bdca 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, if (IS_ERR(tcon->crtc)) { dev_err(dev, "Couldn't create our CRTC\n"); ret = PTR_ERR(tcon->crtc); - goto err_free_clocks; + goto err_free_dotclock; } ret = sun4i_rgb_init(drm, tcon); if (ret < 0) - goto err_free_clocks; + goto err_free_dotclock; if (tcon->quirks->needs_de_be_mux) { /* -- cgit From d070f7c703ef26e3db613f24206823f916272fc6 Mon Sep 17 00:00:00 2001 From: Abhijeet Kumar Date: Tue, 12 Dec 2017 00:40:25 +0530 Subject: ASoC: nau8825: fix issue that pop noise when start capture In skylake platform, we hear a loud pop noise(0 dB) at start of audio capture power up sequence. This patch removes the pop noise from the recording by adding a delay before enabling ADC. Signed-off-by: Abhijeet Kumar Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 714ce17da717..e853a6dfd33b 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: + msleep(125); regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL, NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC); break; -- cgit From 20220945b1a8e77c789dd4bb9aa1471b6e8695cc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 15 Dec 2017 20:07:23 -0800 Subject: ASoC: rt5514-spi: only enable wakeup when fully initialized If an rt5514-spi device is probed but the platform hasn't linked it in, we might never fully request the SPI IRQ, nor configure the rt5514 DSP, but we still might try to enable the SPI IRQ (enable_irq_wake()). This is bad, and among other things, can cause the interrupt to trigger every time we try to suspend the system (e.g., because the interrupt trigger setting was never set properly). Instead of setting our wakeup capabilities in the SPI driver probe routine, let's wait until we've actually requested the IRQ. Fixes issues seen on the "kevin" Chromebook (Samsung Chromebook Plus). Fixes: 58f1c07d23cd ("ASoC: rt5514: Voice wakeup support.") Signed-off-by: Brian Norris Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514-spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index ca6a90d8fc39..64bf26cec20d 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform) dev_err(&rt5514_spi->dev, "%s Failed to reguest IRQ: %d\n", __func__, ret); + else + device_init_wakeup(rt5514_dsp->dev, true); } return 0; @@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi) return ret; } - device_init_wakeup(&spi->dev, true); - return 0; } -- cgit From e0795606ad565cc2da0b926a00c7e6b8187a6d71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 18:28:58 +0000 Subject: drm/i915/lpe: Remove double-encapsulation of info string Just printk the string, or at least do not double up on the newlines! Fixes: eef57324d926 ("drm/i915: setup bridge for HDMI LPE audio driver") Signed-off-by: Chris Wilson Cc: Pierre-Louis Bossart Cc: Jerome Anand Cc: Jani Nikula Cc: Takashi Iwai Reviewed-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20171213182858.2159-1-chris@chris-wilson.co.uk (cherry picked from commit 99cd05c43baac8ef56c20eb1776a15b02c81ccc3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lpe_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } -- cgit From a4ffdc2b6726958c07d535318400124e3a3bc19b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Dec 2017 14:43:10 -0800 Subject: drm/i915: Protect DDI port to DPLL map from theoretical race. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we have multiple modesets for different connectors happening in parallel we could have a race on the RMW on these shared registers. This possibility was initially raised by Paulo when reviewing commit '555e38d27317 ("drm/i915/cnl: DDI - PLL mapping")' but the original possibility comes from commit '5416d871136d ("drm/i915/skl: Set the eDP link rate on DPLL0")'. Or maybe later when atomic commits entered into picture. Apparently the discussion around this topic showed that the right solution would be on serializing the atomic commits in a way that we don't have the possibility of races here since if that parallel modeset happenings apparently many other things will be on fire. Code is there since SKL and there was no report of issue, but since we never looked back to that serialization possibility, and also we don't have an igt case for that it is better to at least protect this corner. Suggested-by: Paulo Zanoni Fixes: 555e38d27317 ("drm/i915/cnl: DDI - PLL mapping") Fixes: 5416d871136d ("drm/i915/skl: Set the eDP link rate on DPLL0") Cc: Paulo Zanoni Cc: Ville Syrjälä Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20171215224310.19103-1-rodrigo.vivi@intel.com (cherry picked from commit 8edcda1266f93816fde77c9754f388ae0ae343fc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ddi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e0843bb99169..58a3755544b2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) -- cgit From 2cc42bac1c795f75fcc062b95c6ca7ac1b84d5d8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Dec 2017 09:37:45 -0700 Subject: x86-64/Xen: eliminate W+X mappings A few thousand such pages are usually left around due to the re-use of L1 tables having been provided by the hypervisor (Dom0) or tool stack (DomU). Set NX in the direct map variant, which needs to be done in L2 due to the dual use of the re-used L1s. For x86_configure_nx() to actually do what it is supposed to do, call get_cpu_cap() first. This was broken by commit 4763ed4d45 ("x86, mm: Clean up and simplify NX enablement") when switching away from the direct EFER read. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/mmu_pv.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 69b9deff7e5c..86f26ea99324 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -87,6 +87,8 @@ #include "multicalls.h" #include "pmu.h" +#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ + void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); @@ -1249,6 +1251,7 @@ asmlinkage __visible void __init xen_start_kernel(void) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); x86_configure_nx(); /* Get mfn list */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9d9cc3870722..7118f776cd49 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1916,6 +1916,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); + /* + * Zap execute permission from the ident map. Due to the sharing of + * L1 entries we need to do this in the L2. + */ + if (__supported_pte_mask & _PAGE_NX) { + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(level2_ident_pgt[i])) + continue; + level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); + } + } + /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) -- cgit From 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 18 Dec 2017 11:57:51 +0800 Subject: clk: sunxi: sun9i-mmc: Implement reset callback for reset controls Our MMC host driver now issues a reset, instead of just deasserting the reset control, since commit c34eda69ad4c ("mmc: sunxi: Reset the device at probe time"). The sun9i-mmc clock driver does not support this, and will fail, which results in MMC not probing. This patch implements the reset callback by asserting the reset control, then deasserting it after a small delay. Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets") Cc: # 4.14.x Signed-off-by: Chen-Yu Tsai Acked-by: Philipp Zabel Acked-by: Maxime Ripard Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/20171218035751.20661-1-wens@csie.org --- drivers/clk/sunxi/clk-sun9i-mmc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index a1a634253d6f..f00d8758ba24 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) -- cgit From 4ef928929987c19fff4d3c1650f139560ba1cc13 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 20 Dec 2017 08:38:46 +1000 Subject: drm/nouveau: fix obvious memory leak fdo#104340. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_vmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 9e2628dd8e4d..f5371d96b003 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma) nvif_vmm_put(&vma->vmm->vmm, &tmp); } list_del(&vma->head); - *pvma = NULL; kfree(*pvma); + *pvma = NULL; } } -- cgit From 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 19 Dec 2017 15:07:10 -0800 Subject: libnvdimm, pfn: fix start_pad handling for aligned namespaces The alignment checks at pfn driver startup fail to properly account for the 'start_pad' in the case where the namespace is misaligned relative to its internal alignment. This is typically triggered in 1G aligned namespace, but could theoretically trigger with small namespace alignments. When this triggers the kernel reports messages of the form: dax2.1: bad offset: 0x3c000000 dax disabled align: 0x40000000 Cc: Fixes: 1ee6667cd8d1 ("libnvdimm, pfn, dax: fix initialization vs autodetect...") Reported-by: Jane Chu Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 65cc171c721d..db2fc7c02e01 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", -- cgit From 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 4 Dec 2017 14:07:43 -0800 Subject: libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment The following namespace configuration attempt: # ndctl create-namespace -e namespace0.0 -m devdax -a 1G -f libndctl: ndctl_dax_enable: dax0.1: failed to enable Error: namespace0.0: failed to enable failed to reconfigure namespace: No such device or address ...fails when the backing memory range is not physically aligned to 1G: # cat /proc/iomem | grep Persistent 210000000-30fffffff : Persistent Memory (legacy) In the above example the 4G persistent memory range starts and ends on a 256MB boundary. We handle this case correctly when needing to handle cases that violate section alignment (128MB) collisions against "System RAM", and we simply need to extend that padding/truncation for the 1GB alignment use case. Cc: Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute...") Reported-and-tested-by: Jane Chu Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index db2fc7c02e01..2adada1a5855 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -583,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, return altmap; } +static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) +{ + return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), + ALIGN_DOWN(phys, nd_pfn->align)); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; @@ -638,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) start = nsio->res.start; size = PHYS_SECTION_ALIGN_UP(start + size) - start; if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(start + resource_size(&nsio->res), + nd_pfn->align)) { size = resource_size(&nsio->res); - end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + end_trunc = start + size - phys_pmem_align_down(nd_pfn, + start + size); } if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); /* -- cgit From d5aa24825da5711f8cb829f873160ddf1a29b19c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 06:11:59 +0000 Subject: ASoC: rsnd: fixup ADG register mask BRGCKR should use 0x80770000, instead of 0x80FF0000. R-Car Gen2 xxx_TIMSEL should use 0x0F1F, R-Car Gen3 xxx_TIMSEL should use 0x1F1F. Here, Gen3 doesn't support AVD, thus, both case can use 0x0F1F. Signed-off-by: Kuninori Morimoto Reviewed-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/adg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 8ddb08714faa..4672688cac32 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod, NULL, &val, NULL); val = val << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val); @@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod, in = in << shift; out = out << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; switch (id / 2) { case 0: @@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate) ckr = 0x80000000; } - rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr); + rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr); rsnd_mod_write(adg_mod, BRRA, adg->rbga); rsnd_mod_write(adg_mod, BRRB, adg->rbgb); -- cgit From 2c08cd7c20968ddf71feeac2265b4741d2b3fdde Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 20 Dec 2017 11:52:47 +0100 Subject: drm/sun4i: hdmi: Move the mode_valid callback to the encoder When attached to the connector, the mode_valid callback will only filter the modes provided by the connector itself as part of its probe. However, it will not be doing it when the mode is provided by the userspace, which still might result in a broken configuration. In order to enforce these constraints, move our mode_valid callback to the encoder which doesn't have this behaviour. Acked-by: Daniel Vetter Signed-off-by: Hans Verkuil [maxime: Wrote the commit log in order to update the patch from the merged v3 to the v4 that was correct.] Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/0fa230a8-d01d-561a-f74f-6b4fd421255b@xs4all.nl --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index c12f9bd12904..500b6fb3e028 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder, writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); } +static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); + unsigned long rate = mode->clock * 1000; + unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (rounded_rate > 0 && + max_t(unsigned long, rounded_rate, rate) - + min_t(unsigned long, rounded_rate, rate) < diff) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { .atomic_check = sun4i_hdmi_atomic_check, .disable = sun4i_hdmi_disable, .enable = sun4i_hdmi_enable, .mode_set = sun4i_hdmi_mode_set, + .mode_valid = sun4i_hdmi_mode_valid, }; static const struct drm_encoder_funcs sun4i_hdmi_funcs = { @@ -208,27 +228,8 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector) return ret; } -static int sun4i_hdmi_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); - long rate = mode->clock * 1000; - long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ - long rounded_rate; - - /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ - if (rate > 165000000) - return MODE_CLOCK_HIGH; - rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); - if (max(rounded_rate, rate) - min(rounded_rate, rate) < diff && - rounded_rate > 0) - return MODE_OK; - return MODE_NOCLOCK; -} - static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = { .get_modes = sun4i_hdmi_get_modes, - .mode_valid = sun4i_hdmi_mode_valid, }; static enum drm_connector_status -- cgit From ce0769e0ea4b3e192466243a1a9fd39acf214f1e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 20 Dec 2017 10:35:43 +0100 Subject: drm/plane: Make framebuffer refcounting the responsibility of setplane_internal callers lock_all_ctx in setplane_internal may return -EINTR, and __setplane_internal could return -EDEADLK. Making more special cases for fb would make the code even harder to read, so the easiest solution is not taking over the fb refcount, and making callers responsible for dropping the ref. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102707 Fixes: 13736ba3b38b ("drm/legacy: Convert setplane ioctl locking to interruptible.") Testcase: kms_atomic_interruptible Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171220093545.613-2-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_plane.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 37a93cdffb4a..2c90519576a3 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format) } /* - * setplane_internal - setplane handler for internal callers + * __setplane_internal - setplane handler for internal callers * - * Note that we assume an extra reference has already been taken on fb. If the - * update fails, this reference will be dropped before return; if it succeeds, - * the previous framebuffer (if any) will be unreferenced instead. + * This function will take a reference on the new fb for the plane + * on success. * * src_{x,y,w,h} are provided in 16.16 fixed point format */ @@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane, if (!ret) { plane->crtc = crtc; plane->fb = fb; - fb = NULL; + drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: - if (fb) - drm_framebuffer_put(fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; @@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data, } } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ - return setplane_internal(plane, crtc, fb, - plane_req->crtc_x, plane_req->crtc_y, - plane_req->crtc_w, plane_req->crtc_h, - plane_req->src_x, plane_req->src_y, - plane_req->src_w, plane_req->src_h); + ret = setplane_internal(plane, crtc, fb, + plane_req->crtc_x, plane_req->crtc_y, + plane_req->crtc_w, plane_req->crtc_h, + plane_req->src_x, plane_req->src_y, + plane_req->src_w, plane_req->src_h); + + if (fb) + drm_framebuffer_put(fb); + + return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, @@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ ret = __setplane_internal(crtc->cursor, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - 0, 0, src_w, src_h, ctx); + crtc_x, crtc_y, crtc_w, crtc_h, + 0, 0, src_w, src_h, ctx); + + if (fb) + drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { -- cgit From b3cf8528bb21febb650a7ecbf080d0647be40b9f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 12 Dec 2017 15:08:21 -0500 Subject: xen/balloon: Mark unallocated host memory as UNUSABLE Commit f5775e0b6116 ("x86/xen: discard RAM regions above the maximum reservation") left host memory not assigned to dom0 as available for memory hotplug. Unfortunately this also meant that those regions could be used by others. Specifically, commit fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") may try to map those addresses as MMIO. To prevent this mark unallocated host memory as E820_TYPE_UNUSABLE (thus effectively reverting f5775e0b6116) and keep track of that region as a hostmem resource that can be used for the hotplug. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/enlighten.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/setup.c | 6 ++-- drivers/xen/balloon.c | 65 ++++++++++++++++++++++++++++++++------ include/xen/balloon.h | 5 +++ 4 files changed, 144 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d669e9d89001..c9081c6671f0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,8 +1,12 @@ +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include +#endif #include #include #include #include +#include #include #include @@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +void __init arch_xen_balloon_init(struct resource *hostmem_resource) +{ + struct xen_memory_map memmap; + int rc; + unsigned int i, last_guest_ram; + phys_addr_t max_addr = PFN_PHYS(max_pfn); + struct e820_table *xen_e820_table; + const struct e820_entry *entry; + struct resource *res; + + if (!xen_initial_domain()) + return; + + xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); + if (!xen_e820_table) + return; + + memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); + set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); + rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); + if (rc) { + pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); + goto out; + } + + last_guest_ram = 0; + for (i = 0; i < memmap.nr_entries; i++) { + if (xen_e820_table->entries[i].addr >= max_addr) + break; + if (xen_e820_table->entries[i].type == E820_TYPE_RAM) + last_guest_ram = i; + } + + entry = &xen_e820_table->entries[last_guest_ram]; + if (max_addr >= entry->addr + entry->size) + goto out; /* No unallocated host RAM. */ + + hostmem_resource->start = max_addr; + hostmem_resource->end = entry->addr + entry->size; + + /* + * Mark non-RAM regions between the end of dom0 RAM and end of host RAM + * as unavailable. The rest of that region can be used for hotplug-based + * ballooning. + */ + for (; i < memmap.nr_entries; i++) { + entry = &xen_e820_table->entries[i]; + + if (entry->type == E820_TYPE_RAM) + continue; + + if (entry->addr >= hostmem_resource->end) + break; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto out; + + res->name = "Unavailable host RAM"; + res->start = entry->addr; + res->end = (entry->addr + entry->size < hostmem_resource->end) ? + entry->addr + entry->size : hostmem_resource->end; + rc = insert_resource(hostmem_resource, res); + if (rc) { + pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", + __func__, res->start, res->end, rc); + kfree(res); + goto out; + } + } + + out: + kfree(xen_e820_table); +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ac55c02f98e9..e9011e1ee3de 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -807,7 +807,6 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { - bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,11 +822,10 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - discard = true; + type = E820_TYPE_UNUSABLE; } - if (!discard) - xen_align_and_add_e820_region(addr, chunk_size, type); + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f77e499afddd..065f0b607373 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } +/* + * Host memory not allocated to dom0. We can use this range for hotplug-based + * ballooning. + * + * It's a type-less resource. Setting IORESOURCE_MEM will make resource + * management algorithms (arch_remove_reservations()) look into guest e820, + * which we don't want. + */ +static struct resource hostmem_resource = { + .name = "Host RAM", +}; + +void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) +{} + static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res; - int ret; + struct resource *res, *res_hostmem; + int ret = -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; + res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); + if (res_hostmem) { + /* Try to grab a range from hostmem */ + res_hostmem->name = "Host memory"; + ret = allocate_resource(&hostmem_resource, res_hostmem, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + } + + if (!ret) { + /* + * Insert this resource into iomem. Because hostmem_resource + * tracks portion of guest e820 marked as UNUSABLE noone else + * should try to use it. + */ + res->start = res_hostmem->start; + res->end = res_hostmem->end; + ret = insert_resource(&iomem_resource, res); + if (ret < 0) { + pr_err("Can't insert iomem_resource [%llx - %llx]\n", + res->start, res->end); + release_memory_resource(res_hostmem); + res_hostmem = NULL; + res->start = res->end = 0; + } + } + + if (ret) { + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } } #ifdef CONFIG_SPARSEMEM @@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); + release_memory_resource(res_hostmem); return NULL; } } @@ -765,6 +810,8 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); + + arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 8906361bb50c..d0adfc78dcbd 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -43,3 +43,8 @@ static inline void xen_balloon_init(void) { } #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +struct resource; +void arch_xen_balloon_init(struct resource *hostmem_resource); +#endif -- cgit From 91aae6be4139b9e3902656d819e6af66e051bd7a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:42:22 -0800 Subject: xfs: track cowblocks separately in i_flags The EOFBLOCKS/COWBLOCKS tags are totally separate things, so track them with separate i_flags. Right now we're abusing IEOFBLOCKS for both, which is totally bogus because we won't tag the inode with COWBLOCKS if IEOFBLOCKS was set by a previous tagging of the inode with EOFBLOCKS. Found by wiring up clonerange to fsstress in xfs/017. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_icache.c | 33 ++++++++++++++++++++++++--------- fs/xfs/xfs_inode.h | 1 + 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..58d2d4253c8e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks( return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); } +static inline unsigned long +xfs_iflag_for_tag( + int tag) +{ + switch (tag) { + case XFS_ICI_EOFBLOCKS_TAG: + return XFS_IEOFBLOCKS; + case XFS_ICI_COWBLOCKS_TAG: + return XFS_ICOWBLOCKS; + default: + ASSERT(0); + return 0; + } +} + static void -__xfs_inode_set_eofblocks_tag( +__xfs_inode_set_blocks_tag( xfs_inode_t *ip, void (*execute)(struct xfs_mount *mp), void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag( * Don't bother locking the AG and looking up in the radix trees * if we already know that we have the tag set. */ - if (ip->i_flags & XFS_IEOFBLOCKS) + if (ip->i_flags & xfs_iflag_for_tag(tag)) return; spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_IEOFBLOCKS; + ip->i_flags |= xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_eofblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks, trace_xfs_perag_set_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } static void -__xfs_inode_clear_eofblocks_tag( +__xfs_inode_clear_blocks_tag( xfs_inode_t *ip, void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, int error, unsigned long caller_ip), @@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag( struct xfs_perag *pag; spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_IEOFBLOCKS; + ip->i_flags &= ~xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_eofblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } @@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_cowblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks, trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_cowblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b2136af9289f..d383e392ec9d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * log recovery to replay a bmap operation on the inode. */ #define XFS_IRECOVERY (1 << 11) +#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during -- cgit From d2a48e52541cdf474ef35d51e8d73ded5be33122 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 20 Dec 2017 22:54:24 -0800 Subject: drm: move lease init after validation in drm_lease_create Patch bd36d3bab2e3d08f80766c86487090dbceed4651 fixed a deadlock in the failure path of drm_lease_create. This made the partially initialized lease object visible for a short window of time. To avoid having the lessee state appear transiently, I've rearranged the code so that the lessor fields are not filled in until the parameters are all validated and the function will succeed. Signed-off-by: Keith Packard Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171221065424.1304-1-keithp@keithp.com --- drivers/gpu/drm/drm_lease.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 59849f02e2ad..1402c0e71b03 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr mutex_lock(&dev->mode_config.idr_mutex); - /* Insert the new lessee into the tree */ - id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); - if (id < 0) { - error = id; - goto out_lessee; - } - - lessee->lessee_id = id; - lessee->lessor = drm_master_get(lessor); - list_add_tail(&lessee->lessee_list, &lessor->lessees); - idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.crtc_idr, object)) @@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr } } + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + /* Move the leases over */ lessee->leases = *leases; DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); -- cgit From 363e59baa4f76d3f97c0133ff7014cba3d90a7c3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:42:59 -0800 Subject: xfs: don't be so eager to clear the cowblocks tag on truncate Currently, xfs_itruncate_extents clears the cowblocks tag if i_cnextents is zero. This is wrong, since i_cnextents only tracks real extents in the CoW fork, which means that we could have some delayed CoW reservations still in there that will now never get cleaned. Fix a further bug where we /don't/ clear the reflink iflag if there are any attribute blocks -- really, it's only safe to clear the reflink flag if there are no data fork extents and no cow fork extents. Found by adding clonerange to fsstress in xfs/017. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b41952a4ddd8..6f95bdb408ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1487,6 +1487,24 @@ xfs_link( return error; } +/* Clear the reflink flag and the cowblocks tag if possible. */ +static void +xfs_itruncate_clear_reflink_flags( + struct xfs_inode *ip) +{ + struct xfs_ifork *dfork; + struct xfs_ifork *cfork; + + if (!xfs_is_reflink_inode(ip)) + return; + dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (dfork->if_bytes == 0 && cfork->if_bytes == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (cfork->if_bytes == 0) + xfs_inode_clear_cowblocks_tag(ip); +} + /* * Free up the underlying blocks past new_size. The new size must be smaller * than the current size. This routine can be used both for the attribute and @@ -1583,15 +1601,7 @@ xfs_itruncate_extents( if (error) goto out; - /* - * Clear the reflink flag if there are no data fork blocks and - * there are no extents staged in the cow fork. - */ - if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { - if (ip->i_d.di_nblocks == 0) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - xfs_inode_clear_cowblocks_tag(ip); - } + xfs_itruncate_clear_reflink_flags(ip); /* * Always re-log the inode so that our permanent transaction can keep -- cgit From 10ddf64e420f7f6c1a871bfb4ff2de08faef8235 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:46:05 -0800 Subject: xfs: remove leftover CoW reservations when remounting ro When we're remounting the filesystem readonly, remove all CoW preallocations prior to going ro. If the fs goes down after the ro remount, we never clean up the staging extents, which means xfs_check will trip over them on a subsequent run. Practically speaking, the next mount will clean them up too, so this is unlikely to be seen. Since we shut down the cowblocks cleaner on remount-ro, we also have to make sure we start it back up if/when we remount-rw. Found by adding clonerange to fsstress and running xfs/017. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_icache.h | 1 + fs/xfs/xfs_super.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 58d2d4253c8e..3861d61fb265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -870,7 +870,7 @@ xfs_eofblocks_worker( * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). * (We'll just piggyback on the post-EOF prealloc space workqueue.) */ -STATIC void +void xfs_queue_cowblocks( struct xfs_mount *mp) { diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index bff4d85e5498..d4a77588eca1 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); void xfs_cowblocks_worker(struct work_struct *); +void xfs_queue_cowblocks(struct xfs_mount *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..2db6a40a96bd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1360,6 +1360,7 @@ xfs_fs_remount( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } + xfs_queue_cowblocks(mp); /* Create the per-AG metadata reservation pool .*/ error = xfs_fs_reserve_ag_blocks(mp); @@ -1369,6 +1370,14 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* Get rid of any leftover CoW reservations... */ + cancel_delayed_work_sync(&mp->m_cowblocks_work); + error = xfs_icache_free_cowblocks(mp, NULL); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { -- cgit From 86d692bfad1b0097fa866f5fcfa5f5adf4cd82e8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:46:06 -0800 Subject: xfs: set cowblocks tag for direct cow writes too If a user performs a direct CoW write, we end up loading the CoW fork with preallocated extents. Therefore, we must set the cowblocks tag so that they can be cleared out if we run low on space. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e49e6db415f7..47aea2e82c26 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -454,6 +454,8 @@ retry: if (error) goto out_bmap_cancel; + xfs_inode_set_cowblocks_tag(ip); + /* Finish up. */ error = xfs_defer_finish(&tp, &dfops); if (error) -- cgit From 0525e952dcceb9fc947c6d395de7f72220c7d081 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:03 -0800 Subject: xfs: queue deferred rmap ops for cow staging extent alloc/free in the right order Under the deferred rmap operation scheme, there's a certain order in which the rmap deferred ops have to be queued to maintain integrity during log replay. For alloc/map operations that order is cui -> rui; for free/unmap operations that order is cui -> rui -> efi. However, the initial refcount code got the ordering wrong in the free side of things because it queued refcount free op and an EFI and the refcount free op queued a rmap free op, resulting in the order cui -> efi -> rui. If we fail before the efd finishes, the efi recovery will try to do a wildcard rmap removal and the subsequent rui will fail to find the rmap and blow up. This didn't ever happen due to other screws up in handling unknown owner rmap removals, but those other screw ups broke recovery in other ways, so fix the ordering to follow the intended rules. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_refcount.c | 52 ++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..c40d26763075 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. */ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } -- cgit From 33df3a9cf925183a6a169bc3eff2bd0febd1298a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:27 -0800 Subject: xfs: always honor OWN_UNKNOWN rmap removal requests Calling xfs_rmap_free with an unknown owner is supposed to remove any rmaps covering that range regardless of owner. This is used by the EFI recovery code to say "we're freeing this, it mustn't be owned by anything anymore", but for whatever reason xfs_free_ag_extent filters them out. Therefore, remove the filter and make xfs_rmap_unmap actually treat it as a wildcard owner -- free anything that's already there, and if there's no owner at all then that's fine too. There are two existing callers of bmap_add_free that take care the rmap deferred ops themselves and use OWN_UNKNOWN to skip the EFI-based rmap cleanup; convert these to use OWN_NULL (via helpers), and now we really require that an RUI (if any) gets added to the defer ops before any EFI. Lastly, now that xfs_free_extent filters out OWN_NULL rmap free requests, growfs will have to consult directly with the rmap to ensure that there aren't any rmaps in the grown region. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_alloc.c | 4 ++-- fs/xfs/libxfs/xfs_rmap.c | 25 +++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rmap.h | 16 +++++++++++++++- fs/xfs/xfs_extfree_item.c | 2 +- fs/xfs/xfs_fsops.c | 5 +++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..83ed7715f856 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -702,7 +702,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1682,7 +1682,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..7465cfb39276 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -444,6 +444,30 @@ xfs_rmap_unmap( goto out_done; } + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. + */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } + /* Make sure the unwritten flag matches. */ XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); @@ -664,6 +688,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..0fcd5b1ba729 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 44f8c5451210..64da90655e95 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -538,7 +538,7 @@ xfs_efi_recover( return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); - xfs_rmap_skip_owner_update(&oinfo); + xfs_rmap_any_owner_update(&oinfo); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &efip->efi_format.efi_extents[i]; error = xfs_trans_free_extent(tp, efdp, extp->ext_start, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8f22fc579dbb..60a2e128cb6a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -571,6 +571,11 @@ xfs_growfs_data_private( * this doesn't actually exist in the rmap btree. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); + error = xfs_rmap_free(tp, bp, agno, + be32_to_cpu(agf->agf_length) - new, + new, &oinfo); + if (error) + goto error0; error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, be32_to_cpu(agf->agf_length) - new), -- cgit From 68c58e9b9a88c1a9d0c2eaf6c7acefb00f5fbbfb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:55 -0800 Subject: xfs: only skip rmap owner checks for unknown-owner rmap removal For rmap removal, refactor the rmap owner checks into a separate function, then skip the checks if we are performing an unknown-owner removal. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rmap.c | 76 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 7465cfb39276..50db920ceeeb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range( return error; } +/* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + /* * Find the extent in the rmap btree and remove it. * @@ -468,33 +513,16 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); - /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); - - /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Check the offset, if necessary. */ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ -- cgit From 8bc0d7ac934b6f2d0dc8f38a3104d281c9db1e98 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 19 Dec 2017 22:24:10 -0200 Subject: i915: Reject CCS modifiers for pipe C on Geminilake Current code advertises (on the modifiers blob property) support for CCS modifier for pipe C on GLK, only to reject it later when validating the request before the atomic commit. This fixes the tests igt@kms_ccs@pipe-c-*, which should skip on GLK for pipe C (see bug 104096). A relevant discussion is archived at: https://lists.freedesktop.org/archives/intel-gfx/2017-December/150646.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104096 Signed-off-by: Gabriel Krisman Bertazi Cc: Ben Widawsky Reviewed-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171220002410.5604-1-krisman@collabora.co.uk (cherry picked from commit f0cbd8bd877f3d8c5b80a6b1add9ca9010d7f9d8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ff9397030092..30cf273d57aa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13194,7 +13194,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; -- cgit From 13b7954c0b8dd2d6382b4ddb5053f09e389d5c6e Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 14 Dec 2017 17:26:13 -0700 Subject: libnvdimm, btt: add a couple of missing kernel-doc lines Recent updates to btt.h neglected to add corresponding kernel-doc lines for new structure members. Add them. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 578c2057524d..884fbbbdd18a 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -125,6 +125,7 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @err_lock: Mutex for synchronizing error clearing. * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -176,6 +177,7 @@ struct arena_info { * @init_lock: Mutex used for the BTT initialization * @init_state: Flag describing the initialization state for the BTT * @num_arenas: Number of arenas in the BTT instance + * @phys_bb: Pointer to the namespace's badblocks structure */ struct btt { struct gendisk *btt_disk; -- cgit From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 18 Dec 2017 09:28:39 -0700 Subject: libnvdimm, btt: Fix an incompatibility in the log layout Due to a spec misinterpretation, the Linux implementation of the BTT log area had different padding scheme from other implementations, such as UEFI and NVML. This fixes the padding scheme, and defaults to it for new BTT layouts. We attempt to detect the padding scheme in use when probing for an existing BTT. If we detect the older/incompatible scheme, we continue using it. Reported-by: Juston Li Cc: Dan Williams Cc: Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 201 ++++++++++++++++++++++++++++++++++++++++++--------- drivers/nvdimm/btt.h | 45 +++++++++++- 2 files changed, 211 insertions(+), 35 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e949e3302af4..c586bcdb5190 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE, 0); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE, 0); } static struct dentry *debugfs_root; @@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_err(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent, unsigned long flags) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. - * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half, flags); if (ret) @@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena) { size_t logsize = arena->info2off - arena->logoff; size_t chunk_size = SZ_4K, offset = 0; - struct log_entry log; + struct log_entry ent; void *zerobuf; int ret; u32 i; @@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena) } for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log, 0); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent, 0); if (ret) goto free; } @@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. + */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 884fbbbdd18a..db3cb6d4d0d4 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -27,6 +27,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -50,12 +51,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -126,6 +167,7 @@ struct aligned_lock { * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. * @err_lock: Mutex for synchronizing error clearing. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -158,6 +200,7 @@ struct arena_info { /* Arena flags */ u32 flags; struct mutex err_lock; + int log_index[2]; }; /** -- cgit From f55688c45442bc863f40ad678c638785b26cdce6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 Dec 2017 13:10:00 -0800 Subject: iw_cxgb4: Only validate the MSN for successful completions If the RECV CQE is in error, ignore the MSN check. This was causing recvs that were flushed into the sw cq to be completed with the wrong status (BAD_MSN instead of FLUSHED). Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b7bfc536e00f..7ed87622e461 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, ret = -EAGAIN; goto skip_cqe; } - if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { t4_set_wq_in_error(wq); - hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); - goto proc_cqe; + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; } -- cgit From 96a236ed286776554fbd227c6d2876fd3b5dc65d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Dec 2017 10:29:25 -0800 Subject: iw_cxgb4: reflect the original WR opcode in drain cqes The flush/drain logic was not retaining the original wr opcode in its completion. This can cause problems if the application uses the completion opcode to make decisions. Use bit 10 of the CQE header word to indicate the CQE is a special drain completion, and save the original WR opcode in the cqe header opcode field. Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 7 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 -- drivers/infiniband/hw/cxgb4/qp.c | 46 +++++++++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/t4.h | 6 +++++ 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 7ed87622e461..6f2b26126c64 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -395,7 +395,7 @@ next_cqe: static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) { - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(cqe)) { WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); return 0; } @@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, /* * Special cqe for drain WR completions... */ - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(hw_cqe)) { *cookie = CQE_DRAIN_COOKIE(hw_cqe); *cqe = *hw_cqe; goto skip_cqe; @@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) c4iw_invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; - case C4IW_DRAIN_OPCODE: - wc->opcode = IB_WC_SEND; - break; default: pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", CQE_OPCODE(&cqe), CQE_QPID(&cqe)); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 470f97a79ebb..65dd3726ca02 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state) return IB_QPS_ERR; } -#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN - static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 38bddd02a943..21495f917bcc 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; } -static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int ib_to_fw_opcode(int ib_opcode) +{ + int opcode; + + switch (ib_opcode) { + case IB_WR_SEND_WITH_INV: + opcode = FW_RI_SEND_WITH_INV; + break; + case IB_WR_SEND: + opcode = FW_RI_SEND; + break; + case IB_WR_RDMA_WRITE: + opcode = FW_RI_RDMA_WRITE; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + opcode = FW_RI_READ_REQ; + break; + case IB_WR_REG_MR: + opcode = FW_RI_FAST_REGISTER; + break; + case IB_WR_LOCAL_INV: + opcode = FW_RI_LOCAL_INV; + break; + default: + opcode = -EINVAL; + } + return opcode; +} + +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; unsigned long flag; struct t4_cq *cq; + int opcode; schp = to_c4iw_cq(qhp->ibqp.send_cq); cq = &schp->cq; + opcode = ib_to_fw_opcode(wr->opcode); + if (opcode < 0) + return opcode; + cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(opcode) | CQE_TYPE_V(1) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&schp->lock, flag); @@ -819,6 +855,7 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } + return 0; } static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -833,9 +870,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(FW_RI_SEND) | CQE_TYPE_V(0) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&rchp->lock, flag); @@ -875,7 +913,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wr(qhp, wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e9ea94268d51..79e8ee12c391 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -197,6 +197,11 @@ struct t4_cqe { #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) #define CQE_SWCQE_V(x) ((x)<> CQE_DRAIN_S)) & CQE_DRAIN_M) +#define CQE_DRAIN_V(x) ((x)<> CQE_STATUS_S)) & CQE_STATUS_M) @@ -213,6 +218,7 @@ struct t4_cqe { #define CQE_OPCODE_V(x) ((x)<header))) +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header))) #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) #define SQ_TYPE(x) (CQE_TYPE((x))) -- cgit From d14587334580bc94d3ee11e8320e0c157f91ae8f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Dec 2017 14:02:10 -0800 Subject: iw_cxgb4: when flushing, complete all wrs in a chain If a wr chain was posted and needed to be flushed, only the first wr in the chain was completed with FLUSHED status. The rest were never completed. This caused isert to hang on shutdown due to the missing completions which left iscsi IO commands referenced, stalling the shutdown. Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 21495f917bcc..d5c92fc520d6 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -858,6 +858,22 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; } +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int ret = 0; + + while (wr) { + ret = complete_sq_drain_wr(qhp, wr); + if (ret) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + return ret; +} + static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; @@ -890,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + while (wr) { + complete_rq_drain_wr(qhp, wr); + wr = wr->next; + } +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -913,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - err = complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wrs(qhp, wr, bad_wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); @@ -1061,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_rq_drain_wr(qhp, wr); + complete_rq_drain_wrs(qhp, wr); return err; } num_wrs = t4_rq_avail(&qhp->wq); -- cgit From 17748056ce123ee37fb7382bc698fc721e3c4a09 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:49:03 -0800 Subject: RDMA/vmw_pvrdma: Call ib_umem_release on destroy QP path The QP cleanup did not previously call ib_umem_release, resulting in a user-triggerable kernel resource leak. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 10420a18d02f..dceebc623d96 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -431,6 +431,13 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&qp->refcnt); wait_event(qp->wait, !atomic_read(&qp->refcnt)); + if (!qp->is_kernel) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } + pvrdma_page_dir_cleanup(dev, &qp->pdir); kfree(qp); -- cgit From 30a366a9dabd05a0d218288b7d732649886b6a53 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:50:01 -0800 Subject: RDMA/vmw_pvrdma: Use refcount_dec_and_test to avoid warning refcount_dec generates a warning when the operation causes the refcount to hit zero. Avoid this by using refcount_dec_and_test. Fixes: 8b10ba783c9d ("RDMA/vmw_pvrdma: Add shared receive queue support") Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Bryan Tan Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 826ccb864596..a2b1a3c115f2 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -236,8 +236,8 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) dev->srq_tbl[srq->srq_handle] = NULL; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); - refcount_dec(&srq->refcnt); - wait_event(srq->wait, !refcount_read(&srq->refcnt)); + if (!refcount_dec_and_test(&srq->refcnt)) + wait_event(srq->wait, !refcount_read(&srq->refcnt)); /* There is no support for kernel clients, so this is safe. */ ib_umem_release(srq->umem); -- cgit From e3524b269e451cff68b19f32b15448933a53a4f4 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:51:40 -0800 Subject: RDMA/vmw_pvrdma: Avoid use after free due to QP/CQ/SRQ destroy The use of wait queues in vmw_pvrdma for handling concurrent access to a resource leaves a race condition which can cause a use after free bug. Fix this by using the pattern from other drivers, complete() protected by dec_and_test to ensure complete() is called only once. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 6 +++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 7 ++++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 17 +++++++---------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 7 ++++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 7 ++++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 63bc2efc34eb..4f7bd3b6a315 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -94,7 +94,7 @@ struct pvrdma_cq { u32 cq_handle; bool is_kernel; atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_id_table { @@ -175,7 +175,7 @@ struct pvrdma_srq { u32 srq_handle; int npages; refcount_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_qp { @@ -197,7 +197,7 @@ struct pvrdma_qp { bool is_kernel; struct mutex mutex; /* QP state mutex. */ atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_dev { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 3562c0c30492..e529622cefad 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); atomic_set(&cq->refcnt, 1); - init_waitqueue_head(&cq->wait); + init_completion(&cq->free); spin_lock_init(&cq->cq_lock); memset(cmd, 0, sizeof(*cmd)); @@ -230,8 +230,9 @@ err_cq: static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) { - atomic_dec(&cq->refcnt); - wait_event(cq->wait, !atomic_read(&cq->refcnt)); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); + wait_for_completion(&cq->free); if (!cq->is_kernel) ib_umem_release(cq->umem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 1f4e18717a00..e92681878c93 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) ibqp->event_handler(&e, ibqp->qp_context); } if (qp) { - atomic_dec(&qp->refcnt); - if (atomic_read(&qp->refcnt) == 0) - wake_up(&qp->wait); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); } } @@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) ibcq->event_handler(&e, ibcq->cq_context); } if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt) == 0) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } } @@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) } if (srq) { if (refcount_dec_and_test(&srq->refcnt)) - wake_up(&srq->wait); + complete(&srq->free); } } @@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt)) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index dceebc623d96..4059308e1454 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, spin_lock_init(&qp->rq.lock); mutex_init(&qp->mutex); atomic_set(&qp->refcnt, 1); - init_waitqueue_head(&qp->wait); + init_completion(&qp->free); qp->state = IB_QPS_RESET; @@ -428,8 +428,9 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - atomic_dec(&qp->refcnt); - wait_event(qp->wait, !atomic_read(&qp->refcnt)); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); + wait_for_completion(&qp->free); if (!qp->is_kernel) { if (qp->rumem) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index a2b1a3c115f2..5acebb1ef631 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, spin_lock_init(&srq->lock); refcount_set(&srq->refcnt, 1); - init_waitqueue_head(&srq->wait); + init_completion(&srq->free); dev_dbg(&dev->pdev->dev, "create shared receive queue from user space\n"); @@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) dev->srq_tbl[srq->srq_handle] = NULL; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); - if (!refcount_dec_and_test(&srq->refcnt)) - wait_event(srq->wait, !refcount_read(&srq->refcnt)); + if (refcount_dec_and_test(&srq->refcnt)) + complete(&srq->free); + wait_for_completion(&srq->free); /* There is no support for kernel clients, so this is safe. */ ib_umem_release(srq->umem); -- cgit From 71a0ff65a21bf3e2c4fde208c4a635ed2bbb4e81 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 21 Dec 2017 17:38:26 +0200 Subject: IB/mlx5: Fix congestion counters in LAG mode Congestion counters are counted and queried per physical function. When working in LAG mode, CNP packets can be sent or received on both of the functions, thus congestion counters should be aggregated from the two physical functions. Fixes: e1f24a79f424 ("IB/mlx5: Support congestion related counters") Signed-off-by: Majd Dibbiny Reviewed-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 11 ------ drivers/infiniband/hw/mlx5/cmd.h | 2 - drivers/infiniband/hw/mlx5/main.c | 35 +++-------------- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 56 +++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 4 ++ 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 470995fa38d2..6f6712f87a73 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) return err; } -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size) -{ - u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; - - MLX5_SET(query_cong_statistics_in, in, opcode, - MLX5_CMD_OP_QUERY_CONG_STATISTICS); - MLX5_SET(query_cong_statistics_in, in, clear, reset); - return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); -} - int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size) { diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index af4c24596274..78ffded7cc2c 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -37,8 +37,6 @@ #include int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 543d0a4c8bf3..b4ef4d9b6ce5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3737,34 +3737,6 @@ free: return ret; } -static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_port *port, - struct rdma_hw_stats *stats) -{ - int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - void *out; - int ret, i; - int offset = port->cnts.num_q_counters; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen); - if (ret) - goto free; - - for (i = 0; i < port->cnts.num_cong_counters; i++) { - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - port->cnts.offsets[i + offset])); - } - -free: - kvfree(out); - return ret; -} - static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -3782,7 +3754,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, num_counters = port->cnts.num_q_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - ret = mlx5_ib_query_cong_counters(dev, port, stats); + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + port->cnts.num_q_counters, + port->cnts.num_cong_counters, + port->cnts.offsets + + port->cnts.num_q_counters); if (ret) return ret; num_counters += port->cnts.num_cong_counters; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index f26f97fe4666..582b2f18010a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + MLX5_SET(query_cong_statistics_in, in, clear, reset); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) { return dev->priv.lag; @@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) /* If bonded, we do not add an IB device for PF1. */ return false; } + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_lag *ldev; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(values, 0, sizeof(*values) * num_counters); + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + if (ldev && mlx5_lag_is_bonded(ldev)) { + num_ports = MLX5_MAX_PORTS; + mdev[0] = ldev->pf[0].dev; + mdev[1] = ldev->pf[1].dev; + } else { + num_ports = 1; + mdev[0] = dev; + } + + for (i = 0; i < num_ports; ++i) { + ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen); + if (ret) + goto unlock; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +unlock: + mutex_unlock(&lag_mutex); + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..8846919356ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1164,6 +1164,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -- cgit From 1f80bd6a6cc8358b81194e1f5fc16449947396ec Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 21 Dec 2017 17:38:27 +0200 Subject: IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush The locking order of vlan_rwsem (LOCK A) and then rtnl (LOCK B), contradicts other flows such as ipoib_open possibly causing a deadlock. To prevent this deadlock heavy flush is called with RTNL locked and only then tries to acquire vlan_rwsem. This deadlock is possible only when there are child interfaces. [ 140.941758] ====================================================== [ 140.946276] WARNING: possible circular locking dependency detected [ 140.950950] 4.15.0-rc1+ #9 Tainted: G O [ 140.954797] ------------------------------------------------------ [ 140.959424] kworker/u32:1/146 is trying to acquire lock: [ 140.963450] (rtnl_mutex){+.+.}, at: [] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 140.970006] but task is already holding lock: [ 140.975141] (&priv->vlan_rwsem){++++}, at: [] __ipoib_ib_dev_flush+0x51/0x4e0 [ib_ipoib] [ 140.982105] which lock already depends on the new lock. [ 140.990023] the existing dependency chain (in reverse order) is: [ 140.998650] -> #1 (&priv->vlan_rwsem){++++}: [ 141.005276] down_read+0x4d/0xb0 [ 141.009560] ipoib_open+0xad/0x120 [ib_ipoib] [ 141.014400] __dev_open+0xcb/0x140 [ 141.017919] __dev_change_flags+0x1a4/0x1e0 [ 141.022133] dev_change_flags+0x23/0x60 [ 141.025695] devinet_ioctl+0x704/0x7d0 [ 141.029156] sock_do_ioctl+0x20/0x50 [ 141.032526] sock_ioctl+0x221/0x300 [ 141.036079] do_vfs_ioctl+0xa6/0x6d0 [ 141.039656] SyS_ioctl+0x74/0x80 [ 141.042811] entry_SYSCALL_64_fastpath+0x1f/0x96 [ 141.046891] -> #0 (rtnl_mutex){+.+.}: [ 141.051701] lock_acquire+0xd4/0x220 [ 141.055212] __mutex_lock+0x88/0x970 [ 141.058631] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 141.063160] __ipoib_ib_dev_flush+0x71/0x4e0 [ib_ipoib] [ 141.067648] process_one_work+0x1f5/0x610 [ 141.071429] worker_thread+0x4a/0x3f0 [ 141.074890] kthread+0x141/0x180 [ 141.078085] ret_from_fork+0x24/0x30 [ 141.081559] other info that might help us debug this: [ 141.088967] Possible unsafe locking scenario: [ 141.094280] CPU0 CPU1 [ 141.097953] ---- ---- [ 141.101640] lock(&priv->vlan_rwsem); [ 141.104771] lock(rtnl_mutex); [ 141.109207] lock(&priv->vlan_rwsem); [ 141.114032] lock(rtnl_mutex); [ 141.116800] *** DEADLOCK *** Fixes: b4b678b06f6e ("IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 3b96cdaf9a83..e6151a29c412 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { - rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - result = ipoib_ib_dev_open(dev); - rtnl_unlock(); - if (result) + if (ipoib_ib_dev_open(dev)) return; if (netif_queue_stopped(dev)) @@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); + rtnl_lock(); __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + rtnl_unlock(); } void ipoib_ib_dev_cleanup(struct net_device *dev) -- cgit From cd95a89282ef61458c3758d70ebfbd91f303033f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 21 Dec 2017 08:52:50 -0800 Subject: selftests/bpf: fix Makefile for passing LLC to the command line Makefile has a LLC variable that is initialised to "llc", but can theoretically be overridden from the command line ("make LLC=llc-6.0"). However, this fails because for LLVM probe check, "llc" is called directly. Use the $(LLC) variable instead to fix this. Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer") Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 05fc4e2e7b3a..9316e648a880 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -39,7 +39,7 @@ $(BPFOBJ): force CLANG ?= clang LLC ?= llc -PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) # Let newer LLVM versions transparently probe the kernel for availability # of full BPF instruction set. -- cgit From e7cdf5c82f1773c3386b93bbcf13b9bfff29fa31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 Dec 2017 12:07:00 +0000 Subject: drm/syncobj: Stop reusing the same struct file for all syncobj -> fd The vk cts test: dEQP-VK.api.external.semaphore.opaque_fd.export_multiple_times_temporary triggers a lot of VFS: Close: file count is 0 Dave pointed out that clearing the syncobj->file from drm_syncobj_file_release() was sufficient to silence the test, but that opens a can of worm since we assumed that the syncobj->file was never unset. Stop trying to reuse the same struct file for every fd pointing to the drm_syncobj, and allocate one file for each fd instead. v2: Fixup return handling of drm_syncobj_fd_to_handle v2.1: [airlied: fix possible syncobj ref race] Reported-by: Dave Airlie Signed-off-by: Chris Wilson Tested-by: Dave Airlie Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 77 ++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f776fc1cc543..cb4d09c70fd4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = { .release = drm_syncobj_file_release, }; -static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) -{ - struct file *file = anon_inode_getfile("syncobj_file", - &drm_syncobj_file_fops, - syncobj, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - drm_syncobj_get(syncobj); - if (cmpxchg(&syncobj->file, NULL, file)) { - /* lost the race */ - fput(file); - } - - return 0; -} - int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { - int ret; + struct file *file; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; - if (!syncobj->file) { - ret = drm_syncobj_alloc_file(syncobj); - if (ret) { - put_unused_fd(fd); - return ret; - } + file = anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } - fd_install(fd, syncobj->file); + + drm_syncobj_get(syncobj); + fd_install(fd, file); + *p_fd = fd; return 0; } @@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private, return ret; } -static struct drm_syncobj *drm_syncobj_fdget(int fd) -{ - struct file *file = fget(fd); - - if (!file) - return NULL; - if (file->f_op != &drm_syncobj_file_fops) - goto err; - - return file->private_data; -err: - fput(file); - return NULL; -}; - static int drm_syncobj_fd_to_handle(struct drm_file *file_private, int fd, u32 *handle) { - struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + struct drm_syncobj *syncobj; + struct file *file; int ret; - if (!syncobj) + file = fget(fd); + if (!file) return -EINVAL; + if (file->f_op != &drm_syncobj_file_fops) { + fput(file); + return -EINVAL; + } + /* take a reference to put in the idr */ + syncobj = file->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, spin_unlock(&file_private->syncobj_table_lock); idr_preload_end(); - if (ret < 0) { - fput(syncobj->file); - return ret; - } - *handle = ret; - return 0; + if (ret > 0) { + *handle = ret; + ret = 0; + } else + drm_syncobj_put(syncobj); + + fput(file); + return ret; } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, -- cgit From dc1c4165d189350cb51bdd3057deb6ecd164beda Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 12 Dec 2017 12:02:04 +0000 Subject: KVM: PPC: Book3S: fix XIVE migration of pending interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When restoring a pending interrupt, we are setting the Q bit to force a retrigger in xive_finish_unmask(). But we also need to force an EOI in this case to reach the same initial state : P=1, Q=0. This can be done by not setting 'old_p' for pending interrupts which will inform xive_finish_unmask() that an EOI needs to be sent. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Suggested-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..b5e6d227a034 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1558,7 +1558,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1566,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. */ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; -- cgit From 7333b5aca412d6ad02667b5a513485838a91b136 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 12 Dec 2017 18:23:56 +0100 Subject: KVM: PPC: Book3S HV: Fix pending_pri value in kvmppc_xive_get_icp() When we migrate a VM from a POWER8 host (XICS) to a POWER9 host (XICS-on-XIVE), we have an error: qemu-kvm: Unable to restore KVM interrupt controller state \ (0xff000000) for CPU 0: Invalid argument This is because kvmppc_xics_set_icp() checks the new state is internaly consistent, and especially: ... 1129 if (xisr == 0) { 1130 if (pending_pri != 0xff) 1131 return -EINVAL; ... On the other side, kvmppc_xive_get_icp() doesn't set neither the pending_pri value, nor the xisr value (set to 0) (and kvmppc_xive_set_icp() ignores the pending_pri value) As xisr is 0, pending_pri must be set to 0xff. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Laurent Vivier Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index b5e6d227a034..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) -- cgit From 322f74ede933b3e2cb78768b6a6fdbfbf478a0c1 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:44 +0800 Subject: ALSA: hda - Add MIC_NO_PRESENCE fixup for 2 HP machines There is a headset jack on the front panel, when we plug a headset into it, the headset mic can't trigger unsol events, and read_pin_sense() can't detect its presence too. So add this fixup to fix this issue. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a81aacf684b2..37e1cf8218ff 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -271,6 +271,8 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HEADSET_MIC, + CXT_FIXUP_HP_MIC_NO_PRESENCE, }; /* for hda_fixup_thinkpad_acpi() */ @@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec, } } +static void cxt_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + break; + } +} + /* OPLC XO 1.5 fixup */ /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors) @@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_headset_mic, + }, + [CXT_FIXUP_HP_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, + { } + }, + .chained = true, + .chain_id = CXT_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), -- cgit From 285d5ddcffafa5d5e68c586f4c9eaa8b24a2897d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:45 +0800 Subject: ALSA: hda - fix headset mic detection issue on a Dell machine It has the codec alc256, and add its pin definition to pin quirk table to let it apply ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6a4db00511ab..682858548b9b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6585,6 +6585,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x1b, 0x01011020}, {0x21, 0x02211010}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, -- cgit From 8da5bbfc7cbba909f4f32d5e1dda3750baa5d853 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:46 +0800 Subject: ALSA: hda - change the location for one mic on a Lenovo machine There are two front mics on this machine, and current driver assign the same name Mic to both of them, but pulseaudio can't handle them. As a workaround, we change the location for one of them, then the driver will assign "Front Mic" and "Mic" for them. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 682858548b9b..1522ba31e16d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6328,6 +6328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), -- cgit From a36c2638380c0a4676647a1f553b70b20d3ebce1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Dec 2017 10:45:07 +0100 Subject: ALSA: hda: Drop useless WARN_ON() Since the commit 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") cleared hdac_acomp pointer, the WARN_ON() non-NULL check in snd_hdac_i915_register_notifier() may give a false-positive warning, as the function gets called no matter whether the component is registered or not. For fixing it, let's get rid of the spurious WARN_ON(). Fixes: 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") Cc: Reported-by: Kouta Okamoto Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 038a180d3f81..cbe818eda336 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; -- cgit From 8bb65fc06c08f027980a917648e1cf6e4d51c5ad Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 6 Dec 2017 11:37:45 -0600 Subject: gpio: gpio-reg: fix build Revert changes introduced by commit f0fbe7bce733 ("gpio: Move irqdomain into struct gpio_irq_chip") as they are not aplicable to this driver. Reported-by: Russell King - ARM Linux Fixes: f0fbe7bce733 ("gpio: Move irqdomain into struct gpio_irq_chip") Signed-off-by: Grygorii Strashko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index 23e771dba4c1..e85903eddc68 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset) struct gpio_reg *r = to_gpio_reg(gc); int irq = r->irqs[offset]; - if (irq >= 0 && r->irq.domain) - irq = irq_find_mapping(r->irq.domain, irq); + if (irq >= 0 && r->irqdomain) + irq = irq_find_mapping(r->irqdomain, irq); return irq; } -- cgit From 822703354774ec935169cbbc8d503236bcb54fda Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Dec 2017 15:02:33 +0100 Subject: gpio: fix "gpio-line-names" property retrieval Following commit 9427ecbed46cc ("gpio: Rework of_gpiochip_set_names() to use device property accessors"), "gpio-line-names" DT property is not retrieved anymore when chip->parent is not set by the driver. This is due to OF based property reads having been replaced by device based property reads. This patch fixes that by making use of fwnode_property_read_string_array() instead of device_property_read_string_array() and handing over either of_fwnode_handle(chip->of_node) or dev_fwnode(chip->parent) to that function. Fixes: 9427ecbed46cc ("gpio: Rework of_gpiochip_set_names() to use device property accessors") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 2 +- drivers/gpio/gpiolib-devprop.c | 17 +++++++---------- drivers/gpio/gpiolib-of.c | 3 ++- drivers/gpio/gpiolib.h | 3 ++- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index eb4528c87c0b..d6f3d9ee1350 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent)); acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 27f383bda7d9..f748aa3e77f7 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -19,30 +19,27 @@ /** * devprop_gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible + * @fwnode: Property Node containing the gpio-line-names property * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ -void devprop_gpiochip_set_names(struct gpio_chip *chip) +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode) { struct gpio_device *gdev = chip->gpiodev; const char **names; int ret, i; - if (!chip->parent) { - dev_warn(&gdev->dev, "GPIO chip parent is NULL\n"); - return; - } - - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", NULL, 0); if (ret < 0) return; if (ret != gdev->ngpio) { - dev_warn(chip->parent, + dev_warn(&gdev->dev, "names %d do not match number of GPIOs %d\n", ret, gdev->ngpio); return; @@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return; - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, gdev->ngpio); if (ret < 0) { - dev_warn(chip->parent, "failed to read GPIO line names\n"); + dev_warn(&gdev->dev, "failed to read GPIO line names\n"); kfree(names); return; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0d59e61b52f..72a0695d2ac3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip) /* If the chip defines names itself, these take precedence */ if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, + of_fwnode_handle(chip->of_node)); of_node_get(chip->of_node); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index af48322839c3..6c44d1652139 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) return desc - &desc->gdev->descs[0]; } -void devprop_gpiochip_set_names(struct gpio_chip *chip); +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode); /* With descriptor prefix */ -- cgit From 4c009af473b2026caaa26107e34d7cc68dad7756 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 22 Dec 2017 08:47:20 -0800 Subject: IB/hfi: Only read capability registers if the capability exists During driver init, various registers are saved to allow restoration after an FLR or gen3 bump. Some of these registers are not available in some circumstances (i.e. Virtual machines). This bug makes the driver unusable when the PCI device is passed into a VM, it fails during probe. Delete unnecessary register read/write, and only access register if the capability exists. Cc: # 4.14.x Fixes: a618b7e40af2 ("IB/hfi1: Move saving PCI values to a separate function") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 1 - drivers/infiniband/hw/hfi1/pcie.c | 30 ++++++++++++------------------ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4a9b4d7efe63..8ce9118d4a7f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1131,7 +1131,6 @@ struct hfi1_devdata { u16 pcie_lnkctl; u16 pcie_devctl2; u32 pci_msix0; - u32 pci_lnkctl3; u32 pci_tph2; /* diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 09e50fd2a08f..8c7e7a60b715 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, + dd->pci_tph2); + if (ret) + goto error; + } return 0; error: @@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, + &dd->pci_tph2); + if (ret) + goto error; + } return 0; error: -- cgit From 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:02:34 +0100 Subject: x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y and NR_CPUS=512, because the fixmap area becomes too big. Limit the number of CPUs with BIGSMP to 64, which is already way to big for 32-bit, but it's at least a working limitation. We performed a quick survey of 32-bit-only machines that might be affected by this change negatively, but found none. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 665eba1b6103..cd5199de231e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -925,7 +925,8 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK + range 2 64 if SMP && X86_32 && X86_BIGSMP + range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP -- cgit From c05344947b37f7cda726e802457370bc6eac4d26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Dec 2017 01:14:39 +0100 Subject: x86/mm/dump_pagetables: Check PAGE_PRESENT for real The check for a present page in printk_prot(): if (!pgprot_val(prot)) { /* Not present */ is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when analyzing mapping correctness. Check for the present bit to make an informed decision. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e3ac6fe6c9e..1014cfb21c2c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -140,7 +140,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; - if (!pgprot_val(prot)) { + if (!(pr & _PAGE_PRESENT)) { /* Not present */ pt_dump_cont_printf(m, dmsg, " "); } else { -- cgit From 146122e24bdf208015d629babba673e28d090709 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:07:42 +0100 Subject: x86/mm/dump_pagetables: Make the address hints correct and readable The address hints are a trainwreck. The array entry numbers have to kept magically in sync with the actual hints, which is doomed as some of the array members are initialized at runtime via the entry numbers. Designated initializers have been around before this code was implemented.... Use the entry numbers to populate the address hints array and add the missing bits and pieces. Split 32 and 64 bit for readability sake. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 90 +++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 1014cfb21c2c..fdf09d8f98da 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -44,10 +44,12 @@ struct addr_marker { unsigned long max_lines; }; -/* indices for address_markers; keep sync'd w/ address_markers below */ +/* Address space markers hints */ + +#ifdef CONFIG_X86_64 + enum address_markers_idx { USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 KERNEL_SPACE_NR, LOW_KERNEL_NR, VMALLOC_START_NR, @@ -56,56 +58,70 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif -# ifdef CONFIG_X86_ESPFIX64 +#ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, -# endif +#endif +#ifdef CONFIG_EFI + EFI_END_NR, +#endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, -#else + FIXADDR_START_NR, + END_OF_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, + [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif +#ifdef CONFIG_X86_ESPFIX64 + [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +#endif +#ifdef CONFIG_EFI + [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, +#endif + [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, + [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, + [MODULES_END_NR] = { MODULES_END, "End Modules" }, + [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, + [END_OF_SPACE_NR] = { -1, NULL } +}; + +#else /* CONFIG_X86_64 */ + +enum address_markers_idx { + USER_SPACE_NR = 0, KERNEL_SPACE_NR, VMALLOC_START_NR, VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, #endif + FIXADDR_START_NR, + END_OF_SPACE_NR, }; -/* Address space markers hints */ static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/* VMEMMAP_START */, "Vmemmap" }, -#ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "KASAN shadow" }, - { KASAN_SHADOW_END, "KASAN shadow end" }, + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, +#ifdef CONFIG_HIGHMEM + [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif -# ifdef CONFIG_X86_ESPFIX64 - { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, -# endif -# ifdef CONFIG_EFI - { EFI_VA_END, "EFI Runtime Services" }, -# endif - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ + [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, + [END_OF_SPACE_NR] = { -1, NULL } }; +#endif /* !CONFIG_X86_64 */ + /* Multipliers for offsets within the PTEs */ #define PTE_LEVEL_MULT (PAGE_SIZE) #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) -- cgit From 49275fef986abfb8b476e4708aaecc07e7d3e087 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 10 Dec 2017 22:47:19 -0800 Subject: x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy The kernel is very erratic as to which pagetables have _PAGE_USER set. The vsyscall page gets lucky: it seems that all of the relevant pagetables are among the apparently arbitrary ones that set _PAGE_USER. Rather than relying on chance, just explicitly set _PAGE_USER. This will let us clean up pagetable setup to stop setting _PAGE_USER. The added code can also be reused by pagetable isolation to manage the _PAGE_USER bit in the usermode tables. [ tglx: Folded paravirt fix from Juergen Gross ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index f279ba2643dc..daad57c76e42 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -329,16 +330,47 @@ int in_gate_area_no_mm(unsigned long addr) return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +static void __init set_vsyscall_pgtable_user_bits(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); -- cgit From 4831b779403a836158917d59a7ca880483c67378 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 10 Dec 2017 22:47:20 -0800 Subject: x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode If something goes wrong with pagetable setup, vsyscall=native will accidentally fall back to emulation. Make it warn and fail so that we notice. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index daad57c76e42..1faf40f2dda9 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -139,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); + /* This should be unreachable in NATIVE mode. */ + if (WARN_ON(vsyscall_mode == NATIVE)) + return false; + if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); -- cgit From c10e83f598d08046dd1ebc8360d4bb12d802d51b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Dec 2017 12:27:29 +0100 Subject: arch, mm: Allow arch_dup_mmap() to fail In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be allowed to fail. Fix up all instances. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 5 +++-- arch/um/include/asm/mmu_context.h | 3 ++- arch/unicore32/include/asm/mmu_context.h | 5 +++-- arch/x86/include/asm/mmu_context.h | 4 ++-- include/asm-generic/mm_hooks.h | 5 +++-- kernel/fork.c | 3 +-- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 492d8140a395..44fdf4786638 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -114,9 +114,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c..fca34b2177e2 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d..5c205a9cb5a6 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0..c76162439c8a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -176,10 +176,10 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index ea189d88a3cc..8ac4e68a12f0 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -7,9 +7,10 @@ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/kernel/fork.c b/kernel/fork.c index 07cc743698d3..500ce64517d9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); -- cgit From c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Dec 2017 12:27:30 +0100 Subject: x86/ldt: Rework locking The LDT is duplicated on fork() and on exec(), which is wrong as exec() should start from a clean state, i.e. without LDT. To fix this the LDT duplication code will be moved into arch_dup_mmap() which is only called for fork(). This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the parent process, but the LDT duplication code needs to acquire mm->context.lock to access the LDT data safely, which is the reverse lock order of write_ldt() where mmap_sem nests into context.lock. Solve this by introducing a new rw semaphore which serializes the read/write_ldt() syscall operations and use context.lock to protect the actual installment of the LDT descriptor. So context.lock stabilizes mm->context.ldt and can nest inside of the new semaphore or mmap_sem. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu.h | 4 +++- arch/x86/include/asm/mmu_context.h | 2 ++ arch/x86/kernel/ldt.c | 33 +++++++++++++++++++++------------ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9ea26f167497..5ff3e8af2c20 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,6 +3,7 @@ #define _ASM_X86_MMU_H #include +#include #include #include @@ -27,7 +28,8 @@ typedef struct { atomic64_t tlb_gen; #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c76162439c8a..4fdbe5efe535 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -132,6 +132,8 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mutex_init(&mm->context.lock); + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1c1eae961340..1600aebc1ec7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -5,6 +5,11 @@ * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. + * + * Lock order: + * contex.ldt_usr_sem + * mmap_sem + * context.lock */ #include @@ -42,7 +47,7 @@ static void refresh_ldt_segments(void) #endif } -/* context.lock is held for us, so we don't need any locking. */ +/* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; @@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } -/* context.lock is held */ -static void install_ldt(struct mm_struct *current_mm, - struct ldt_struct *ldt) +static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { + mutex_lock(&mm->context.lock); + /* Synchronizes with READ_ONCE in load_mm_ldt. */ - smp_store_release(¤t_mm->context.ldt, ldt); + smp_store_release(&mm->context.ldt, ldt); - /* Activate the LDT for all CPUs using current_mm. */ - on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); + /* Activate the LDT for all CPUs using currents mm. */ + on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); + + mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) @@ -133,7 +140,8 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct *old_mm; int retval = 0; - mutex_init(&mm->context.lock); + init_rwsem(&mm->context.ldt_usr_sem); + old_mm = current->mm; if (!old_mm) { mm->context.ldt = NULL; @@ -180,7 +188,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) unsigned long entries_size; int retval; - mutex_lock(&mm->context.lock); + down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; @@ -209,7 +217,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) retval = bytecount; out_unlock: - mutex_unlock(&mm->context.lock); + up_read(&mm->context.ldt_usr_sem); return retval; } @@ -269,7 +277,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ldt.avl = 0; } - mutex_lock(&mm->context.lock); + if (down_write_killable(&mm->context.ldt_usr_sem)) + return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; @@ -291,7 +300,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) error = 0; out_unlock: - mutex_unlock(&mm->context.lock); + up_write(&mm->context.ldt_usr_sem); out: return error; } -- cgit From a4828f81037f491b2cc986595e3a969a6eeb2fb5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Dec 2017 12:27:31 +0100 Subject: x86/ldt: Prevent LDT inheritance on exec The LDT is inherited across fork() or exec(), but that makes no sense at all because exec() is supposed to start the process clean. The reason why this happens is that init_new_context_ldt() is called from init_new_context() which obviously needs to be called for both fork() and exec(). It would be surprising if anything relies on that behaviour, so it seems to be safe to remove that misfeature. Split the context initialization into two parts. Clear the LDT pointer and initialize the mutex from the general context init and move the LDT duplication to arch_dup_mmap() which is only called on fork(). Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 21 ++++++++++++++------- arch/x86/kernel/ldt.c | 18 +++++------------- tools/testing/selftests/x86/ldt_gdt.c | 9 +++------ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4fdbe5efe535..5e25423bf9bb 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -57,11 +57,17 @@ struct ldt_struct { /* * Used for LDT copy/destruction. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context_ldt(struct task_struct *tsk, - struct mm_struct *mm) +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) { return 0; } @@ -137,15 +143,16 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and always allocated */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } - #endif - return init_new_context_ldt(tsk, mm); +#endif + init_new_context_ldt(mm); + return 0; } static inline void destroy_context(struct mm_struct *mm) { @@ -181,7 +188,7 @@ do { \ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); - return 0; + return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1600aebc1ec7..a6b5d62f45a7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -131,28 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt) } /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. + * Called on fork from arch_dup_mmap(). Just copy the current LDT state, + * the new task is not running, so nothing can be installed. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) +int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; - struct mm_struct *old_mm; int retval = 0; - init_rwsem(&mm->context.ldt_usr_sem); - - old_mm = current->mm; - if (!old_mm) { - mm->context.ldt = NULL; + if (!old_mm) return 0; - } mutex_lock(&old_mm->context.lock); - if (!old_mm->context.ldt) { - mm->context.ldt = NULL; + if (!old_mm->context.ldt) goto out_unlock; - } new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 66e5ce5b91f0..0304ffb714f2 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -627,13 +627,10 @@ static void do_multicpu_tests(void) static int finish_exec_test(void) { /* - * In a sensible world, this would be check_invalid_segment(0, 1); - * For better or for worse, though, the LDT is inherited across exec. - * We can probably change this safely, but for now we test it. + * Older kernel versions did inherit the LDT on exec() which is + * wrong because exec() starts from a clean state. */ - check_valid_segment(0, 1, - AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, - 42, true); + check_invalid_segment(0, 1); return nerrs ? 1 : 0; } -- cgit From 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:43 -0800 Subject: x86/mm/64: Improve the memory map documentation The old docs had the vsyscall range wrong and were missing the fixmap. Fix both. There used to be 8 MB reserved for future vsyscalls, but that's long gone. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 3448e675b462..83ca5a3b90ac 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable) +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Virtual memory map with 5 level page tables: @@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support -- cgit From e8ffe96e5933d417195268478479933d56213a3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:54 +0100 Subject: x86/doc: Remove obvious weirdnesses from the x86 MM layout documentation Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 83ca5a3b90ac..63a41671d25b 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -1,6 +1,4 @@ - - Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm @@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 -through to the most-significant implemented bit are set to either all ones -or all zero. This causes hole between user space and kernel addresses. +through to the most-significant implemented bit are sign extended. +This causes hole between user space and kernel addresses if you interpret them +as unsigned. The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory @@ -60,9 +59,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of the processes using the page fault handler, with init_top_pgt as reference. -Current X86-64 implementations support up to 46 bits of address space (64 TB), -which is our current limit. This expands into MBZ space in the page tables. - We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available @@ -74,5 +70,3 @@ following fixmap section. Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. - --Andi Kleen, Jul 2004 -- cgit From 4fe2d8b11a370af286287a2661de9d4e6c9a145a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 17:25:07 -0800 Subject: x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack If the kernel oopses while on the trampoline stack, it will print "" even if SYSENTER is not involved. That is rather confusing. The "SYSENTER" stack is used for a lot more than SYSENTER now. Give it a better string to display in stack dumps, and rename the kernel code to match. Also move the 32-bit code over to the new naming even though it still uses the entry stack only for SYSENTER. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 12 ++++++------ arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/fixmap.h | 8 ++++---- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/stacktrace.h | 4 ++-- arch/x86/kernel/asm-offsets.c | 4 ++-- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/cpu/common.c | 14 +++++++------- arch/x86/kernel/dumpstack.c | 10 +++++----- arch/x86/kernel/dumpstack_32.c | 6 +++--- arch/x86/kernel/dumpstack_64.c | 12 +++++++++--- 11 files changed, 44 insertions(+), 38 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bd8b57a5c874..ace8f321a5a1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -942,9 +942,9 @@ ENTRY(debug) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Ldebug_from_sysenter_stack TRACE_IRQS_OFF @@ -986,9 +986,9 @@ ENTRY(nmi) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2812ce043a7a..87cebe78bbef 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -154,8 +154,8 @@ END(native_usergs_sysret64) _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ -#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ - SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA +#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ + SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA ENTRY(entry_SYSCALL_64_trampoline) UNWIND_HINT_EMPTY diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 94fc4fa14127..8153b8d86a3c 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -56,10 +56,10 @@ struct cpu_entry_area { char gdt[PAGE_SIZE]; /* - * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as + * The GDT is just below entry_stack and thus serves (on x86_64) as * a a read-only guard page. */ - struct SYSENTER_stack_page SYSENTER_stack_page; + struct entry_stack_page entry_stack_page; /* * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because @@ -250,9 +250,9 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); } -static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) +static inline struct entry_stack *cpu_entry_stack(int cpu) { - return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index da943411d3d8..9e482d8b0b97 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -336,12 +336,12 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 -struct SYSENTER_stack { +struct entry_stack { unsigned long words[64]; }; -struct SYSENTER_stack_page { - struct SYSENTER_stack stack; +struct entry_stack_page { + struct entry_stack stack; } __aligned(PAGE_SIZE); struct tss_struct { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f8062bfd43a0..f73706878772 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,7 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, - STACK_TYPE_SYSENTER, + STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -29,7 +29,7 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); +bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cd360a5e0dca..676b7cf4b62b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -97,6 +97,6 @@ void common(void) { /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); - OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); - DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); + OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); + DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 7d20d9c0b3d6..fa1261eefa16 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -48,7 +48,7 @@ void foo(void) /* Offset from the sysenter stack to tss.sp0 */ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - - offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); + offsetofend(struct cpu_entry_area, entry_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 034900623adf..ed4acbce37a8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); #endif -static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, - SYSENTER_stack_storage); +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, + entry_stack_storage); static void __init set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) @@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(int cpu) #endif __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), - per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), + per_cpu_ptr(&entry_stack_storage, cpu), 1, PAGE_KERNEL); /* @@ -1323,7 +1323,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1440,7 +1440,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1655,7 +1655,7 @@ void cpu_init(void) */ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); - load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index bbd6d986e2d0..1dd3f533d78c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +bool in_entry_stack(unsigned long *stack, struct stack_info *info) { - struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); void *begin = ss; void *end = ss + 1; @@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) if ((void *)stack < begin || (void *)stack >= end) return false; - info->type = STACK_TYPE_SYSENTER; + info->type = STACK_TYPE_ENTRY; info->begin = begin; info->end = end; info->next_sp = NULL; @@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) - * - SYSENTER stack + * - entry stack * * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack - * - SYSENTER stack + * - entry stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 5ff13a6b3680..04170f63e3a1 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) + return "ENTRY_TRAMPOLINE"; return NULL; } @@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; if (in_hardirq_stack(stack, info)) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index abc828f8c297..563e28d14f2c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) { + /* + * On 64-bit, we have a generic entry stack that we + * use for all the kernel entry points, including + * SYSENTER. + */ + return "ENTRY_TRAMPOLINE"; + } if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_irq_stack(stack, info)) goto recursion_check; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; goto unknown; -- cgit From 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:50 +0100 Subject: x86/uv: Use the right TLB-flush API Since uv_flush_tlb_others() implements flush_tlb_others() which is about flushing user mappings, we should use __flush_tlb_single(), which too is about flushing user mappings. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Andrew Banman Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Mike Travis Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index f44c0bc95aa2..8538a6723171 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_one(msg->address); + __flush_tlb_single(msg->address); stat->d_onetlb++; } stat->d_requestee++; -- cgit From 23cb7d46f371844c004784ad9552a57446f73e5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:51 +0100 Subject: x86/microcode: Dont abuse the TLB-flush interface Commit: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") ... grubbed into tlbflush internals without coherent explanation. Since it says its a precaution and the SDM doesn't mention anything like this, take it out back. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: fenghua.yu@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 19 ++++++------------- arch/x86/kernel/cpu/microcode/intel.c | 13 ------------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 509046cfa5ce..c2e45da4e540 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -246,20 +246,9 @@ static inline void __native_flush_tlb(void) preempt_enable(); } -static inline void __native_flush_tlb_global_irq_disabled(void) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); -} - static inline void __native_flush_tlb_global(void) { - unsigned long flags; + unsigned long cr4, flags; if (static_cpu_has(X86_FEATURE_INVPCID)) { /* @@ -277,7 +266,11 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); raw_local_irq_restore(flags); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7dbcb7adf797..8ccdca6d3f9e 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci) } #else -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - static inline void print_ucode(struct ucode_cpu_info *uci) { struct microcode_intel *mc; @@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (rev != mc->hdr.rev) return -1; -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif uci->cpu_sig.rev = rev; if (early) -- cgit From a501686b2923ce6f2ff2b1d0d50682c6411baf72 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:49 +0100 Subject: x86/mm: Use __flush_tlb_one() for kernel memory __flush_tlb_single() is for user mappings, __flush_tlb_one() for kernel mappings. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3118392cdf75..0569987f6da6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_single(addr); + __flush_tlb_one(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) -- cgit From b5fc6d943808b570bdfbec80f40c6b3855f1c48b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:46 +0100 Subject: x86/mm: Remove superfluous barriers atomic64_inc_return() already implies smp_mb() before and after. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c2e45da4e540..3e2227386abe 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -60,19 +60,13 @@ static inline void invpcid_flush_all_nonglobals(void) static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { - u64 new_tlb_gen; - /* * Bump the generation count. This also serves as a full barrier * that synchronizes with switch_mm(): callers are required to order * their read of mm_cpumask after their writes to the paging * structures. */ - smp_mb__before_atomic(); - new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen); - smp_mb__after_atomic(); - - return new_tlb_gen; + return atomic64_inc_return(&mm->context.tlb_gen); } #ifdef CONFIG_PARAVIRT -- cgit From 3f67af51e56f291d7417d77c4f67cd774633c5e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:52 +0100 Subject: x86/mm: Add comments to clarify which TLB-flush functions are supposed to flush what Per popular request.. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 3e2227386abe..552d581c8f9f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -228,6 +228,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); +/* + * flush the entire current user mapping + */ static inline void __native_flush_tlb(void) { /* @@ -240,6 +243,9 @@ static inline void __native_flush_tlb(void) preempt_enable(); } +/* + * flush everything + */ static inline void __native_flush_tlb_global(void) { unsigned long cr4, flags; @@ -269,17 +275,27 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_restore(flags); } +/* + * flush one page in the user mapping + */ static inline void __native_flush_tlb_single(unsigned long addr) { asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } +/* + * flush everything + */ static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); - else + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. + */ __flush_tlb(); + } /* * Note: if we somehow had PCID but not PGE, then this wouldn't work -- @@ -290,6 +306,9 @@ static inline void __flush_tlb_all(void) */ } +/* + * flush one page in the kernel mapping + */ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); -- cgit From 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:54 +0100 Subject: x86/mm: Move the CR3 construction functions to tlbflush.h For flushing the TLB, the ASID which has been programmed into the hardware must be known. That differs from what is in 'cpu_tlbstate'. Add functions to transform the 'cpu_tlbstate' values into to the one programmed into the hardware (CR3). It's not easy to include mmu_context.h into tlbflush.h, so just move the CR3 building over to tlbflush.h. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 29 +---------------------------- arch/x86/include/asm/tlbflush.h | 26 ++++++++++++++++++++++++++ arch/x86/mm/tlb.c | 8 ++++---- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5e25423bf9bb..5ede7cae1d67 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -290,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID - * bits. This serves two purposes. It prevents a nasty situation in - * which PCID-unaware code saves CR3, loads some other value (with PCID - * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if - * the saved ASID was nonzero. It also means that any bugs involving - * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger - * deterministically. - */ - -static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) -{ - if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(mm->pgd); - } -} - -static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) -{ - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; -} - /* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). @@ -326,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 552d581c8f9f..ee7925adfb57 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -69,6 +69,32 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } +/* + * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. + * This serves two purposes. It prevents a nasty situation in which + * PCID-unaware code saves CR3, loads some other value (with PCID == 0), + * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved + * ASID was nonzero. It also means that any bugs involving loading a + * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically. + */ +struct pgd_t; +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +{ + if (static_cpu_has(X86_FEATURE_PCID)) { + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(pgd) | (asid + 1); + } else { + VM_WARN_ON_ONCE(asid != 0); + return __sme_pa(pgd); + } +} + +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +{ + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; +} + #ifdef CONFIG_PARAVIRT #include #else diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0569987f6da6..0a1be3adc97e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next, new_asid)); + write_cr3(build_cr3(next->pgd, new_asid)); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next, new_asid)); + write_cr3(build_cr3_noflush(next->pgd, new_asid)); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); @@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void) !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ - write_cr3(build_cr3(mm, 0)); + write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); -- cgit From cb0a9144a744e55207e24dcef812f05cd15a499a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:55 +0100 Subject: x86/mm: Remove hard-coded ASID limit checks First, it's nice to remove the magic numbers. Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID space. The space is currently unused, but add a comment to spell out this new restriction. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index ee7925adfb57..f88ccd3ae466 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -69,6 +69,22 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#define PTI_CONSUMED_ASID_BITS 0 + +#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account + * for them being zero-based. Another -1 is because ASID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) + /* * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. * This serves two purposes. It prevents a nasty situation in which @@ -81,7 +97,7 @@ struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); return __sme_pa(pgd) | (asid + 1); } else { VM_WARN_ON_ONCE(asid != 0); @@ -91,7 +107,7 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { - VM_WARN_ON_ONCE(asid > 4094); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; } -- cgit From dd95f1a4b5ca904c78e6a097091eb21436478abb Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:56 +0100 Subject: x86/mm: Put MMU to hardware ASID translation in one place There are effectively two ASID types: 1. The one stored in the mmu_context that goes from 0..5 2. The one programmed into the hardware that goes from 1..6 This consolidates the locations where converting between the two (by doing a +1) to a single place which gives us a nice place to comment. PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware ASID to flush for the userspace mapping. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f88ccd3ae466..8b27daff7a7f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -85,20 +85,26 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) */ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) -/* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. - * This serves two purposes. It prevents a nasty situation in which - * PCID-unaware code saves CR3, loads some other value (with PCID == 0), - * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved - * ASID was nonzero. It also means that any bugs involving loading a - * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically. - */ +static inline u16 kern_pcid(u16 asid) +{ + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + /* + * If PCID is on, ASID-aware code paths put the ASID+1 into the + * PCID bits. This serves two purposes. It prevents a nasty + * situation in which PCID-unaware code saves CR3, loads some other + * value (with PCID == 0), and then restores CR3, thus corrupting + * the TLB for ASID 0 if the saved ASID was nonzero. It also means + * that any bugs involving loading a PCID-enabled CR3 with + * CR4.PCIDE off will trigger deterministically. + */ + return asid + 1; +} + struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - return __sme_pa(pgd) | (asid + 1); + return __sme_pa(pgd) | kern_pcid(asid); } else { VM_WARN_ON_ONCE(asid != 0); return __sme_pa(pgd); @@ -108,7 +114,8 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; + VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } #ifdef CONFIG_PARAVIRT -- cgit From 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:47 +0100 Subject: x86/mm: Create asm/invpcid.h Unclutter tlbflush.h a little. Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/invpcid.h | 53 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/tlbflush.h | 49 +------------------------------------ 2 files changed, 54 insertions(+), 48 deletions(-) create mode 100644 arch/x86/include/asm/invpcid.h diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 000000000000..989cfa86de85 --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8b27daff7a7f..171b429f43a2 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -9,54 +9,7 @@ #include #include #include - -static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) -{ - struct { u64 d[2]; } desc = { { pcid, addr } }; - - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global - * mappings, we don't want the compiler to reorder any subsequent - * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. - */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); -} - -#define INVPCID_TYPE_INDIV_ADDR 0 -#define INVPCID_TYPE_SINGLE_CTXT 1 -#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 -#define INVPCID_TYPE_ALL_NON_GLOBAL 3 - -/* Flush all mappings for a given pcid and addr, not including globals. */ -static inline void invpcid_flush_one(unsigned long pcid, - unsigned long addr) -{ - __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); -} - -/* Flush all mappings for a given PCID, not including globals. */ -static inline void invpcid_flush_single_context(unsigned long pcid) -{ - __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); -} - -/* Flush all mappings, including globals, for all PCIDs. */ -static inline void invpcid_flush_all(void) -{ - __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); -} - -/* Flush all mappings for all PCIDs except globals. */ -static inline void invpcid_flush_all_nonglobals(void) -{ - __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); -} +#include static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { -- cgit From ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:28:54 +0100 Subject: x86/cpu_entry_area: Move it to a separate unit Separate the cpu_entry_area code out of cpu/common.c and the fixmap. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++ arch/x86/include/asm/fixmap.h | 41 +------------- arch/x86/kernel/cpu/common.c | 94 ------------------------------ arch/x86/kernel/traps.c | 1 + arch/x86/mm/Makefile | 2 +- arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+), 135 deletions(-) create mode 100644 arch/x86/include/asm/cpu_entry_area.h create mode 100644 arch/x86/mm/cpu_entry_area.c diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000000..5471826803af --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include +#include + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. + */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a a read-only guard page. + */ + struct entry_stack_page entry_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + +extern void setup_cpu_entry_areas(void); + +#endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8153b8d86a3c..fb801662a230 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -25,6 +25,7 @@ #else #include #endif +#include /* * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall @@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP; PAGE_SIZE) #endif -/* - * cpu_entry_area is a percpu region in the fixmap that contains things - * needed by the CPU and early entry/exit code. Real types aren't used - * for all fields here to avoid circular header dependencies. - * - * Every field is a virtual alias of some other allocated backing store. - * There is no direct allocation of a struct cpu_entry_area. - */ -struct cpu_entry_area { - char gdt[PAGE_SIZE]; - - /* - * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. - */ - struct entry_stack_page entry_stack_page; - - /* - * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because - * we need task switches to work, and task switches write to the TSS. - */ - struct tss_struct tss; - - char entry_trampoline[PAGE_SIZE]; - -#ifdef CONFIG_X86_64 - /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. - */ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; -#endif -}; - -#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) - -extern void setup_cpu_entry_areas(void); - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed4acbce37a8..8ddcfa4d4165 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, [DEBUG_STACK - 1] = DEBUG_STKSZ }; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); -#endif - -static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, - entry_stack_storage); - -static void __init -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) -{ - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); -} - -/* Setup the fixmap mappings only once per-processor */ -static void __init setup_cpu_entry_area(int cpu) -{ -#ifdef CONFIG_X86_64 - extern char _entry_trampoline[]; - - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ - pgprot_t gdt_prot = PAGE_KERNEL_RO; - pgprot_t tss_prot = PAGE_KERNEL_RO; -#else - /* - * On native 32-bit systems, the GDT cannot be read-only because - * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. If the - * GDT is read-only, that will triple fault. The TSS cannot be - * read-only because the CPU writes to it on task switches. - * - * On Xen PV, the GDT must be read-only because the hypervisor - * requires it. - */ - pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? - PAGE_KERNEL_RO : PAGE_KERNEL; - pgprot_t tss_prot = PAGE_KERNEL; -#endif - - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), - per_cpu_ptr(&entry_stack_storage, cpu), 1, - PAGE_KERNEL); - - /* - * The Intel SDM says (Volume 3, 7.2.1): - * - * Avoid placing a page boundary in the part of the TSS that the - * processor reads during a task switch (the first 104 bytes). The - * processor may not correctly perform address translations if a - * boundary occurs in this area. During a task switch, the processor - * reads and writes into the first 104 bytes of each TSS (using - * contiguous physical addresses beginning with the physical address - * of the first byte of the TSS). So, after TSS access begins, if - * part of the 104 bytes is not physically contiguous, the processor - * will access incorrect information without generating a page-fault - * exception. - * - * There are also a lot of errata involving the TSS spanning a page - * boundary. Assert that we're not doing that. - */ - BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ - offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); - BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss_rw, cpu), - sizeof(struct tss_struct) / PAGE_SIZE, - tss_prot); - -#ifdef CONFIG_X86_32 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); #endif -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); - BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), - &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, - PAGE_KERNEL); - - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); -#endif -} - -void __init setup_cpu_entry_areas(void) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) - setup_cpu_entry_area(cpu); -} - /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 74136fd16f49..464daed6894f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 7ba7f3d7f477..2e0017af8f9b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 000000000000..235ff9cfaaf4 --- /dev/null +++ b/arch/x86/mm/cpu_entry_area.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include +#include +#include +#include + +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +static void __init +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); +} + +/* Setup the fixmap mappings only once per-processor */ +static void __init setup_cpu_entry_area(int cpu) +{ +#ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; +#else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. + * + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; +#endif + + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, + tss_prot); + +#ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +#endif + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, + PAGE_KERNEL); + + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif +} + +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} -- cgit From 92a0f81d89571e3e8759366e050ee05cc545ef99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:51:31 +0100 Subject: x86/cpu_entry_area: Move it out of the fixmap Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big and 0-day already hit a case where the fixmap PTEs were cleared by cleanup_highmap(). Aside of that the fixmap API is a pain as it's all backwards. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 2 + arch/x86/include/asm/cpu_entry_area.h | 18 ++++++++- arch/x86/include/asm/desc.h | 1 + arch/x86/include/asm/fixmap.h | 32 +--------------- arch/x86/include/asm/pgtable_32_types.h | 15 ++++++-- arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++---------- arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/traps.c | 5 ++- arch/x86/mm/cpu_entry_area.c | 66 +++++++++++++++++++++++++-------- arch/x86/mm/dump_pagetables.c | 6 ++- arch/x86/mm/init_32.c | 6 +++ arch/x86/mm/kasan_init_64.c | 29 ++++++++------- arch/x86/mm/pgtable_32.c | 1 + arch/x86/xen/mmu_pv.c | 2 - 14 files changed, 143 insertions(+), 88 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 63a41671d25b..51101708a03a 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space @@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 5471826803af..2fbc69a0916e 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -43,10 +43,26 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE) +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE \ + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} #endif diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 2ace1f90d138..bc359dd2f7f6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index fb801662a230..64c4a30e0d39 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -25,7 +25,6 @@ #else #include #endif -#include /* * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall @@ -84,7 +83,6 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif - FIX_RO_IDT, /* Virtual mapping for read-only IDT */ #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -100,9 +98,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif - /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_CPU_ENTRY_AREA_TOP, - FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -143,7 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern int fixmaps_set; @@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) -{ - BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); - - return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; -} - -#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ - BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ - __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ - }) - -#define get_cpu_entry_area_index(cpu, field) \ - __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) - -static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) -{ - return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); -} - -static inline struct entry_stack *cpu_entry_stack(int cpu) -{ - return &get_cpu_entry_area(cpu)->entry_stack_page.stack; -} - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index f2ca9b28fd68..ce245b0cdfca 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) +/* + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c + * to avoid include recursion hell + */ +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) + +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) + +#define PKMAP_BASE \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6d5f45dcd4a1..3d27831bc58d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #ifdef CONFIG_X86_5LEVEL -#define VMALLOC_SIZE_TB _AC(16384, UL) -#define __VMALLOC_BASE _AC(0xff92000000000000, UL) -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define VMALLOC_SIZE_TB _AC(16384, UL) +# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -#define VMALLOC_SIZE_TB _AC(32, UL) -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define VMALLOC_SIZE_TB _AC(32, UL) +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif + #ifdef CONFIG_RANDOMIZE_MEMORY -#define VMALLOC_START vmalloc_base -#define VMEMMAP_START vmemmap_base +# define VMALLOC_START vmalloc_base +# define VMEMMAP_START vmemmap_base #else -#define VMALLOC_START __VMALLOC_BASE -#define VMEMMAP_START __VMEMMAP_BASE +# define VMALLOC_START __VMALLOC_BASE +# define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define ESPFIX_PGD_ENTRY _AC(-2, UL) -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) -#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) +#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1dd3f533d78c..36b17e0febe8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 464daed6894f..7c16fe0b60c2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -951,8 +951,9 @@ void __init trap_init(void) * "sidt" instruction will not leak the location of the kernel, and * to defend the IDT against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); - idt_descr.address = fix_to_virt(FIX_RO_IDT); + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; /* * Should be a barrier for any external CPU state: diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 235ff9cfaaf4..21e8b595cbb1 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); #endif +struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +} +EXPORT_SYMBOL(get_cpu_entry_area); + +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) +{ + unsigned long va = (unsigned long) cea_vaddr; + + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); +} + static void __init -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } /* Setup the fixmap mappings only once per-processor */ @@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), - per_cpu_ptr(&entry_stack_storage, cpu), 1, - PAGE_KERNEL); + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), + gdt_prot); + + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); /* * The Intel SDM says (Volume 3, 7.2.1): @@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss_rw, cpu), - sizeof(struct tss_struct) / PAGE_SIZE, - tss_prot); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); #ifdef CONFIG_X86_32 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); @@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), - &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, - PAGE_KERNEL); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif } +static __init void setup_cpu_entry_area_ptes(void) +{ +#ifdef CONFIG_X86_32 + unsigned long start, end; + + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; + end = start + CPU_ENTRY_AREA_MAP_SIZE; + + for (; start < end; start += PMD_SIZE) + populate_extra_pte(start); +#endif +} + void __init setup_cpu_entry_areas(void) { unsigned int cpu; + setup_cpu_entry_area_ptes(); + for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index fdf09d8f98da..43dedbfb7257 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -58,6 +58,7 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif + CPU_ENTRY_AREA_NR, #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif @@ -81,6 +82,7 @@ static struct addr_marker address_markers[] = { [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, #endif + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, #endif @@ -104,6 +106,7 @@ enum address_markers_idx { #ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, #endif + CPU_ENTRY_AREA_NR, FIXADDR_START_NR, END_OF_SPACE_NR, }; @@ -116,6 +119,7 @@ static struct addr_marker address_markers[] = { #ifdef CONFIG_HIGHMEM [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, [END_OF_SPACE_NR] = { -1, NULL } }; @@ -541,8 +545,8 @@ static int __init pt_dump_init(void) address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; #endif - return 0; } __initcall(pt_dump_init); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d..135c9a7898c7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "mm_internal.h" @@ -766,6 +767,7 @@ void __init mem_init(void) mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif @@ -777,6 +779,10 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, + CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE, + CPU_ENTRY_AREA_MAP_SIZE >> 10, + #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ec70d780f1f..47388f0c0e59 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,7 @@ #include #include #include +#include extern struct range pfn_mapped[E820_MAX_ENTRIES]; @@ -322,31 +323,33 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - kasan_populate_zero_shadow( - kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); - - kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), - (unsigned long)kasan_mem_to_shadow(_end), - early_pfn_to_nid(__pa(_stext))); - - shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, PAGE_SIZE); - kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - shadow_cpu_entry_begin); + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + shadow_cpu_entry_begin); kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); + kasan_populate_zero_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + early_pfn_to_nid(__pa(_stext))); + + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6b9bf023a700..c3c5274410a9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c2454237fa67..a0e2b8c6e5c7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: - case FIX_RO_IDT: #ifdef CONFIG_X86_32 case FIX_WP_TEST: # ifdef CONFIG_HIGHMEM @@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- cgit From 613e396bc0d4c7604fba23256644e78454c68cf6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 17 Dec 2017 10:56:29 +0100 Subject: init: Invoke init_espfix_bsp() from mm_init() init_espfix_bsp() needs to be invoked before the page table isolation initialization. Move it into mm_init() which is the place where pti_init() will be added. While at it get rid of the #ifdeffery and provide proper stub functions. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/espfix.h | 7 ++++--- arch/x86/kernel/smpboot.c | 6 +----- include/asm-generic/pgtable.h | 5 +++++ init/main.c | 6 ++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 0211029076ea..6777480d8a42 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ESPFIX_H #define _ASM_X86_ESPFIX_H -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_ESPFIX64 #include @@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); extern void init_espfix_ap(int cpu); - -#endif /* CONFIG_X86_64 */ +#else +static inline void init_espfix_ap(int cpu) { } +#endif #endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d56c1d209283..33d6000265aa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -990,12 +990,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, initial_code = (unsigned long)start_secondary; initial_stack = idle->thread.sp; - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 + /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); -#endif /* So we see what's up */ announce_cpu(cpu, apicid); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 757dc6ffc7ba..231b35a76dd9 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1017,6 +1017,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) struct file; int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); + +#ifndef CONFIG_X86_ESPFIX64 +static inline void init_espfix_bsp(void) { } +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..8a390f60ec81 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,8 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); } asmlinkage __visible void __init start_kernel(void) @@ -673,10 +675,6 @@ asmlinkage __visible void __init start_kernel(void) #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); -#endif -#ifdef CONFIG_X86_ESPFIX64 - /* Should be run before the first non-init thread is created */ - init_espfix_bsp(); #endif thread_stack_cache_init(); cred_init(); -- cgit From b26a2319be3dd26edb3013504992a037a5902520 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 23 Dec 2017 08:54:28 +1000 Subject: drm/nouveau: fix race when adding delayed work items kernel.org bz#198221. Reported-by: Petr Vandrovec Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 8d4a5be3b913..56fe261b6268 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, work->cli = cli; mutex_lock(&cli->lock); list_add_tail(&work->head, &cli->worker); - mutex_unlock(&cli->lock); if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) nouveau_cli_work_fence(fence, &work->cb); + mutex_unlock(&cli->lock); } static void -- cgit From b3b1b6532890c70987821946f90c22b8021aaaf8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Dec 2017 11:36:05 -0800 Subject: tools: bpftool: maps: close json array on error paths of show We can't return from the middle of do_show(), because json_array will not be closed. Break out of the loop. Note that the error handling after the loop depends on errno, so no need to set err. Fixes: 831a0aafe5c3 ("tools: bpftool: add JSON output for `bpftool map *` commands") Signed-off-by: Jakub Kicinski Acked-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e2450c8e88e6..8368b7ea31b5 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -523,21 +523,21 @@ static int do_show(int argc, char **argv) break; p_err("can't get next map: %s%s", strerror(errno), errno == EINVAL ? " -- kernel too old?" : ""); - return -1; + break; } fd = bpf_map_get_fd_by_id(id); if (fd < 0) { p_err("can't get map by id (%u): %s", id, strerror(errno)); - return -1; + break; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); - return -1; + break; } if (json_output) -- cgit From 8207c6dd4746c345b689684c4cd0ce00a18c7ef2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Dec 2017 11:36:06 -0800 Subject: tools: bpftool: protect against races with disappearing objects On program/map show we may get an ID of an object from GETNEXT, but the object may disappear before we call GET_FD_BY_ID. If that happens, ignore the object and continue. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Jakub Kicinski Acked-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 2 ++ tools/bpf/bpftool/prog.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 8368b7ea31b5..a8c3a33dd185 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -528,6 +528,8 @@ static int do_show(int argc, char **argv) fd = bpf_map_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get map by id (%u): %s", id, strerror(errno)); break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ad619b96c276..dded77345bfb 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -382,6 +382,8 @@ static int do_show(int argc, char **argv) fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get prog by id (%u): %s", id, strerror(errno)); err = -1; -- cgit From f6c4fd506cb626e4346aa81688f255e593a7c5a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Dec 2017 19:45:11 +0100 Subject: x86/cpu_entry_area: Prevent wraparound in setup_cpu_entry_area_ptes() on 32bit The loop which populates the CPU entry area PMDs can wrap around on 32bit machines when the number of CPUs is small. It worked wonderful for NR_CPUS=64 for whatever reason and the moron who wrote that code did not bother to test it with !SMP. Check for the wraparound to fix it. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: kernel test robot Signed-off-by: Thomas "Feels stupid" Gleixner Tested-by: Borislav Petkov --- arch/x86/mm/cpu_entry_area.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 21e8b595cbb1..fe814fd5e014 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_ptes(void) start = CPU_ENTRY_AREA_BASE; end = start + CPU_ENTRY_AREA_MAP_SIZE; - for (; start < end; start += PMD_SIZE) + /* Careful here: start + PMD_SIZE might wrap around */ + for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) populate_extra_pte(start); #endif } -- cgit From 464e1d5f23cca236b930ef068c328a64cab78fb1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 Dec 2017 20:47:16 -0800 Subject: Linux 4.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7e02f951b284..ac8c441866b7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From 182088aa3c6c7f7c20a2c1dcc9ded4a3fc631f38 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Dec 2017 23:21:28 +0000 Subject: phylink: ensure the PHY interface mode is appropriately set When setting the ethtool settings, ensure that the validated PHY interface mode is propagated to the current link settings, so that 2500BaseX can be selected. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5dc9668dde34..8d06a083ac4c 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -951,6 +951,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, mutex_lock(&pl->state_mutex); /* Configure the MAC to match the new settings */ linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); + pl->link_config.interface = config.interface; pl->link_config.speed = our_kset.base.speed; pl->link_config.duplex = our_kset.base.duplex; pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; -- cgit From 74ee0e8c1bf9925c59cc8f1c65c29adf6e4cf603 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Dec 2017 23:21:34 +0000 Subject: phylink: ensure AN is enabled Ensure that we mark AN as enabled at boot time, rather than leaving it disabled. This is noticable if your SFP module is fiber, and it supports faster speeds than 1G with 2.5G support in place. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 8d06a083ac4c..827f3f92560e 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np, pl->link_config.pause = MLO_PAUSE_AN; pl->link_config.speed = SPEED_UNKNOWN; pl->link_config.duplex = DUPLEX_UNKNOWN; + pl->link_config.an_enabled = true; pl->ops = ops; __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); -- cgit From e5a9336adb317db55eb3fe8200856096f3c71109 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 20 Dec 2017 19:36:03 +0300 Subject: ip6_gre: fix device features for ioctl setup When ip6gre is created using ioctl, its features, such as scatter-gather, GSO and tx-checksumming will be turned off: # ip -f inet6 tunnel add gre6 mode ip6gre remote fd00::1 # ethtool -k gre6 (truncated output) tx-checksumming: off scatter-gather: off tcp-segmentation-offload: off generic-segmentation-offload: off [requested on] But when netlink is used, they will be enabled: # ip link add gre6 type ip6gre remote fd00::1 # ethtool -k gre6 (truncated output) tx-checksumming: on scatter-gather: on tcp-segmentation-offload: on generic-segmentation-offload: on This results in a loss of performance when gre6 is created via ioctl. The issue was found with LTP/gre tests. Fix it by moving the setup of device features to a separate function and invoke it with ndo_init callback because both netlink and ioctl will eventually call it via register_netdevice(): register_netdevice() - ndo_init() callback -> ip6gre_tunnel_init() or ip6gre_tap_init() - ip6gre_tunnel_init_common() - ip6gre_tnl_init_features() The moved code also contains two minor style fixes: * removed needless tab from GRE6_FEATURES on NETIF_F_HIGHDMA line. * fixed the issue reported by checkpatch: "Unnecessary parentheses around 'nt->encap.type == TUNNEL_ENCAP_NONE'" Fixes: ac4eb009e477 ("ip6gre: Add support for basic offloads offloads excluding GSO") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 57 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 416c8913f132..772695960890 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev) eth_random_addr(dev->perm_addr); } +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + +static void ip6gre_tnl_init_features(struct net_device *dev) +{ + struct ip6_tnl *nt = netdev_priv(dev); + + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + nt->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } +} + static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + ip6gre_tnl_init_features(dev); + return 0; } @@ -1298,11 +1330,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; -#define GRE6_FEATURES (NETIF_F_SG | \ - NETIF_F_FRAGLIST | \ - NETIF_F_HIGHDMA | \ - NETIF_F_HW_CSUM) - static void ip6gre_tap_setup(struct net_device *dev) { @@ -1383,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - dev->features |= GRE6_FEATURES; - dev->hw_features |= GRE6_FEATURES; - - if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(nt->parms.o_flags & TUNNEL_CSUM) || - (nt->encap.type == TUNNEL_ENCAP_NONE)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } - - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } - err = register_netdevice(dev); if (err) goto out; -- cgit From c1a8d0a3accf64a014d605e6806ce05d1c17adf1 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Dec 2017 18:45:10 -0600 Subject: net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround Under some circumstances driver will perform PHY reset in ksz9031_read_status() to fix autoneg failure case (idle error count = 0xFF). When this happens ksz9031 will not detect link status change any more when connecting to Netgear 1G switch (link can be recovered sometimes by restarting netdevice "ifconfig down up"). Reproduced with TI am572x board equipped with ksz9031 PHY while connecting to Netgear 1G switch. Fix the issue by reconfiguring autonegotiation after PHY reset in ksz9031_read_status(). Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ab4614113403..422ff6333c52 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev) phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; -- cgit From b2fb01f426883a794ed80be9110675a2d8356347 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 20 Dec 2017 23:26:24 -0800 Subject: net_sched: fix a missing rcu barrier in mini_qdisc_pair_swap() The rcu_barrier_bh() in mini_qdisc_pair_swap() is to wait for flying RCU callback installed by a previous mini_qdisc_pair_swap(), however we miss it on the tp_head==NULL path, which leads to that the RCU callback still uses miniq_old->rcu after it is freed together with qdisc in qdisc_graft(). So just add it on that path too. Fixes: 46209401f8f6 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath ") Reported-by: Jakub Kicinski Tested-by: Jakub Kicinski Cc: Jiri Pirko Cc: John Fastabend Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cd1b200acae7..661c7144b53a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + /* Wait for flying RCU callback before it is freed. */ + rcu_barrier_bh(); return; } @@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, rcu_assign_pointer(*miniqp->p_miniq, miniq); if (miniq_old) - /* This is counterpart of the rcu barrier above. We need to + /* This is counterpart of the rcu barriers above. We need to * block potential new user of miniq_old until all readers * are not seeing it. */ -- cgit From 0a3d805c9c503e05d6e5d3868c53e92a06589dcf Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 21 Dec 2017 13:07:11 +0100 Subject: tipc: base group replicast ack counter on number of actual receivers In commit 2f487712b893 ("tipc: guarantee that group broadcast doesn't bypass group unicast") we introduced a mechanism that requires the first (replicated) broadcast sent after a unicast to be acknowledged by all receivers before permitting sending of the next (true) broadcast. The counter for keeping track of the number of acknowledges to expect is based on the tipc_group::member_cnt variable. But this misses that some of the known members may not be ready for reception, and will never acknowledge the message, either because they haven't fully joined the group or because they are leaving the group. Such members are identified by not fulfilling the condition tested for in the function tipc_group_is_enabled(). We now set the counter for the actual number of acks to receive at the moment the message is sent, by just counting the number of recipients satisfying the tipc_group_is_enabled() test. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 7ebbdeb2a90e..e5b03f08f076 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -368,18 +368,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) u16 prev = grp->bc_snd_nxt - 1; struct tipc_member *m; struct rb_node *n; + u16 ackers = 0; for (n = rb_first(&grp->members); n; n = rb_next(n)) { m = container_of(n, struct tipc_member, tree_node); if (tipc_group_is_enabled(m)) { tipc_group_update_member(m, len); m->bc_acked = prev; + ackers++; } } /* Mark number of acknowledges to expect, if any */ if (ack) - grp->bc_ackers = grp->member_cnt; + grp->bc_ackers = ackers; grp->bc_snd_nxt++; } -- cgit From 4853f128c13ed2731625dff2410b7fdbe540fb26 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 21 Dec 2017 13:13:59 +0100 Subject: net: sched: fix possible null pointer deref in tcf_block_put We need to check block for being null in both tcf_block_put and tcf_block_put_ext. Fixes: 343723dd51ef ("net: sched: fix clsact init error path") Reported-by: Prashant Bhole Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b91ea03e3afa..b9d63d2246e6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -379,6 +379,8 @@ void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; + if (!block) + return; tcf_block_put_ext(block, block->q, &ei); } -- cgit From 3a33a19bf88cdfc6d982972bc6ffcf7a62c1015e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 21 Dec 2017 14:36:34 +0100 Subject: tipc: fix memory leak of group member when peer node is lost When a group member receives a member WITHDRAW event, this might have two reasons: either the peer member is leaving the group, or the link to the member's node has been lost. In the latter case we need to issue a DOWN event to the user right away, and let function tipc_group_filter_msg() perform delete of the member item. However, in this case we miss to change the state of the member item to MBR_LEAVING, so the member item is not deleted, and we have a memory leak. We now separate better between the four sub-cases of a WITHRAW event and make sure that each case is handled correctly. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index e5b03f08f076..8e12ab55346b 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -850,17 +850,26 @@ void tipc_group_member_evt(struct tipc_group *grp, *usr_wakeup = true; m->usr_pending = false; node_up = tipc_node_is_up(net, node); - - /* Hold back event if more messages might be expected */ - if (m->state != MBR_LEAVING && node_up) { - m->event_msg = skb; - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - } else { - if (node_up) + m->event_msg = NULL; + + if (node_up) { + /* Hold back event if a LEAVE msg should be expected */ + if (m->state != MBR_LEAVING) { + m->event_msg = skb; + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + } else { msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - else + __skb_queue_tail(inputq, skb); + } + } else { + if (m->state != MBR_LEAVING) { + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); + } else { + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + } __skb_queue_tail(inputq, skb); } list_del_init(&m->list); -- cgit From 47c332deb8e89f6c59b0bb2615945c6e7fad1a60 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 Dec 2017 09:36:14 +0100 Subject: hwmon: Deal with errors from the thermal subsystem If the thermal subsystem returne -EPROBE_DEFER or any other error when hwmon calls devm_thermal_zone_of_sensor_register(), this is silently ignored. I ran into this with an incorrectly defined thermal zone, making it non-existing and thus this call failed with -EPROBE_DEFER assuming it would appear later. The sensor was still added which is incorrect: sensors must strictly be added after the thermal zones, so deferred probe must be respected. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Signed-off-by: Linus Walleij Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c9790e2c3440..af5123042990 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, struct hwmon_device *hwdev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) @@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, tdata->hwdev = hwdev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. + */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } @@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); + if (err) + goto free_device; + } } } } return hdev; +free_device: + device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: -- cgit From 6a6b0b9914e73a8a54253dd5f6f5e5dd5e4a756c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 21 Dec 2017 10:29:09 -0800 Subject: tcp: Avoid preprocessor directives in tracepoint macro args Using a preprocessor directive to check for CONFIG_IPV6 in the middle of a DECLARE_EVENT_CLASS macro's arg list causes sparse to report a series of errors: ./include/trace/events/tcp.h:68:1: error: directive in argument list ./include/trace/events/tcp.h:75:1: error: directive in argument list ./include/trace/events/tcp.h:144:1: error: directive in argument list ./include/trace/events/tcp.h:151:1: error: directive in argument list ./include/trace/events/tcp.h:216:1: error: directive in argument list ./include/trace/events/tcp.h:223:1: error: directive in argument list ./include/trace/events/tcp.h:274:1: error: directive in argument list ./include/trace/events/tcp.h:281:1: error: directive in argument list Once sparse finds an error, it stops printing warnings for the file it is checking. This masks any sparse warnings that would normally be reported for the core TCP code. Instead, handle the preprocessor conditionals in a couple of auxiliary macros. This also has the benefit of reducing duplicate code. Cc: David Ahern Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 97 ++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 60 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 07cccca6cbf1..ab34c561f26b 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -25,6 +25,35 @@ tcp_state_name(TCP_CLOSING), \ tcp_state_name(TCP_NEW_SYN_RECV)) +#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ + do { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + ipv6_addr_set_v4mapped(saddr, pin6); \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + ipv6_addr_set_v4mapped(daddr, pin6); \ + } while (0) + +#if IS_ENABLED(CONFIG_IPV6) +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + do { \ + if (sk->sk_family == AF_INET6) { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + *pin6 = saddr6; \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + *pin6 = daddr6; \ + } else { \ + TP_STORE_V4MAPPED(__entry, saddr, daddr); \ + } \ + } while (0) +#else +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + TP_STORE_V4MAPPED(__entry, saddr, daddr) +#endif + /* * tcp event with arguments sk and skb * @@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skbaddr = skb; @@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", @@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack, TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack, p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = ireq->ir_v6_loc_addr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = ireq->ir_v6_rmt_addr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); - } + TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, + ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", -- cgit From 756efe131088b6e6e7f0124ff9c4e1f0165d3140 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 22 Dec 2017 17:46:04 +0800 Subject: clk: use atomic runtime pm api in clk_core_is_enabled Current clk_pm_runtime_put is using pm_runtime_put_sync which is not safe to be called in clk_core_is_enabled as it should be able to run in atomic context. Thus use pm_runtime_put instead which is atomic safe. Cc: Stephen Boyd Cc: Michael Turquette Cc: Ulf Hansson Cc: Marek Szyprowski Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Dong Aisheng Reviewed-by: Ulf Hansson Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8a1860a36c77..b56c11f51baf 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core) ret = core->ops->is_enabled(core->hw); done: - clk_pm_runtime_put(core); + if (core->dev) + pm_runtime_put(core->dev); return ret; } -- cgit From 44be77c590f381bc629815ac789b8b15ecc4ddcf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Dec 2017 08:53:59 +0100 Subject: ALSA: hda - Fix missing COEF init for ALC225/295/299 There was a long-standing problem on HP Spectre X360 with Kabylake where it lacks of the front speaker output in some situations. Also there are other products showing the similar behavior. The culprit seems to be the missing COEF setup on ALC codecs, ALC225/295/299, which are all compatible. This patch adds the proper COEF setup (to initialize idx 0x67 / bits 0x3000) for addressing the issue. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195457 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1522ba31e16d..8fd2d9c62c96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0292: alc_update_coef_idx(codec, 0x4, 1<<15, 0); break; - case 0x10ec0215: case 0x10ec0225: + case 0x10ec0295: + case 0x10ec0299: + alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); + /* fallthrough */ + case 0x10ec0215: case 0x10ec0233: case 0x10ec0236: case 0x10ec0255: @@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0286: case 0x10ec0288: case 0x10ec0285: - case 0x10ec0295: case 0x10ec0298: case 0x10ec0289: - case 0x10ec0299: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: -- cgit From 14e138a86f6347c6199f610576d2e11c03bec5f0 Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Thu, 21 Dec 2017 20:17:04 -0800 Subject: RDS: Check cmsg_len before dereferencing CMSG_DATA RDS currently doesn't check if the length of the control message is large enough to hold the required data, before dereferencing the control message data. This results in following crash: BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013 [inline] BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157 CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 rds_rdma_bytes net/rds/send.c:1013 [inline] rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2018 __sys_sendmmsg+0x1ee/0x620 net/socket.c:2108 SYSC_sendmmsg net/socket.c:2139 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2134 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x43fe49 RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49 RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0 R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000 To fix this, we verify that the cmsg_len is large enough to hold the data to be read, before proceeding further. Reported-by: syzbot Signed-off-by: Avinash Repaka Acked-by: Santosh Shilimkar Reviewed-by: Yuval Shaia Signed-off-by: David S. Miller --- net/rds/send.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index b52cdc8ae428..f72466c63f0c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { + if (cmsg->cmsg_len < + CMSG_LEN(sizeof(struct rds_rdma_args))) + return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } -- cgit From 19142551b2be4a9e13838099fde1351386e5e007 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 22 Dec 2017 09:35:16 +0200 Subject: tipc: error path leak fixes in tipc_enable_bearer() Fix memory leak in tipc_enable_bearer() if enable_media() fails, and cleanup with bearer_disable() if tipc_mon_create() fails. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tommi Rantala Signed-off-by: David S. Miller --- net/tipc/bearer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..c8001471da6c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -324,6 +324,7 @@ restart: if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); + kfree(b); return -EINVAL; } @@ -347,8 +348,10 @@ restart: if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - if (tipc_mon_create(net, bearer_id)) + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); return -ENOMEM; + } pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, -- cgit From 642a8439ddd8423b92f2e71960afe21ee1f66bb6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 22 Dec 2017 09:35:17 +0200 Subject: tipc: fix tipc_mon_delete() oops in tipc_enable_bearer() error path Calling tipc_mon_delete() before the monitor has been created will oops. This can happen in tipc_enable_bearer() error path if tipc_disc_create() fails. [ 48.589074] BUG: unable to handle kernel paging request at 0000000000001008 [ 48.590266] IP: tipc_mon_delete+0xea/0x270 [tipc] [ 48.591223] PGD 1e60c5067 P4D 1e60c5067 PUD 1eb0cf067 PMD 0 [ 48.592230] Oops: 0000 [#1] SMP KASAN [ 48.595610] CPU: 5 PID: 1199 Comm: tipc Tainted: G B 4.15.0-rc4-pc64-dirty #5 [ 48.597176] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 48.598489] RIP: 0010:tipc_mon_delete+0xea/0x270 [tipc] [ 48.599347] RSP: 0018:ffff8801d827f668 EFLAGS: 00010282 [ 48.600705] RAX: ffff8801ee813f00 RBX: 0000000000000204 RCX: 0000000000000000 [ 48.602183] RDX: 1ffffffff1de6a75 RSI: 0000000000000297 RDI: 0000000000000297 [ 48.604373] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff1dd1533 [ 48.605607] R10: ffffffff8eafbb05 R11: fffffbfff1dd1534 R12: 0000000000000050 [ 48.607082] R13: dead000000000200 R14: ffffffff8e73f310 R15: 0000000000001020 [ 48.608228] FS: 00007fc686484800(0000) GS:ffff8801f5540000(0000) knlGS:0000000000000000 [ 48.610189] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 48.611459] CR2: 0000000000001008 CR3: 00000001dda70002 CR4: 00000000003606e0 [ 48.612759] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 48.613831] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 48.615038] Call Trace: [ 48.615635] tipc_enable_bearer+0x415/0x5e0 [tipc] [ 48.620623] tipc_nl_bearer_enable+0x1ab/0x200 [tipc] [ 48.625118] genl_family_rcv_msg+0x36b/0x570 [ 48.631233] genl_rcv_msg+0x5a/0xa0 [ 48.631867] netlink_rcv_skb+0x1cc/0x220 [ 48.636373] genl_rcv+0x24/0x40 [ 48.637306] netlink_unicast+0x29c/0x350 [ 48.639664] netlink_sendmsg+0x439/0x590 [ 48.642014] SYSC_sendto+0x199/0x250 [ 48.649912] do_syscall_64+0xfd/0x2c0 [ 48.650651] entry_SYSCALL64_slow_path+0x25/0x25 [ 48.651843] RIP: 0033:0x7fc6859848e3 [ 48.652539] RSP: 002b:00007ffd25dff938 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 48.654003] RAX: ffffffffffffffda RBX: 00007ffd25dff990 RCX: 00007fc6859848e3 [ 48.655303] RDX: 0000000000000054 RSI: 00007ffd25dff990 RDI: 0000000000000003 [ 48.656512] RBP: 00007ffd25dff980 R08: 00007fc685c35fc0 R09: 000000000000000c [ 48.657697] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000d13010 [ 48.658840] R13: 00007ffd25e009c0 R14: 0000000000000000 R15: 0000000000000000 [ 48.662972] RIP: tipc_mon_delete+0xea/0x270 [tipc] RSP: ffff8801d827f668 [ 48.664073] CR2: 0000000000001008 [ 48.664576] ---[ end trace e811818d54d5ce88 ]--- Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tommi Rantala Signed-off-by: David S. Miller --- net/tipc/monitor.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 8e884ed06d4b..32dc33a94bc7 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *tmp; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { -- cgit From 178e5f57a8d8f8fc5799a624b96fc31ef9a29ffa Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Fri, 22 Dec 2017 17:12:09 +0800 Subject: net: fec: unmap the xmit buffer that are not transferred by DMA The enet IP only support 32 bit, it will use swiotlb buffer to do dma mapping when xmit buffer DMA memory address is bigger than 4G in i.MX platform. After stress suspend/resume test, it will print out: log: [12826.352864] fec 5b040000.ethernet: swiotlb buffer is full (sz: 191 bytes) [12826.359676] DMA: Out of SW-IOMMU space for 191 bytes at device 5b040000.ethernet [12826.367110] fec 5b040000.ethernet eth0: Tx DMA memory map failed The issue is that the ready xmit buffers that are dma mapped but DMA still don't copy them into fifo, once MAC restart, these DMA buffers are not unmapped. So it should check the dma mapping buffer and unmap them. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 610573855213..8184d2fca9be 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; -- cgit From 5a8bae9761dc5dd409ff5c3a529b2801bd0dac3a Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:27 +0530 Subject: tg3: Update copyright Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 6 ++++-- drivers/net/ethernet/broadcom/tg3.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d09c5a9c53b5..5fe8d9b05f31 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4,11 +4,13 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2005-2014 Broadcom Corporation. + * Copyright (C) 2005-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. * * Firmware is: * Derived from proprietary unpublished source code, - * Copyright (C) 2000-2003 Broadcom Corporation. + * Copyright (C) 2000-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Ltd. * * Permission is hereby granted for the distribution of this firmware * data in hexadecimal or equivalent format, provided this copyright diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index c2d02d02d1e6..3d60fc7a2da6 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -5,7 +5,8 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2007-2014 Broadcom Corporation. + * Copyright (C) 2007-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. */ #ifndef _T3_H -- cgit From 4419bb1cedcda0272e1dc410345c5a1d1da0e367 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:28 +0530 Subject: tg3: Add workaround to restrict 5762 MRRS to 2048 One of AMD based server with 5762 hangs with jumbo frame traffic. This AMD platform has southbridge limitation which is restricting MRRS to 4000. As a work around, driver to restricts the MRRS to 2048 for this particular 5762 NX1 card. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 10 ++++++++++ drivers/net/ethernet/broadcom/tg3.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5fe8d9b05f31..a0caa71a8c3b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10054,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) tw32(GRC_MODE, tp->grc_mode | val); + /* On one of the AMD platform, MRRS is restricted to 4000 because of + * south bridge limitation. As a workaround, Driver is setting MRRS + * to 2048 instead of default 4096. + */ + if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL && + tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) { + val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK; + tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048); + } + /* Setup the timer prescalar register. Clock is always 66Mhz. */ val = tr32(GRC_MISC_CFG); val &= ~0xff; diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 3d60fc7a2da6..1f0271fa7c74 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -97,6 +97,7 @@ #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a +#define TG3PCI_SUBDEVICE_ID_DELL_5762 0x07f0 #define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a @@ -282,6 +283,9 @@ #define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */ #define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */ /* 0xa8 --> 0xb8 unused */ +#define TG3PCI_DEV_STATUS_CTRL 0x000000b4 +#define MAX_READ_REQ_SIZE_2048 0x00004000 +#define MAX_READ_REQ_MASK 0x00007000 #define TG3PCI_DUAL_MAC_CTRL 0x000000b8 #define DUAL_MAC_CTRL_CH_MASK 0x00000003 #define DUAL_MAC_CTRL_ID 0x00000004 -- cgit From e60ee41aaf898584205a6af5c996860d0fe6a836 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:29 +0530 Subject: tg3: Enable PHY reset in MTU change path for 5720 A customer noticed RX path hang when MTU is changed on the fly while running heavy traffic with NCSI enabled for 5717 and 5719. Since 5720 belongs to same ASIC family, we observed same issue and same fix could solve this problem for 5720. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a0caa71a8c3b..8995cfefbfcf 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14239,7 +14239,8 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) */ if (tg3_asic_rev(tp) == ASIC_REV_57766 || tg3_asic_rev(tp) == ASIC_REV_5717 || - tg3_asic_rev(tp) == ASIC_REV_5719) + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); -- cgit From f7084059a9cb9e56a186e1677b1dcffd76c2cd24 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 22 Dec 2017 13:01:39 -0200 Subject: bnx2x: Improve reliability in case of nested PCI errors While in recovery process of PCI error (called EEH on PowerPC arch), another PCI transaction could be corrupted causing a situation of nested PCI errors. Also, this scenario could be reproduced with error injection mechanisms (for debug purposes). We observe that in case of nested PCI errors, bnx2x might attempt to initialize its shmem and cause a kernel crash due to bad addresses read from MCP. Multiple different stack traces were observed depending on the point the second PCI error happens. This patch avoids the crashes by: * failing PCI recovery in case of nested errors (since multiple PCI errors in a row are not expected to lead to a functional adapter anyway), and by, * preventing access to adapter FW when MCP is failed (we mark it as failed when shmem cannot get initialized properly). Reported-by: Abdul Haleem Signed-off-by: Guilherme G. Piccoli Acked-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4c739d5355d2..8ae269ec17a1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) del_timer_sync(&bp->timer); - if (IS_PF(bp)) { + if (IS_PF(bp) && !BP_NOMCP(bp)) { /* Set ALWAYS_ALIVE bit in shmem */ bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bnx2x_drv_pulse(bp); @@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bp->cnic_loaded = false; /* Clear driver version indication in shmem */ - if (IS_PF(bp)) + if (IS_PF(bp) && !BP_NOMCP(bp)) bnx2x_update_mng_version(bp); /* Check if there are pending parity attentions. If there are - set diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 91e2a7560b48..ddd5d3ebd201 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp) do { bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR); + + /* If we read all 0xFFs, means we are in PCI error state and + * should bail out to avoid crashes on adapter's FW reads. + */ + if (bp->common.shmem_base == 0xFFFFFFFF) { + bp->flags |= NO_MCP_FLAG; + return -ENODEV; + } + if (bp->common.shmem_base) { val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]); if (val & SHR_MEM_VALIDITY_MB) @@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) BNX2X_ERR("IO slot reset --> driver unload\n"); /* MCP should have been reset; Need to wait for validity */ - bnx2x_init_shmem(bp); + if (bnx2x_init_shmem(bp)) { + rtnl_unlock(); + return PCI_ERS_RESULT_DISCONNECT; + } if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { u32 v; -- cgit From 8cb38a602478e9f806571f6920b0a3298aabf042 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 22 Dec 2017 10:15:20 -0800 Subject: sctp: Replace use of sockets_allocated with specified macro. The patch(180d8cd942ce) replaces all uses of struct sock fields' memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem to accessor macros. But the sockets_allocated field of sctp sock is not replaced at all. Then replace it now for unifying the code. Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Cc: Glauber Costa Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3253f724a995..b4fb6e4886d2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4498,7 +4498,7 @@ static int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4542,7 +4542,7 @@ static void sctp_destroy_sock(struct sock *sk) } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- cgit From 45d8b80c2ac5d21cd1e2954431fb676bc2b1e099 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:32:35 -0500 Subject: ring-buffer: Mask out the info bits when returning buffer page length Two info bits were added to the "commit" part of the ring buffer data page when returned to be consumed. This was to inform the user space readers that events have been missed, and that the count may be stored at the end of the page. What wasn't handled, was the splice code that actually called a function to return the length of the data in order to zero out the rest of the page before sending it up to user space. These data bits were returned with the length making the value negative, and that negative value was not checked. It was compared to PAGE_SIZE, and only used if the size was less than PAGE_SIZE. Luckily PAGE_SIZE is unsigned long which made the compare an unsigned compare, meaning the negative size value did not end up causing a large portion of memory to be randomly zeroed out. Cc: stable@vger.kernel.org Fixes: 66a8cb95ed040 ("ring-buffer: Add place holder recording of dropped events") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c87766c1c204..e06cde093f76 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) +#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) */ size_t ring_buffer_page_len(void *page) { - return local_read(&((struct buffer_data_page *)page)->commit) + struct buffer_data_page *bpage = page; + + return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) + BUF_PAGE_HDR_SIZE; } -- cgit From 6b7e633fe9c24682df550e5311f47fb524701586 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:38:57 -0500 Subject: tracing: Remove extra zeroing out of the ring buffer page The ring_buffer_read_page() takes care of zeroing out any extra data in the page that it returns. There's no need to zero it out again from the consumer. It was removed from one consumer of this function, but read_buffers_splice_read() did not remove it, and worse, it contained a nasty bug because of it. Cc: stable@vger.kernel.org Fixes: 2711ca237a084 ("ring-buffer: Move zeroing out excess in page to ring buffer code") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 59518b8126d0..73652d5318b2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int entries, size, i; + int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE @@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - /* - * zero out any left over data, this is going to - * user land. - */ - size = ring_buffer_page_len(ref->page); - if (size < PAGE_SIZE) - memset(ref->page + size, 0, PAGE_SIZE - size); - page = virt_to_page(ref->page); spd.pages[i] = page; -- cgit From ae415fa4c5248a8cf4faabd5a3c20576cb1ad607 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 21:19:29 -0500 Subject: ring-buffer: Do no reuse reader page if still in use To free the reader page that is allocated with ring_buffer_alloc_read_page(), ring_buffer_free_read_page() must be called. For faster performance, this page can be reused by the ring buffer to avoid having to free and allocate new pages. The issue arises when the page is used with a splice pipe into the networking code. The networking code may up the page counter for the page, and keep it active while sending it is queued to go to the network. The incrementing of the page ref does not prevent it from being reused in the ring buffer, and this can cause the page that is being sent out to the network to be modified before it is sent by reading new data. Add a check to the page ref counter, and only reuse the page if it is not being used anywhere else. Cc: stable@vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e06cde093f76..9ab18995ff1e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4404,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; + struct page *page = virt_to_page(bpage); unsigned long flags; + /* If the page is still in use someplace else, we can't reuse it */ + if (page_ref_count(page) > 1) + goto out; + local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); @@ -4417,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + out: free_page((unsigned long)bpage); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); -- cgit From 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Tue, 26 Dec 2017 15:12:53 +0800 Subject: tracing: Fix crash when it fails to alloc ring buffer Double free of the ring buffer happens when it fails to alloc new ring buffer instance for max_buffer if TRACER_MAX_TRACE is configured. The root cause is that the pointer is not set to NULL after the buffer is freed in allocate_trace_buffers(), and the freeing of the ring buffer is invoked again later if the pointer is not equal to Null, as: instance_mkdir() |-allocate_trace_buffers() |-allocate_trace_buffer(tr, &tr->trace_buffer...) |-allocate_trace_buffer(tr, &tr->max_buffer...) // allocate fail(-ENOMEM),first free // and the buffer pointer is not set to null |-ring_buffer_free(tr->trace_buffer.buffer) // out_free_tr |-free_trace_buffers() |-free_trace_buffer(&tr->trace_buffer); //if trace_buffer is not null, free again |-ring_buffer_free(buf->buffer) |-rb_free_cpu_buffer(buffer->buffers[cpu]) // ring_buffer_per_cpu is null, and // crash in ring_buffer_per_cpu->pages Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Signed-off-by: Jing Xia Signed-off-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73652d5318b2..0e53d46544b8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7603,7 +7603,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) allocate_snapshot ? size : 1); if (WARN_ON(ret)) { ring_buffer_free(tr->trace_buffer.buffer); + tr->trace_buffer.buffer = NULL; free_percpu(tr->trace_buffer.data); + tr->trace_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; -- cgit From 4397f04575c44e1440ec2e49b6302785c95fd2f8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 26 Dec 2017 20:07:34 -0500 Subject: tracing: Fix possible double free on failure of allocating trace buffer Jing Xia and Chunyan Zhang reported that on failing to allocate part of the tracing buffer, memory is freed, but the pointers that point to them are not initialized back to NULL, and later paths may try to free the freed memory again. Jing and Chunyan fixed one of the locations that does this, but missed a spot. Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: Jing Xia Reported-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e53d46544b8..2a8d8a294345 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7580,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); + buf->buffer = NULL; return -ENOMEM; } -- cgit From 2b83ff96f51d0b039c4561b9f95c824d7bddb85c Mon Sep 17 00:00:00 2001 From: Matthieu CASTET Date: Tue, 12 Dec 2017 11:10:44 +0100 Subject: led: core: Fix brightness setting when setting delay_off=0 With the current code, the following sequence won't work : echo timer > trigger echo 0 > delay_off * at this point we call ** led_delay_off_store ** led_blink_set *** stop timer ** led_blink_setup ** led_set_software_blink *** if !delay_on, led off *** if !delay_off, set led_set_brightness_nosleep <--- LED_BLINK_SW is set but timer is stop *** otherwise start timer/set LED_BLINK_SW flag echo xxx > brightness * led_set_brightness ** if LED_BLINK_SW *** if brightness=0, led off *** else apply brightness if next timer <--- timer is stop, and will never apply new setting ** otherwise set led_set_brightness_nosleep To fix that, when we delete the timer, we should clear LED_BLINK_SW. Cc: linux-leds@vger.kernel.org Signed-off-by: Matthieu CASTET Signed-off-by: Jacek Anaszewski --- drivers/leds/led-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index fd83c7f77a95..f3654fd2eaf3 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,7 +186,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + led_stop_software_blink(led_cdev); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); -- cgit From ad9a3668a434faca1339789ed2f043d679199309 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 24 Dec 2017 13:54:56 +0200 Subject: IB/mlx5: Serialize access to the VMA list User-space applications can do mmap and munmap directly at any time. Since the VMA list is not protected with a mutex, concurrent accesses to the VMA list from the mmap and munmap can cause data corruption. Add a mutex around the list. Cc: # v4.7 Fixes: 7c2344c3bbf9 ("IB/mlx5: Implements disassociate_ucontext API") Reviewed-by: Yishai Hadas Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 8 ++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b4ef4d9b6ce5..8ac50de2b242 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, } INIT_LIST_HEAD(&context->vma_private_list); + mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area) * mlx5_ib_disassociate_ucontext(). */ mlx5_ib_vma_priv_data->vma = NULL; + mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); list_del(&mlx5_ib_vma_priv_data->list); + mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); kfree(mlx5_ib_vma_priv_data); } @@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, return -ENOMEM; vma_prv->vma = vma; + vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; vma->vm_private_data = vma_prv; vma->vm_ops = &mlx5_ib_vm_ops; + mutex_lock(&ctx->vma_private_list_mutex); list_add(&vma_prv->list, vma_head); + mutex_unlock(&ctx->vma_private_list_mutex); return 0; } @@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * mlx5_ib_vma_close. */ down_write(&owning_mm->mmap_sem); + mutex_lock(&context->vma_private_list_mutex); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; @@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) list_del(&vma_private->list); kfree(vma_private); } + mutex_unlock(&context->vma_private_list_mutex); up_write(&owning_mm->mmap_sem); mmput(owning_mm); put_task_struct(owning_process); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6dd8cac78de2..2c5f3533bbc9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -115,6 +115,8 @@ enum { struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; + /* protect vma_private_list add/del */ + struct mutex *vma_private_list_mutex; }; struct mlx5_ib_ucontext { @@ -129,6 +131,8 @@ struct mlx5_ib_ucontext { /* Transport Domain number */ u32 tdn; struct list_head vma_private_list; + /* protect vma_private_list add/del */ + struct mutex vma_private_list_mutex; unsigned long upd_xlt_page; /* protect ODP/KSM */ -- cgit From 05d14e7b0c138cb07ba30e464f47b39434f3fdef Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 24 Dec 2017 13:54:57 +0200 Subject: IB/uverbs: Fix command checking as part of ib_uverbs_ex_modify_qp() If the input command length is larger than the kernel supports an error should be returned in case the unsupported bytes are not cleared, instead of the other way aroudn. This matches what all other callers of ib_is_udata_cleared do and will avoid user ABI problems in the future. Cc: # v4.10 Fixes: 189aba99e700 ("IB/uverbs: Extend modify_qp and support packet pacing") Reviewed-by: Yishai Hadas Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d0202bb176a4..840b24096690 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, return -EOPNOTSUPP; if (ucore->inlen > sizeof(cmd)) { - if (ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) + if (!ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; } -- cgit From 4a50881bbac309e6f0684816a180bc3c14e1485d Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 24 Dec 2017 13:54:58 +0200 Subject: IB/core: Verify that QP is security enabled in create and destroy The XRC target QP create flow sets up qp_sec only if there is an IB link with LSM security enabled. However, several other related uAPI entry points blindly follow the qp_sec NULL pointer, resulting in a possible oops. Check for NULL before using qp_sec. Cc: # v4.12 Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Reviewed-by: Daniel Jurgens Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/security.c | 3 +++ drivers/infiniband/core/verbs.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index feafdb961c48..59b2f96d986a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev) if (ret) return ret; + if (!qp->qp_sec) + return 0; + mutex_lock(&real_qp->qp_sec->mutex); ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys, qp->qp_sec); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3fb8fb6cc824..e36d27ed4daa 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp) spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); atomic_dec(&real_qp->usecnt); - ib_close_shared_qp_security(qp->qp_sec); + if (qp->qp_sec) + ib_close_shared_qp_security(qp->qp_sec); kfree(qp); return 0; -- cgit From 45e6ae7ef21b907dacb18da62d5787d74a31d860 Mon Sep 17 00:00:00 2001 From: Nitzan Carmi Date: Tue, 26 Dec 2017 11:20:20 +0200 Subject: IB/mlx5: Fix mlx5_ib_alloc_mr error flow ibmr.device is being set only after ib_alloc_mr() is (successfully) complete. Therefore, in case mlx5_core_create_mkey() return with error, the error flow calls mlx5_free_priv_descs() which uses ibmr.device (which doesn't exist yet), causing a NULL dereference oops. To fix this, the IB device should be set in the mr struct earlier stage (e.g. prior to calling mlx5_core_create_mkey()). Fixes: 8a187ee52b04 ("IB/mlx5: Support the new memory registration API") Signed-off-by: Max Gurtovoy Signed-off-by: Nitzan Carmi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ee0ee1f9994b..d109fe8290a7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, MLX5_SET(mkc, mkc, access_mode, mr->access_mode); MLX5_SET(mkc, mkc, umr_en, 1); + mr->ibmr.device = pd->device; err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; -- cgit From 517d7c79bdb39864e617960504bdc1aa560c75c6 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Thu, 28 Dec 2017 12:03:06 +0100 Subject: tipc: fix hanging poll() for stream sockets In commit 42b531de17d2f6 ("tipc: Fix missing connection request handling"), we replaced unconditional wakeup() with condtional wakeup for clients with flags POLLIN | POLLRDNORM | POLLRDBAND. This breaks the applications which do a connect followed by poll with POLLOUT flag. These applications are not woken when the connection is ESTABLISHED and hence sleep forever. In this commit, we fix it by including the POLLOUT event for sockets in TIPC_CONNECTING state. Fixes: 42b531de17d2f6 ("tipc: Fix missing connection request handling") Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41127d0b925e..3b4084480377 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; -- cgit From f72c4ac695573699dde5b71da1c3b9ef80440616 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 28 Dec 2017 12:38:13 -0500 Subject: skbuff: in skb_copy_ubufs unclone before releasing zerocopy skb_copy_ubufs must unclone before it is safe to modify its skb_shared_info with skb_zcopy_clear. Commit b90ddd568792 ("skbuff: skb_copy_ubufs must release uarg even without user frags") ensures that all skbs release their zerocopy state, even those without frags. But I forgot an edge case where such an skb arrives that is cloned. The stack does not build such packets. Vhost/tun skbs have their frags orphaned before cloning. TCP skbs only attach zerocopy state when a frag is added. But if TCP packets can be trimmed or linearized, this might occur. Tracing the code I found no instance so far (e.g., skb_linearize ends up calling skb_zcopy_clear if !skb->data_len). Still, it is non-obvious that no path exists. And it is fragile to rely on this. Fixes: b90ddd568792 ("skbuff: skb_copy_ubufs must release uarg even without user frags") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a3cb0be4c6f3..08f574081315 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) int i, new_frags; u32 d_off; - if (!num_frags) - goto release; - if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; + if (!num_frags) + goto release; + new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); -- cgit From 602f7a2714a3b3aa4bec82ab0a86a9f5a2c4aa61 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 28 Dec 2017 11:00:43 -0800 Subject: sock: Add sock_owned_by_user_nocheck This allows checking socket lock ownership with producing lockdep warnings. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 9155da422692..7a7b14e9628a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk) return sk->sk_lock.owned; } +static inline bool sock_owned_by_user_nocheck(const struct sock *sk) +{ + return sk->sk_lock.owned; +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { -- cgit From d66fa9ec53c43bba9fa973c16419f6061b7cc3ea Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 28 Dec 2017 11:00:44 -0800 Subject: strparser: Call sock_owned_by_user_nocheck strparser wants to check socket ownership without producing any warnings. As indicated by the comment in the code, it is permissible for owned_by_user to return true. Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages") Reported-by: syzbot Reported-and-tested-by: Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index c5fda15ba319..1fdab5c4eda8 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp) * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ - if (sock_owned_by_user(strp->sk)) { + if (sock_owned_by_user_nocheck(strp->sk)) { queue_work(strp_wq, &strp->work); return; } -- cgit