From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable@vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli Cc: Shubhrajyoti Datta Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro@kernel.org> Signed-off-by: Mark Brown --- drivers/spi/spi-zynqmp-gqspi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match); -- cgit From e1cd4004cde7c9b694bbdd8def0e02288ee58c74 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Sep 2023 18:04:00 +0200 Subject: HID: sony: Fix a potential memory leak in sony_probe() If an error occurs after a successful usb_alloc_urb() call, usb_free_urb() should be called. Fixes: fb1a79a6b6e1 ("HID: sony: fix freeze when inserting ghlive ps3/wii dongles") Signed-off-by: Christophe JAILLET Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index dd942061fd77..a02046a78b2d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2155,6 +2155,9 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) return ret; err: + if (sc->ghl_urb) + usb_free_urb(sc->ghl_urb); + hid_hw_stop(hdev); return ret; } -- cgit From 195273147e520844c1aae9fbf85cb6eb0bc0fdd7 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Aug 2023 15:16:11 +0200 Subject: wifi: mt76: fix lock dependency problem for wed_lock Fix the following kernel depency lock holding wed_lock with BH disabled. [ 40.579696] mt798x-wmac 18000000.wifi: attaching wed device 0 version 2 [ 40.604648] platform 15010000.wed: MTK WED WO Firmware Version: DEV_000000, Build Time: 20221208202138 [ 40.613972] platform 15010000.wed: MTK WED WO Chip ID 00 Region 3 [ 40.943617] [ 40.945118] ======================================================== [ 40.951457] WARNING: possible irq lock inversion dependency detected [ 40.957797] 5.15.127 #0 Not tainted [ 40.961276] -------------------------------------------------------- [ 40.967614] insmod/2329 just changed the state of lock: [ 40.972827] ffffff8004003b08 (&dev->wed_lock){+.+.}-{2:2}, at: mt76_get_rxwi+0x1c/0xac [mt76] [ 40.981387] but this lock was taken by another, SOFTIRQ-safe lock in the past: [ 40.988592] (&q->lock){+.-.}-{2:2} [ 40.988602] [ 40.988602] [ 40.988602] and interrupts could create inverse lock ordering between them. [ 40.988602] [ 41.003445] [ 41.003445] other info that might help us debug this: [ 41.009957] Possible interrupt unsafe locking scenario: [ 41.009957] [ 41.016729] CPU0 CPU1 [ 41.021245] ---- ---- [ 41.025761] lock(&dev->wed_lock); [ 41.029241] local_irq_disable(); [ 41.035145] lock(&q->lock); [ 41.040620] lock(&dev->wed_lock); [ 41.046616] [ 41.049223] lock(&q->lock); [ 41.052356] [ 41.052356] *** DEADLOCK *** [ 41.052356] [ 41.058260] 1 lock held by insmod/2329: [ 41.062085] #0: ffffff80003b9988 (&dev->mutex){....}-{3:3}, at: __driver_attach+0x88/0x190 [ 41.070442] [ 41.070442] the shortest dependencies between 2nd lock and 1st lock: [ 41.078257] -> (&q->lock){+.-.}-{2:2} { [ 41.082177] HARDIRQ-ON-W at: [ 41.085396] lock_acquire+0xfc/0x2c0 [ 41.090787] _raw_spin_lock_bh+0x84/0xa0 [ 41.096525] mt76_dma_cleanup+0x24c/0x650 [mt76] [ 41.102977] mt76_dma_cleanup+0x614/0x650 [mt76] [ 41.109428] mt7915_eeprom_get_power_delta+0x1168/0x2464 [mt7915e] [ 41.117435] mt7915_eeprom_init+0x40/0x340 [mt7915e] [ 41.124222] cleanup_module+0x94/0xb28 [mt7915e] [ 41.130662] platform_probe+0x64/0xbc [ 41.136139] really_probe.part.0+0x98/0x2f4 [ 41.142134] __driver_probe_device+0x94/0x16c [ 41.148303] driver_probe_device+0x40/0x120 [ 41.154299] __driver_attach+0x94/0x190 [ 41.159947] bus_for_each_dev+0x5c/0x94 [ 41.165594] driver_attach+0x20/0x30 [ 41.170983] bus_add_driver+0x104/0x1f4 [ 41.176631] driver_register+0x74/0x120 [ 41.182280] __platform_driver_register+0x24/0x30 [ 41.188797] 0xffffffc000cb1074 [ 41.193754] do_one_initcall+0x70/0x2cc [ 41.199403] do_init_module+0x44/0x240 [ 41.204968] load_module+0x1f5c/0x2874 [ 41.210532] __do_sys_init_module+0x1d8/0x2ac [ 41.216702] __arm64_sys_init_module+0x18/0x20 [ 41.222958] invoke_syscall.constprop.0+0x4c/0xe0 [ 41.229474] do_el0_svc+0x50/0xf0 [ 41.234602] el0_svc+0x4c/0xcc [ 41.239471] el0t_64_sync_handler+0xe0/0x110 [ 41.245556] el0t_64_sync+0x15c/0x160 [ 41.251029] IN-SOFTIRQ-W at: [ 41.254249] lock_acquire+0xfc/0x2c0 [ 41.259638] _raw_spin_lock_bh+0x84/0xa0 [ 41.265372] mt76_queue_tx_complete+0x34/0x70 [mt76] [ 41.272170] mt76_free_pending_rxwi+0x36c/0x5d0 [mt76] [ 41.279140] mt76_free_pending_rxwi+0x5c0/0x5d0 [mt76] [ 41.286111] mt7915_eeprom_get_power_delta+0x620/0x2464 [mt7915e] [ 41.294026] __napi_poll.constprop.0+0x5c/0x230 [ 41.300372] net_rx_action+0xe4/0x294 [ 41.305847] _stext+0x154/0x4cc [ 41.310801] do_softirq+0xa4/0xbc [ 41.315930] __local_bh_enable_ip+0x168/0x174 [ 41.322097] napi_threaded_poll+0xbc/0x140 [ 41.328007] kthread+0x13c/0x150 [ 41.333049] ret_from_fork+0x10/0x20 [ 41.338437] INITIAL USE at: [ 41.341568] lock_acquire+0xfc/0x2c0 [ 41.346869] _raw_spin_lock_bh+0x84/0xa0 [ 41.352519] mt76_dma_cleanup+0x24c/0x650 [mt76] [ 41.358882] mt76_dma_cleanup+0x614/0x650 [mt76] [ 41.365245] mt7915_eeprom_get_power_delta+0x1168/0x2464 [mt7915e] [ 41.373160] mt7915_eeprom_init+0x40/0x340 [mt7915e] [ 41.379860] cleanup_module+0x94/0xb28 [mt7915e] [ 41.386213] platform_probe+0x64/0xbc [ 41.391602] really_probe.part.0+0x98/0x2f4 [ 41.397511] __driver_probe_device+0x94/0x16c [ 41.403594] driver_probe_device+0x40/0x120 [ 41.409502] __driver_attach+0x94/0x190 [ 41.415063] bus_for_each_dev+0x5c/0x94 [ 41.420625] driver_attach+0x20/0x30 [ 41.425926] bus_add_driver+0x104/0x1f4 [ 41.431487] driver_register+0x74/0x120 [ 41.437049] __platform_driver_register+0x24/0x30 [ 41.443479] 0xffffffc000cb1074 [ 41.448346] do_one_initcall+0x70/0x2cc [ 41.453907] do_init_module+0x44/0x240 [ 41.459383] load_module+0x1f5c/0x2874 [ 41.464860] __do_sys_init_module+0x1d8/0x2ac [ 41.470944] __arm64_sys_init_module+0x18/0x20 [ 41.477113] invoke_syscall.constprop.0+0x4c/0xe0 [ 41.483542] do_el0_svc+0x50/0xf0 [ 41.488582] el0_svc+0x4c/0xcc [ 41.493364] el0t_64_sync_handler+0xe0/0x110 [ 41.499361] el0t_64_sync+0x15c/0x160 [ 41.504748] } [ 41.506489] ... key at: [] __this_module+0x3e0/0xffffffffffffa840 [mt76] [ 41.515371] ... acquired at: [ 41.518413] _raw_spin_lock+0x60/0x74 [ 41.522240] mt76_get_rxwi+0x1c/0xac [mt76] [ 41.526608] mt76_dma_cleanup+0x3e0/0x650 [mt76] [ 41.531410] mt76_dma_cleanup+0x614/0x650 [mt76] [ 41.536211] mt7915_dma_init+0x408/0x7b0 [mt7915e] [ 41.541177] mt7915_register_device+0x310/0x620 [mt7915e] [ 41.546749] mt7915_mmio_probe+0xcec/0x1d44 [mt7915e] [ 41.551973] platform_probe+0x64/0xbc [ 41.555802] really_probe.part.0+0x98/0x2f4 [ 41.560149] __driver_probe_device+0x94/0x16c [ 41.564670] driver_probe_device+0x40/0x120 [ 41.569017] __driver_attach+0x94/0x190 [ 41.573019] bus_for_each_dev+0x5c/0x94 [ 41.577018] driver_attach+0x20/0x30 [ 41.580758] bus_add_driver+0x104/0x1f4 [ 41.584758] driver_register+0x74/0x120 [ 41.588759] __platform_driver_register+0x24/0x30 [ 41.593628] init_module+0x74/0x1000 [mt7915e] [ 41.598248] do_one_initcall+0x70/0x2cc [ 41.602248] do_init_module+0x44/0x240 [ 41.606162] load_module+0x1f5c/0x2874 [ 41.610078] __do_sys_init_module+0x1d8/0x2ac [ 41.614600] __arm64_sys_init_module+0x18/0x20 [ 41.619209] invoke_syscall.constprop.0+0x4c/0xe0 [ 41.624076] do_el0_svc+0x50/0xf0 [ 41.627555] el0_svc+0x4c/0xcc [ 41.630776] el0t_64_sync_handler+0xe0/0x110 [ 41.635211] el0t_64_sync+0x15c/0x160 [ 41.639037] [ 41.640517] -> (&dev->wed_lock){+.+.}-{2:2} { [ 41.644872] HARDIRQ-ON-W at: [ 41.648003] lock_acquire+0xfc/0x2c0 [ 41.653219] _raw_spin_lock+0x60/0x74 [ 41.658520] mt76_free_pending_rxwi+0xc0/0x5d0 [mt76] [ 41.665232] mt76_dma_cleanup+0x1dc/0x650 [mt76] [ 41.671508] mt7915_eeprom_get_power_delta+0x1830/0x2464 [mt7915e] [ 41.679336] mt7915_unregister_device+0x5b4/0x910 [mt7915e] [ 41.686555] mt7915_eeprom_get_target_power+0xb8/0x230 [mt7915e] [ 41.694209] mt7986_wmac_enable+0xc30/0xcd0 [mt7915e] [ 41.700909] platform_remove+0x4c/0x64 [ 41.706298] __device_release_driver+0x194/0x240 [ 41.712554] driver_detach+0xc0/0x100 [ 41.717857] bus_remove_driver+0x54/0xac [ 41.723418] driver_unregister+0x2c/0x54 [ 41.728980] platform_driver_unregister+0x10/0x20 [ 41.735323] mt7915_ops+0x244/0xffffffffffffed58 [mt7915e] [ 41.742457] __arm64_sys_delete_module+0x170/0x23c [ 41.748887] invoke_syscall.constprop.0+0x4c/0xe0 [ 41.755229] do_el0_svc+0x50/0xf0 [ 41.760183] el0_svc+0x4c/0xcc [ 41.764878] el0t_64_sync_handler+0xe0/0x110 [ 41.770788] el0t_64_sync+0x15c/0x160 [ 41.776088] SOFTIRQ-ON-W at: [ 41.779220] lock_acquire+0xfc/0x2c0 [ 41.784435] _raw_spin_lock+0x60/0x74 [ 41.789737] mt76_get_rxwi+0x1c/0xac [mt76] [ 41.795580] mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e] [ 41.802540] mtk_wed_start+0x970/0xaa0 [ 41.807929] mt7915_dma_start+0x26c/0x630 [mt7915e] [ 41.814455] mt7915_dma_start+0x5a4/0x630 [mt7915e] [ 41.820981] mt7915_dma_init+0x45c/0x7b0 [mt7915e] [ 41.827420] mt7915_register_device+0x310/0x620 [mt7915e] [ 41.834467] mt7915_mmio_probe+0xcec/0x1d44 [mt7915e] [ 41.841167] platform_probe+0x64/0xbc [ 41.846469] really_probe.part.0+0x98/0x2f4 [ 41.852291] __driver_probe_device+0x94/0x16c [ 41.858286] driver_probe_device+0x40/0x120 [ 41.864107] __driver_attach+0x94/0x190 [ 41.869582] bus_for_each_dev+0x5c/0x94 [ 41.875056] driver_attach+0x20/0x30 [ 41.880270] bus_add_driver+0x104/0x1f4 [ 41.885745] driver_register+0x74/0x120 [ 41.891221] __platform_driver_register+0x24/0x30 [ 41.897564] init_module+0x74/0x1000 [mt7915e] [ 41.903657] do_one_initcall+0x70/0x2cc [ 41.909130] do_init_module+0x44/0x240 [ 41.914520] load_module+0x1f5c/0x2874 [ 41.919909] __do_sys_init_module+0x1d8/0x2ac [ 41.925905] __arm64_sys_init_module+0x18/0x20 [ 41.931989] invoke_syscall.constprop.0+0x4c/0xe0 [ 41.938331] do_el0_svc+0x50/0xf0 [ 41.943285] el0_svc+0x4c/0xcc [ 41.947981] el0t_64_sync_handler+0xe0/0x110 [ 41.953892] el0t_64_sync+0x15c/0x160 [ 41.959192] INITIAL USE at: [ 41.962238] lock_acquire+0xfc/0x2c0 [ 41.967365] _raw_spin_lock+0x60/0x74 [ 41.972580] mt76_free_pending_rxwi+0xc0/0x5d0 [mt76] [ 41.979206] mt76_dma_cleanup+0x1dc/0x650 [mt76] [ 41.985395] mt7915_eeprom_get_power_delta+0x1830/0x2464 [mt7915e] [ 41.993137] mt7915_unregister_device+0x5b4/0x910 [mt7915e] [ 42.000270] mt7915_eeprom_get_target_power+0xb8/0x230 [mt7915e] [ 42.007837] mt7986_wmac_enable+0xc30/0xcd0 [mt7915e] [ 42.014450] platform_remove+0x4c/0x64 [ 42.019753] __device_release_driver+0x194/0x240 [ 42.025922] driver_detach+0xc0/0x100 [ 42.031137] bus_remove_driver+0x54/0xac [ 42.036612] driver_unregister+0x2c/0x54 [ 42.042087] platform_driver_unregister+0x10/0x20 [ 42.048344] mt7915_ops+0x244/0xffffffffffffed58 [mt7915e] [ 42.055391] __arm64_sys_delete_module+0x170/0x23c [ 42.061735] invoke_syscall.constprop.0+0x4c/0xe0 [ 42.067990] do_el0_svc+0x50/0xf0 [ 42.072857] el0_svc+0x4c/0xcc [ 42.077466] el0t_64_sync_handler+0xe0/0x110 [ 42.083289] el0t_64_sync+0x15c/0x160 [ 42.088503] } [ 42.090157] ... key at: [] __this_module+0x450/0xffffffffffffa840 [mt76] [ 42.098951] ... acquired at: [ 42.101907] __lock_acquire+0x718/0x1df0 [ 42.105994] lock_acquire+0xfc/0x2c0 [ 42.109734] _raw_spin_lock+0x60/0x74 [ 42.113561] mt76_get_rxwi+0x1c/0xac [mt76] [ 42.117929] mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e] [ 42.123415] mtk_wed_start+0x970/0xaa0 [ 42.127328] mt7915_dma_start+0x26c/0x630 [mt7915e] [ 42.132379] mt7915_dma_start+0x5a4/0x630 [mt7915e] [ 42.137430] mt7915_dma_init+0x45c/0x7b0 [mt7915e] [ 42.142395] mt7915_register_device+0x310/0x620 [mt7915e] [ 42.147967] mt7915_mmio_probe+0xcec/0x1d44 [mt7915e] [ 42.153192] platform_probe+0x64/0xbc [ 42.157019] really_probe.part.0+0x98/0x2f4 [ 42.161367] __driver_probe_device+0x94/0x16c [ 42.165887] driver_probe_device+0x40/0x120 [ 42.170234] __driver_attach+0x94/0x190 [ 42.174235] bus_for_each_dev+0x5c/0x94 [ 42.178235] driver_attach+0x20/0x30 [ 42.181974] bus_add_driver+0x104/0x1f4 [ 42.185974] driver_register+0x74/0x120 [ 42.189974] __platform_driver_register+0x24/0x30 [ 42.194842] init_module+0x74/0x1000 [mt7915e] [ 42.199460] do_one_initcall+0x70/0x2cc [ 42.203460] do_init_module+0x44/0x240 [ 42.207376] load_module+0x1f5c/0x2874 [ 42.211290] __do_sys_init_module+0x1d8/0x2ac [ 42.215813] __arm64_sys_init_module+0x18/0x20 [ 42.220421] invoke_syscall.constprop.0+0x4c/0xe0 [ 42.225288] do_el0_svc+0x50/0xf0 [ 42.228768] el0_svc+0x4c/0xcc [ 42.231989] el0t_64_sync_handler+0xe0/0x110 [ 42.236424] el0t_64_sync+0x15c/0x160 [ 42.240249] [ 42.241730] [ 42.241730] stack backtrace: [ 42.246074] CPU: 1 PID: 2329 Comm: insmod Not tainted 5.15.127 #0 [ 42.252157] Hardware name: GainStrong Oolite-MT7981B V1 Dev Board (NAND boot) (DT) [ 42.259712] Call trace: [ 42.262147] dump_backtrace+0x0/0x174 [ 42.265802] show_stack+0x14/0x20 [ 42.269108] dump_stack_lvl+0x84/0xac [ 42.272761] dump_stack+0x14/0x2c [ 42.276066] print_irq_inversion_bug.part.0+0x1b0/0x1c4 [ 42.281285] mark_lock+0x8b8/0x8bc [ 42.284678] __lock_acquire+0x718/0x1df0 [ 42.288592] lock_acquire+0xfc/0x2c0 [ 42.292158] _raw_spin_lock+0x60/0x74 [ 42.295811] mt76_get_rxwi+0x1c/0xac [mt76] [ 42.300008] mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e] [ 42.305320] mtk_wed_start+0x970/0xaa0 [ 42.309059] mt7915_dma_start+0x26c/0x630 [mt7915e] [ 42.313937] mt7915_dma_start+0x5a4/0x630 [mt7915e] [ 42.318815] mt7915_dma_init+0x45c/0x7b0 [mt7915e] [ 42.323606] mt7915_register_device+0x310/0x620 [mt7915e] [ 42.329005] mt7915_mmio_probe+0xcec/0x1d44 [mt7915e] [ 42.334056] platform_probe+0x64/0xbc [ 42.337711] really_probe.part.0+0x98/0x2f4 [ 42.341885] __driver_probe_device+0x94/0x16c [ 42.346232] driver_probe_device+0x40/0x120 [ 42.350407] __driver_attach+0x94/0x190 [ 42.354234] bus_for_each_dev+0x5c/0x94 [ 42.358061] driver_attach+0x20/0x30 [ 42.361627] bus_add_driver+0x104/0x1f4 [ 42.365454] driver_register+0x74/0x120 [ 42.369282] __platform_driver_register+0x24/0x30 [ 42.373977] init_module+0x74/0x1000 [mt7915e] [ 42.378423] do_one_initcall+0x70/0x2cc [ 42.382249] do_init_module+0x44/0x240 [ 42.385990] load_module+0x1f5c/0x2874 [ 42.389733] __do_sys_init_module+0x1d8/0x2ac [ 42.394082] __arm64_sys_init_module+0x18/0x20 [ 42.398518] invoke_syscall.constprop.0+0x4c/0xe0 [ 42.403211] do_el0_svc+0x50/0xf0 [ 42.406517] el0_svc+0x4c/0xcc [ 42.409565] el0t_64_sync_handler+0xe0/0x110 [ 42.413827] el0t_64_sync+0x15c/0x160 [ 42.674858] mt798x-wmac 18000000.wifi: HW/SW Version: 0x8a108a10, Build Time: 20221208201745a [ 42.674858] [ 42.692078] mt798x-wmac 18000000.wifi: WM Firmware Version: ____000000, Build Time: 20221208201806 [ 42.735606] mt798x-wmac 18000000.wifi: WA Firmware Version: DEV_000000, Build Time: 20221208202048 Tested-by: Daniel Golle Fixes: 2666bece0905 ("wifi: mt76: introduce rxwi and rx token utility routines") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/ee80be41c2a8d8749d83c6950a272a5e77aadd45.1693228333.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/dma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 05d9ab3ce819..dc8f4e157eb2 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -93,13 +93,13 @@ __mt76_get_rxwi(struct mt76_dev *dev) { struct mt76_txwi_cache *t = NULL; - spin_lock(&dev->wed_lock); + spin_lock_bh(&dev->wed_lock); if (!list_empty(&dev->rxwi_cache)) { t = list_first_entry(&dev->rxwi_cache, struct mt76_txwi_cache, list); list_del(&t->list); } - spin_unlock(&dev->wed_lock); + spin_unlock_bh(&dev->wed_lock); return t; } @@ -145,9 +145,9 @@ mt76_put_rxwi(struct mt76_dev *dev, struct mt76_txwi_cache *t) if (!t) return; - spin_lock(&dev->wed_lock); + spin_lock_bh(&dev->wed_lock); list_add(&t->list, &dev->rxwi_cache); - spin_unlock(&dev->wed_lock); + spin_unlock_bh(&dev->wed_lock); } EXPORT_SYMBOL_GPL(mt76_put_rxwi); -- cgit From 84727c5727fed0ea8b0bf5ef0cbffdb0cabb2539 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 28 Aug 2023 15:35:59 +0300 Subject: MAINTAINERS: wifi: remove generic wiki links from drivers The top level entry for wireless drivers already contains to the wiki so no need to duplicate that in driver entries: NETWORKING DRIVERS (WIRELESS) [...] W: https://wireless.wiki.kernel.org/ Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230828123603.87621-1-kvalo@kernel.org --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 612d6d1dbf36..8a1f16870604 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -470,7 +470,6 @@ F: drivers/hwmon/adm1029.c ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -W: https://wireless.wiki.kernel.org/ F: drivers/net/wireless/admtek/adm8211.* ADP1653 FLASH CONTROLLER DRIVER @@ -18062,7 +18061,6 @@ REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained -W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git F: drivers/net/wireless/realtek/rtlwifi/ @@ -18591,7 +18589,6 @@ F: drivers/media/dvb-frontends/rtl2832_sdr* RTL8180 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -W: https://wireless.wiki.kernel.org/ F: drivers/net/wireless/realtek/rtl818x/rtl8180/ RTL8187 WIRELESS DRIVER @@ -18599,7 +18596,6 @@ M: Hin-Tak Leung M: Larry Finger L: linux-wireless@vger.kernel.org S: Maintained -W: https://wireless.wiki.kernel.org/ F: drivers/net/wireless/realtek/rtl818x/rtl8187/ RTL8XXXU WIRELESS DRIVER (rtl8xxxu) -- cgit From 0b9480da79e070481bd4c352ad555abf9ec7598f Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 28 Aug 2023 15:36:00 +0300 Subject: MAINTAINERS: wifi: rtlwifi: remove git tree Linville's tree hasn't been used for something like 10 years so remove it. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230828123603.87621-2-kvalo@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8a1f16870604..5074df0b1861 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18061,7 +18061,6 @@ REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git F: drivers/net/wireless/realtek/rtlwifi/ REALTEK WIRELESS DRIVER (rtw88) -- cgit From 42c5f0e20ec9b0371fbdc6de69a73a932d72a0ef Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 28 Aug 2023 15:36:01 +0300 Subject: MAINTAINERS: wifi: rtl8xxxu: remove git tree Jes' tree hasn't been used for six years so remove it. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230828123603.87621-3-kvalo@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5074df0b1861..386ec00eff63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18601,7 +18601,6 @@ RTL8XXXU WIRELESS DRIVER (rtl8xxxu) M: Jes Sorensen L: linux-wireless@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-devel F: drivers/net/wireless/realtek/rtl8xxxu/ RTRS TRANSPORT DRIVERS -- cgit From b8c713c13482a37e55379b54e9d4535d50bc0577 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 28 Aug 2023 15:36:02 +0300 Subject: MAINTAINERS: wifi: wl12xx: remove git tree Luca's tree hasn't been used for 10 years so remove it. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230828123603.87621-4-kvalo@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 386ec00eff63..f7c7f50ca055 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21512,7 +21512,6 @@ L: linux-wireless@vger.kernel.org S: Orphan W: https://wireless.wiki.kernel.org/en/users/Drivers/wl12xx W: https://wireless.wiki.kernel.org/en/users/Drivers/wl1251 -T: git git://git.kernel.org/pub/scm/linux/kernel/git/luca/wl12xx.git F: drivers/net/wireless/ti/ TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER -- cgit From d253fb3705b37fd4afd05eb615310be4e9c3042b Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 28 Aug 2023 15:36:03 +0300 Subject: MAINTAINERS: wifi: hostap: remove maintainer and web page As hostap is marked as obsolete there's no need to have a maintainer. Also remove the link to the web page. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230828123603.87621-5-kvalo@kernel.org --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f7c7f50ca055..b52a7def4063 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9502,10 +9502,8 @@ F: Documentation/devicetree/bindings/iio/pressure/honeywell,mprls0025pa.yaml F: drivers/iio/pressure/mprls0025pa.c HOST AP DRIVER -M: Jouni Malinen L: linux-wireless@vger.kernel.org S: Obsolete -W: http://w1.fi/hostap-driver.html F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER -- cgit From af5ff4b789956e9ef43e0bb80429f24ec44b2063 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 29 Aug 2023 08:03:33 -0700 Subject: MAINTAINERS: wifi: ath12k: add wiki link The ath12k wireless driver now has a wiki, so advertise it. Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230829-ath12kwiki-v1-1-df37127527a1@quicinc.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b52a7def4063..d8927aadd094 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17528,6 +17528,7 @@ M: Kalle Valo M: Jeff Johnson L: ath12k@lists.infradead.org S: Supported +W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath12k/ -- cgit From eec679e4ac5f47507774956fb3479c206e761af7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Aug 2023 21:06:51 -0600 Subject: wifi: mwifiex: Fix tlv_buf_left calculation In a TLV encoding scheme, the Length part represents the length after the header containing the values for type and length. In this case, `tlv_len` should be: tlv_len == (sizeof(*tlv_rxba) - 1) - sizeof(tlv_rxba->header) + tlv_bitmap_len Notice that the `- 1` accounts for the one-element array `bitmap`, which 1-byte size is already included in `sizeof(*tlv_rxba)`. So, if the above is correct, there is a double-counting of some members in `struct mwifiex_ie_types_rxba_sync`, when `tlv_buf_left` and `tmp` are calculated: 968 tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); 969 tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); in specific, members: drivers/net/wireless/marvell/mwifiex/fw.h:777 777 u8 mac[ETH_ALEN]; 778 u8 tid; 779 u8 reserved; 780 __le16 seq_num; 781 __le16 bitmap_len; This is clearly wrong, and affects the subsequent decoding of data in `event_buf` through `tlv_rxba`: 970 tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; Fix this by using `sizeof(tlv_rxba->header)` instead of `sizeof(*tlv_rxba)` in the calculation of `tlv_buf_left` and `tmp`. This results in the following binary differences before/after changes: | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o | @@ -4698,11 +4698,11 @@ | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:968 | tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len); | - 1da7: lea -0x11(%rbx),%edx | + 1da7: lea -0x4(%rbx),%edx | 1daa: movzwl %bp,%eax | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:969 | tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len; | - 1dad: lea 0x11(%r15,%rbp,1),%r15 | + 1dad: lea 0x4(%r15,%rbp,1),%r15 The above reflects the desired change: avoid counting 13 too many bytes; which is the total size of the double-counted members in `struct mwifiex_ie_types_rxba_sync`: $ pahole -C mwifiex_ie_types_rxba_sync drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o struct mwifiex_ie_types_rxba_sync { struct mwifiex_ie_types_header header; /* 0 4 */ |----------------------------------------------------------------------- | u8 mac[6]; /* 4 6 */ | | u8 tid; /* 10 1 */ | | u8 reserved; /* 11 1 */ | | __le16 seq_num; /* 12 2 */ | | __le16 bitmap_len; /* 14 2 */ | | u8 bitmap[1]; /* 16 1 */ | |----------------------------------------------------------------------| | 13 bytes| ----------- /* size: 17, cachelines: 1, members: 7 */ /* last cacheline: 17 bytes */ } __attribute__((__packed__)); Fixes: 99ffe72cdae4 ("mwifiex: process rxba_sync event") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/06668edd68e7a26bbfeebd1201ae077a2a7a8bce.1692931954.git.gustavoars@kernel.org --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 391793a16adc..d1d3632a3ed7 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -965,8 +965,8 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, } } - tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); - tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len); + tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len; tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; } } -- cgit From c7847241de28c718285d0e1bd97d1061a4a806c8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Aug 2023 21:07:43 -0600 Subject: wifi: mwifiex: Replace one-element array with flexible-array member in struct mwifiex_ie_types_rxba_sync One-element and zero-length arrays are deprecated. So, replace one-element array in struct mwifiex_ie_types_rxba_sync with flexible-array member, and refactor the rest of the code, accordingly. This results in no differences in binary output. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/79c801c69c8beece2f80502c60166036d3c047cc.1692931954.git.gustavoars@kernel.org --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- drivers/net/wireless/marvell/mwifiex/fw.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index d1d3632a3ed7..735aac52bdc4 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -918,7 +918,7 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:", event_buf, len); - while (tlv_buf_left >= sizeof(*tlv_rxba)) { + while (tlv_buf_left > sizeof(*tlv_rxba)) { tlv_type = le16_to_cpu(tlv_rxba->header.type); tlv_len = le16_to_cpu(tlv_rxba->header.len); if (tlv_type != TLV_TYPE_RXBA_SYNC) { diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index f2168fac95ed..8e6db904e5b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -779,7 +779,7 @@ struct mwifiex_ie_types_rxba_sync { u8 reserved; __le16 seq_num; __le16 bitmap_len; - u8 bitmap[1]; + u8 bitmap[]; } __packed; struct chan_band_param_set { -- cgit From d5a93b7d2877aae4ba7590ad6cb65f8d33079489 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Aug 2023 21:10:45 -0600 Subject: wifi: mwifiex: Sanity check tlv_len and tlv_bitmap_len Add sanity checks for both `tlv_len` and `tlv_bitmap_len` before decoding data from `event_buf`. This prevents any malicious or buggy firmware from overflowing `event_buf` through large values for `tlv_len` and `tlv_bitmap_len`. Suggested-by: Dan Williams Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/d4f8780527d551552ee96f17a0229e02e1c200d1.1692931954.git.gustavoars@kernel.org --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 735aac52bdc4..10690e82358b 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -921,6 +921,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, while (tlv_buf_left > sizeof(*tlv_rxba)) { tlv_type = le16_to_cpu(tlv_rxba->header.type); tlv_len = le16_to_cpu(tlv_rxba->header.len); + if (size_add(sizeof(tlv_rxba->header), tlv_len) > tlv_buf_left) { + mwifiex_dbg(priv->adapter, WARN, + "TLV size (%zu) overflows event_buf buf_left=%d\n", + size_add(sizeof(tlv_rxba->header), tlv_len), + tlv_buf_left); + return; + } + if (tlv_type != TLV_TYPE_RXBA_SYNC) { mwifiex_dbg(priv->adapter, ERROR, "Wrong TLV id=0x%x\n", tlv_type); @@ -929,6 +937,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num); tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len); + if (size_add(sizeof(*tlv_rxba), tlv_bitmap_len) > tlv_buf_left) { + mwifiex_dbg(priv->adapter, WARN, + "TLV size (%zu) overflows event_buf buf_left=%d\n", + size_add(sizeof(*tlv_rxba), tlv_bitmap_len), + tlv_buf_left); + return; + } + mwifiex_dbg(priv->adapter, INFO, "%pM tid=%d seq_num=%d bitmap_len=%d\n", tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, -- cgit From 3170256d7bc1ef81587caf4b83573eb1f5bb4fb6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 29 Aug 2023 15:40:22 +0200 Subject: counter: chrdev: fix getting array extensions When trying to watch a component array extension, and the array isn't the first extended element, it fails as the type comparison is always done on the 1st element. Fix it by indexing the 'ext' array. Example on a dummy struct counter_comp: static struct counter_comp dummy[] = { COUNTER_COMP_DIRECTION(..), ..., COUNTER_COMP_ARRAY_CAPTURE(...), }; static struct counter_count dummy_cnt = { ... .ext = dummy, .num_ext = ARRAY_SIZE(dummy), } Currently, counter_get_ext() returns -EINVAL when trying to add a watch event on one of the capture array element in such example. Fixes: d2011be1e22f ("counter: Introduce the COUNTER_COMP_ARRAY component type") Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20230829134029.2402868-2-fabrice.gasnier@foss.st.com Signed-off-by: William Breathitt Gray --- drivers/counter/counter-chrdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c index 80acdf62794a..afc94d0062b1 100644 --- a/drivers/counter/counter-chrdev.c +++ b/drivers/counter/counter-chrdev.c @@ -247,8 +247,8 @@ static int counter_get_ext(const struct counter_comp *const ext, if (*id == component_id) return 0; - if (ext->type == COUNTER_COMP_ARRAY) { - element = ext->priv; + if (ext[*ext_idx].type == COUNTER_COMP_ARRAY) { + element = ext[*ext_idx].priv; if (component_id - *id < element->length) return 0; -- cgit From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Tue, 15 Aug 2023 22:18:34 +0800 Subject: net: xfrm: skip policies marked as dead while reinserting policies BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430 Read of size 1 at addr ffff8881051f3bf8 by task ip/668 CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014 Call Trace: dump_stack_lvl+0x72/0xa0 print_report+0xd0/0x620 kasan_report+0xb6/0xf0 xfrm_policy_inexact_list_reinsert+0xb6/0x430 xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800 xfrm_policy_inexact_alloc_chain+0x23f/0x320 xfrm_policy_inexact_insert+0x6b/0x590 xfrm_policy_insert+0x3b1/0x480 xfrm_add_policy+0x23c/0x3c0 xfrm_user_rcv_msg+0x2d0/0x510 netlink_rcv_skb+0x10d/0x2d0 xfrm_netlink_rcv+0x49/0x60 netlink_unicast+0x3fe/0x540 netlink_sendmsg+0x528/0x970 sock_sendmsg+0x14a/0x160 ____sys_sendmsg+0x4fc/0x580 ___sys_sendmsg+0xef/0x160 __sys_sendmsg+0xf7/0x1b0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x73/0xdd The root cause is: cpu 0 cpu1 xfrm_dump_policy xfrm_policy_walk list_move_tail xfrm_add_policy ... ... xfrm_policy_inexact_list_reinsert list_for_each_entry_reverse if (!policy->bydst_reinsert) //read non-existent policy xfrm_dump_policy_done xfrm_policy_walk_done list_del(&walk->walk.all); If dump_one_policy() returns err (triggered by netlink socket), xfrm_policy_walk() will move walk initialized by socket to list net->xfrm.policy_all. so this socket becomes visible in the global policy list. The head *walk can be traversed when users add policies with different prefixlen and trigger xfrm_policy node merge. The issue can also be triggered by policy list traversal while rehashing and flushing policies. It can be fixed by skip such "policies" with walk.dead set to 1. Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address") Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list") Signed-off-by: Dong Chenchen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6b405782b63..113fb7e9cdaf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, struct hlist_node *newpos = NULL; bool matches_s, matches_d; - if (!policy->bydst_reinsert) + if (policy->walk.dead || !policy->bydst_reinsert) continue; WARN_ON_ONCE(policy->family != family); @@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work) struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; + if (policy->walk.dead) + continue; + dir = xfrm_policy_id2dir(policy->index); - if (policy->walk.dead || dir >= XFRM_POLICY_MAX) + if (dir >= XFRM_POLICY_MAX) continue; if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { @@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->type != type) continue; @@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; -- cgit From df8fdd01c98b99d04915c04f3a5ce73f55456b7c Mon Sep 17 00:00:00 2001 From: Dharma Balasubiramani Date: Tue, 5 Sep 2023 15:38:35 +0530 Subject: counter: microchip-tcb-capture: Fix the use of internal GCLK logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the datasheet, the clock selection Bits 2:0 – TCCLKS[2:0] should be set to 0 while using the internal GCLK (TIMER_CLOCK1). Fixes: 106b104137fd ("counter: Add microchip TCB capture counter") Signed-off-by: Dharma Balasubiramani Link: https://lore.kernel.org/r/20230905100835.315024-1-dharma.b@microchip.com Signed-off-by: William Breathitt Gray --- drivers/counter/microchip-tcb-capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index e2d1dc6ca668..c7af13aca36c 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -98,7 +98,7 @@ static int mchp_tc_count_function_write(struct counter_device *counter, priv->qdec_mode = 0; /* Set highest rate based on whether soc has gclk or not */ bmr &= ~(ATMEL_TC_QDEN | ATMEL_TC_POSEN); - if (priv->tc_cfg->has_gclk) + if (!priv->tc_cfg->has_gclk) cmr |= ATMEL_TC_TIMER_CLOCK2; else cmr |= ATMEL_TC_TIMER_CLOCK1; -- cgit From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Sep 2023 13:23:03 +0000 Subject: xfrm: interface: use DEV_STATS_INC() syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields are updated. It appears all of these updates can happen from multiple cpus. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000000010d7 -> 0x00000000000010d8 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index b86474084690..e21cc71095bb 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -380,8 +380,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) skb->dev = dev; if (err) { - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_dropped); return 0; } @@ -426,7 +426,6 @@ static int xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); unsigned int length = skb->len; struct net_device *tdev; @@ -473,7 +472,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) tdev = dst->dev; if (tdev == dev) { - stats->collisions++; + DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", dev->name); goto tx_err_dst_release; @@ -512,13 +511,13 @@ xmit: if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { - stats->tx_errors++; - stats->tx_aborted_errors++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_aborted_errors); } return 0; tx_err_link_failure: - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); @@ -528,7 +527,6 @@ tx_err_dst_release: static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; @@ -545,7 +543,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); if (dst->error) { dst_release(dst); - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, dst); @@ -561,7 +559,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); if (IS_ERR(rt)) { - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, &rt->dst); @@ -580,8 +578,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; tx_err: - stats->tx_errors++; - stats->tx_dropped++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } -- cgit From 017c73a34a661a861712f7cc1393a123e5b2208c Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sun, 23 Apr 2023 19:10:41 +0800 Subject: ubi: Refuse attaching if mtd's erasesize is 0 There exists mtd devices with zero erasesize, which will trigger a divide-by-zero exception while attaching ubi device. Fix it by refusing attaching if mtd's erasesize is 0. Fixes: 801c135ce73d ("UBI: Unsorted Block Images") Reported-by: Yu Hao Link: https://lore.kernel.org/lkml/977347543.226888.1682011999468.JavaMail.zimbra@nod.at/T/ Signed-off-by: Zhihao Cheng Reviewed-by: Miquel Raynal Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 8b91a55ec0d2..8ee51e49fced 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -894,6 +894,13 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, return -EINVAL; } + /* UBI cannot work on flashes with zero erasesize. */ + if (!mtd->erasesize) { + pr_err("ubi: refuse attaching mtd%d - zero erasesize flash is not supported\n", + mtd->index); + return -EINVAL; + } + if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) -- cgit From f15f29fd4779be8a418b66e9d52979bb6d6c2325 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Sep 2023 08:22:33 +0200 Subject: netfilter: nf_tables: disallow rule removal from chain binding Chain binding only requires the rule addition/insertion command within the same transaction. Removal of rules from chain bindings within the same transaction makes no sense, userspace does not utilize this feature. Replace nft_chain_is_bound() check to nft_chain_binding() in rule deletion commands. Replace command implies a rule deletion, reject this command too. Rule flush command can also safely rely on this nft_chain_binding() check because unbound chains are not allowed since 62e1e94b246e ("netfilter: nf_tables: reject unbound chain set before commit phase"). Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e429ebba74b3..895c6e4fba97 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1432,7 +1432,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1477,7 +1477,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -2910,6 +2910,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(chain); } + if (nft_chain_binding(chain)) + return -EOPNOTSUPP; + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { @@ -3971,6 +3974,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { + if (nft_chain_binding(chain)) { + err = -EOPNOTSUPP; + goto err_destroy_flow_rule; + } + err = nft_delrule(&ctx, old_rule); if (err < 0) goto err_destroy_flow_rule; @@ -4078,7 +4086,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) return -EOPNOTSUPP; } @@ -4112,7 +4120,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx.chain = chain; @@ -11054,7 +11062,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table) ctx.family = table->family; ctx.table = table; list_for_each_entry(chain, &table->chains, list) { - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx.chain = chain; -- cgit From 96b33300fba880ec0eafcf3d82486f3463b4b6da Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Sep 2023 12:52:24 +0200 Subject: netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention rbtree GC does not modify the datastructure, instead it collects expired elements and it enqueues a GC transaction. Use a read spinlock instead to avoid data contention while GC worker is running. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f250b5399344..70491ba98dec 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -622,8 +622,7 @@ static void nft_rbtree_gc(struct work_struct *work) if (!gc) goto done; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { /* Ruleset has been updated, try later. */ @@ -673,8 +672,7 @@ dead_elem: gc = nft_trans_gc_catchall(gc, gc_seq); try_later: - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + read_unlock_bh(&priv->lock); if (gc) nft_trans_gc_queue_async_done(gc); -- cgit From 4a9e12ea7e70223555ec010bec9f711089ce96f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 6 Sep 2023 15:07:53 +0200 Subject: netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC pipapo needs to enqueue GC transactions for catchall elements through nft_trans_gc_queue_sync(). Add nft_trans_gc_catchall_sync() and nft_trans_gc_catchall_async() to handle GC transaction queueing accordingly. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++-- net/netfilter/nf_tables_api.c | 22 +++++++++++++++++++--- net/netfilter/nft_set_hash.c | 2 +- net/netfilter/nft_set_pipapo.c | 2 +- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dd40c75011d2..a4455f4995ab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1700,8 +1700,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 895c6e4fba97..7b59311931fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9613,8 +9613,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq) +static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, + unsigned int gc_seq, + bool sync) { struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; @@ -9630,7 +9631,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, nft_set_elem_dead(ext); dead_elem: - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (sync) + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + else + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) return NULL; @@ -9640,6 +9645,17 @@ dead_elem: return gc; } +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) +{ + return nft_trans_gc_catchall(gc, gc_seq, false); +} + +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) +{ + return nft_trans_gc_catchall(gc, 0, true); +} + static void nf_tables_module_autoload_cleanup(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 524763659f25..eca20dc60138 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -372,7 +372,7 @@ dead_elem: nft_trans_gc_elem_add(gc, he); } - gc = nft_trans_gc_catchall(gc, gc_seq); + gc = nft_trans_gc_catchall_async(gc, gc_seq); try_later: /* catchall list iteration requires rcu read side lock. */ diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 6af9c9ed4b5c..10b89ac74476 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1610,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) } } - gc = nft_trans_gc_catchall(gc, 0); + gc = nft_trans_gc_catchall_sync(gc); if (gc) { nft_trans_gc_queue_sync_done(gc); priv->last_gc = jiffies; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 70491ba98dec..487572dcd614 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -669,7 +669,7 @@ dead_elem: nft_trans_gc_elem_add(gc, rbe); } - gc = nft_trans_gc_catchall(gc, gc_seq); + gc = nft_trans_gc_catchall_async(gc, gc_seq); try_later: read_unlock_bh(&priv->lock); -- cgit From 6d365eabce3c018a80f6e0379b17df2abb17405e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 6 Sep 2023 17:22:58 +0200 Subject: netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails nft_trans_gc_queue_sync() enqueues the GC transaction and it allocates a new one. If this allocation fails, then stop this GC sync run and retry later. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 10b89ac74476..c0dcc40de358 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1596,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (!gc) - break; + return; nft_pipapo_gc_deactivate(net, set, e); pipapo_drop(m, rulemap); -- cgit From b079155faae94e9b3ab9337e82100a914ebb4e8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Sep 2023 01:39:43 +0200 Subject: netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Skip GC run if iterator rewinds to the beginning with EAGAIN, otherwise GC might collect the same element more than once. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index eca20dc60138..2013de934cef 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work) while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) { - nft_trans_gc_destroy(gc); - gc = NULL; - goto try_later; - } - continue; + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; } /* Ruleset has been updated, try later. */ -- cgit From 41bc46c12a8053a1b3279a379bd6b5e87b045b85 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Sep 2023 22:06:51 +0200 Subject: bpf: Add override check to kprobe multi link attach Currently the multi_kprobe link attach does not check error injection list for programs with bpf_override_return helper and allows them to attach anywhere. Adding the missing check. Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20230907200652.926951-1-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a7264b2c17ad..c1c1af63ced2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 return arr.mods_cnt; } +static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + if (!within_error_injection_list(addrs[i])) + return -EINVAL; + } + return 0; +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } + if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { + err = -EINVAL; + goto error; + } + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { err = -ENOMEM; -- cgit From 7182e56411b9a8b76797ed7b6095fc84be76dfb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Sep 2023 22:06:52 +0200 Subject: selftests/bpf: Add kprobe_multi override test Adding test that tries to attach program with bpf_override_return helper to function not within error injection list. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230907200652.926951-2-jolsa@kernel.org --- .../selftests/bpf/prog_tests/kprobe_multi_test.c | 37 ++++++++++++++++++++++ .../selftests/bpf/progs/kprobe_multi_override.c | 13 ++++++++ 2 files changed, 50 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_override.c diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 179fe300534f..e05477b210a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -3,6 +3,7 @@ #include "kprobe_multi.skel.h" #include "trace_helpers.h" #include "kprobe_multi_empty.skel.h" +#include "kprobe_multi_override.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -453,6 +454,40 @@ cleanup: } } +void test_attach_override(void) +{ + struct kprobe_multi_override *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_multi_override__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + /* The test_override calls bpf_override_return so it should fail + * to attach to bpf_fentry_test1 function, which is not on error + * injection list. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) { + bpf_link__destroy(link); + goto cleanup; + } + + /* The should_fail_bio function is on error injection list, + * attach should succeed. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "should_fail_bio", NULL); + if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio")) + goto cleanup; + + bpf_link__destroy(link); + +cleanup: + kprobe_multi_override__destroy(skel); +} + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -480,4 +515,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("attach_override")) + test_attach_override(); } diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c new file mode 100644 index 000000000000..28f8487c9059 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi") +int test_override(struct pt_regs *ctx) +{ + bpf_override_return(ctx, 123); + return 0; +} -- cgit From 75a5221630fe5aa3fedba7a06be618db0f79ba1e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 7 Sep 2023 13:05:42 +0800 Subject: erofs: fix memory leak of LZMA global compressed deduplication When stressing microLZMA EROFS images with the new global compressed deduplication feature enabled (`-Ededupe`), I found some short-lived temporary pages weren't properly released, which could slowly cause unexpected OOMs hours later. Let's fix it now (LZ4 and DEFLATE don't have this issue.) Fixes: 5c2a64252c5d ("erofs: introduce partial-referenced pclusters") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230907050542.97152-1-hsiangkao@linux.alibaba.com --- fs/erofs/decompressor_lzma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 73091fbe3ea4..dee10d22ada9 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -217,9 +217,12 @@ again: strm->buf.out_size = min_t(u32, outlen, PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; - if (!rq->out[no] && rq->fillgaps) /* deduped */ + if (!rq->out[no] && rq->fillgaps) { /* deduped */ rq->out[no] = erofs_allocpage(pagepool, GFP_KERNEL | __GFP_NOFAIL); + set_page_private(rq->out[no], + Z_EROFS_SHORTLIVED_PAGE); + } if (rq->out[no]) strm->buf.out = kmap(rq->out[no]) + pageofs; pageofs = 0; -- cgit From f101583fa9f8c3f372d4feb61d67da0ccbf4d9a5 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 7 Sep 2023 20:32:24 +0530 Subject: ASoC: soc-utils: Export snd_soc_dai_is_dummy() symbol Export symbol snd_soc_dai_is_dummy() for usage outside core driver modules. This is required by Tegra ASoC machine driver. Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1694098945-32760-2-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index 11607c5f5d5a..9c746e4edef7 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai) return 1; return 0; } +EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy); int snd_soc_component_is_dummy(struct snd_soc_component *component) { -- cgit From e765886249c533e1bb5cbc3cd741bad677417312 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 7 Sep 2023 20:32:25 +0530 Subject: ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Tegra audio graph card has many DAI links which connects internal AHUB modules and external audio codecs. Since these are DPCM links, hw_params() call in the machine driver happens for each connected BE link and PLLA is updated every time. This is not really needed for all links as only I/O link DAIs derive respective clocks from PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock updates to DAIs over I/O links. This found to be fixing a DMIC clock discrepancy which is suspected to happen because of back to back quick PLLA and PLLA_OUT0 rate updates. This was observed on Jetson TX2 platform where DMIC clock ended up with unexpected value. Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_audio_graph_card.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5a..4737e776d383 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include #include #include +#include #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. - */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err; -- cgit From ec03804552e9a723569e14d2512f36a8e70dc640 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 8 Sep 2023 11:17:16 +0100 Subject: ASoC: cs35l56: Call pm_runtime_dont_use_autosuspend() Driver remove() must call pm_runtime_dont_use_autosuspend(). Drivers that call pm_runtime_use_autosuspend() must disable it in driver remove(). Unfortunately until recently this was only mentioned in 1 line in a 900+ line document so most people hadn't noticed this. It has only recently been added to the kerneldoc of pm_runtime_use_autosuspend(). THIS WON'T APPLY CLEANLY TO V6.5 AND EARLIER: We will send a separate backported patch to stable. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230908101716.2658582-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 600b79c62ec4..f2e7c6d0be46 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1207,6 +1207,7 @@ void cs35l56_remove(struct cs35l56_private *cs35l56) flush_workqueue(cs35l56->dsp_wq); destroy_workqueue(cs35l56->dsp_wq); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_suspend(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); -- cgit From aedf323b66b2b875137422ecb7d2525179759076 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 7 Sep 2023 11:05:04 +0200 Subject: ASoC: meson: spdifin: start hw on dai probe For spdif input to report the locked rate correctly, even when no capture is running, the HW and reference clock must be started as soon as the dai is probed. Fixes: 5ce5658375e6 ("ASoC: meson: add axg spdif input") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20230907090504.12700-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-spdifin.c | 49 +++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/sound/soc/meson/axg-spdifin.c b/sound/soc/meson/axg-spdifin.c index d86880169075..bc2f2849ecfb 100644 --- a/sound/soc/meson/axg-spdifin.c +++ b/sound/soc/meson/axg-spdifin.c @@ -112,34 +112,6 @@ static int axg_spdifin_prepare(struct snd_pcm_substream *substream, return 0; } -static int axg_spdifin_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); - int ret; - - ret = clk_prepare_enable(priv->refclk); - if (ret) { - dev_err(dai->dev, - "failed to enable spdifin reference clock\n"); - return ret; - } - - regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, - SPDIFIN_CTRL0_EN); - - return 0; -} - -static void axg_spdifin_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); - - regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, 0); - clk_disable_unprepare(priv->refclk); -} - static void axg_spdifin_write_mode_param(struct regmap *map, int mode, unsigned int val, unsigned int num_per_reg, @@ -251,17 +223,32 @@ static int axg_spdifin_dai_probe(struct snd_soc_dai *dai) ret = axg_spdifin_sample_mode_config(dai, priv); if (ret) { dev_err(dai->dev, "mode configuration failed\n"); - clk_disable_unprepare(priv->pclk); - return ret; + goto pclk_err; } + ret = clk_prepare_enable(priv->refclk); + if (ret) { + dev_err(dai->dev, + "failed to enable spdifin reference clock\n"); + goto pclk_err; + } + + regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, + SPDIFIN_CTRL0_EN); + return 0; + +pclk_err: + clk_disable_unprepare(priv->pclk); + return ret; } static int axg_spdifin_dai_remove(struct snd_soc_dai *dai) { struct axg_spdifin *priv = snd_soc_dai_get_drvdata(dai); + regmap_update_bits(priv->map, SPDIFIN_CTRL0, SPDIFIN_CTRL0_EN, 0); + clk_disable_unprepare(priv->refclk); clk_disable_unprepare(priv->pclk); return 0; } @@ -270,8 +257,6 @@ static const struct snd_soc_dai_ops axg_spdifin_ops = { .probe = axg_spdifin_dai_probe, .remove = axg_spdifin_dai_remove, .prepare = axg_spdifin_prepare, - .startup = axg_spdifin_startup, - .shutdown = axg_spdifin_shutdown, }; static int axg_spdifin_iec958_info(struct snd_kcontrol *kcontrol, -- cgit From 28115b1c4f2bb76e786436bf6597c5eb27638a5c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 7 Sep 2023 11:55:20 +0200 Subject: ASoC: rsnd: add missing of_node_put for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. This was done using the Coccinelle semantic patch iterators/for_each_child.cocci Signed-off-by: Julia Lawall Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20230907095521.14053-11-Julia.Lawall@inria.fr Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index e29c2fee9521..1bd7114c472a 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1303,6 +1303,7 @@ audio_graph: if (i >= RSND_MAX_COMPONENT) { dev_info(dev, "reach to max component\n"); of_node_put(node); + of_node_put(ports); break; } } -- cgit From d7e47e32192bb88f5b2dc8e655fa587ecf9d71e0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 9 Sep 2023 05:02:37 -0700 Subject: ASoC: wm8960: Fix error handling in probe Commit 422f10adc3eb ("ASoC: wm8960: Add support for the power supplies") added regulator support to the wm8960 driver, but neglected to update error handling in the probe function. This results in warning backtraces if the probe function fails. Fixes: 422f10adc3eb ("ASoC: wm8960: Add support for the power supplies") Signed-off-by: Guenter Roeck Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20230909120237.2646275-1-linux@roeck-us.net Signed-off-by: Mark Brown --- sound/soc/codecs/wm8960.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 0a50180750e8..7689fe3cc86d 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -1468,8 +1468,10 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) } wm8960->regmap = devm_regmap_init_i2c(i2c, &wm8960_regmap); - if (IS_ERR(wm8960->regmap)) - return PTR_ERR(wm8960->regmap); + if (IS_ERR(wm8960->regmap)) { + ret = PTR_ERR(wm8960->regmap); + goto bulk_disable; + } if (pdata) memcpy(&wm8960->pdata, pdata, sizeof(struct wm8960_data)); @@ -1479,13 +1481,14 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) ret = i2c_master_recv(i2c, &val, sizeof(val)); if (ret >= 0) { dev_err(&i2c->dev, "Not wm8960, wm8960 reg can not read by i2c\n"); - return -EINVAL; + ret = -EINVAL; + goto bulk_disable; } ret = wm8960_reset(wm8960->regmap); if (ret != 0) { dev_err(&i2c->dev, "Failed to issue reset\n"); - return ret; + goto bulk_disable; } if (wm8960->pdata.shared_lrclk) { @@ -1494,7 +1497,7 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) if (ret != 0) { dev_err(&i2c->dev, "Failed to enable LRCM: %d\n", ret); - return ret; + goto bulk_disable; } } @@ -1528,7 +1531,13 @@ static int wm8960_i2c_probe(struct i2c_client *i2c) ret = devm_snd_soc_register_component(&i2c->dev, &soc_component_dev_wm8960, &wm8960_dai, 1); + if (ret) + goto bulk_disable; + return 0; + +bulk_disable: + regulator_bulk_disable(ARRAY_SIZE(wm8960->supplies), wm8960->supplies); return ret; } -- cgit From 396b907919e028d89bac912e49de014485deb8dc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 8 Sep 2023 09:59:20 +0100 Subject: ASoC: soc-pcm: Shrink stack frame for __soc_pcm_hw_params Commit ac950278b087 ("ASoC: add N cpus to M codecs dai link support") added an additional local params in __soc_pcm_hw_params, for the CPU side of the DAI. The snd_pcm_hw_params struct is pretty large (604 bytes) and keeping two local copies of it can make the stack frame really large. It is worth noting the variables are in separate code blocks so for some optimisation levels in the compiler these will get automatically combined keeping the stack frame reasonable. But better to manually combine them to cover all cases. Add a single local variable for __soc_pcm_hw_params and use in both loops to shrink the stack frame. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230908085920.2906359-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index eb0723876851..54704250c0a2 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -985,6 +985,7 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, { struct snd_soc_dai *cpu_dai; struct snd_soc_dai *codec_dai; + struct snd_pcm_hw_params tmp_params; int i, ret = 0; snd_soc_dpcm_mutex_assert_held(rtd); @@ -998,7 +999,6 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, goto out; for_each_rtd_codec_dais(rtd, i, codec_dai) { - struct snd_pcm_hw_params codec_params; unsigned int tdm_mask = snd_soc_dai_tdm_mask_get(codec_dai, substream->stream); /* @@ -1019,23 +1019,22 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, continue; /* copy params for each codec */ - codec_params = *params; + tmp_params = *params; /* fixup params based on TDM slot masks */ if (tdm_mask) - soc_pcm_codec_params_fixup(&codec_params, tdm_mask); + soc_pcm_codec_params_fixup(&tmp_params, tdm_mask); ret = snd_soc_dai_hw_params(codec_dai, substream, - &codec_params); + &tmp_params); if(ret < 0) goto out; - soc_pcm_set_dai_params(codec_dai, &codec_params); - snd_soc_dapm_update_dai(substream, &codec_params, codec_dai); + soc_pcm_set_dai_params(codec_dai, &tmp_params); + snd_soc_dapm_update_dai(substream, &tmp_params, codec_dai); } for_each_rtd_cpu_dais(rtd, i, cpu_dai) { - struct snd_pcm_hw_params cpu_params; unsigned int ch_mask = 0; int j; @@ -1047,7 +1046,7 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, continue; /* copy params for each cpu */ - cpu_params = *params; + tmp_params = *params; if (!rtd->dai_link->codec_ch_maps) goto hw_params; @@ -1062,16 +1061,16 @@ static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd, /* fixup cpu channel number */ if (ch_mask) - soc_pcm_codec_params_fixup(&cpu_params, ch_mask); + soc_pcm_codec_params_fixup(&tmp_params, ch_mask); hw_params: - ret = snd_soc_dai_hw_params(cpu_dai, substream, &cpu_params); + ret = snd_soc_dai_hw_params(cpu_dai, substream, &tmp_params); if (ret < 0) goto out; /* store the parameters for each DAI */ - soc_pcm_set_dai_params(cpu_dai, &cpu_params); - snd_soc_dapm_update_dai(substream, &cpu_params, cpu_dai); + soc_pcm_set_dai_params(cpu_dai, &tmp_params); + snd_soc_dapm_update_dai(substream, &tmp_params, cpu_dai); } ret = snd_soc_pcm_component_hw_params(substream, params); -- cgit From e616a916fe8431ebd5eb3cf4ac224d143c57083c Mon Sep 17 00:00:00 2001 From: Walt Holman Date: Sun, 10 Sep 2023 13:54:34 -0500 Subject: Add DMI ID for MSI Bravo 15 B7ED Signed-off-by: Walt Holman Link: https://lore.kernel.org/r/20230910185433.13677-1-waltholman09@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 3ec15b46fa35..59aa2e9d3a79 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -262,6 +262,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit From 18495676f7886e105133f1dc06c1d5e8d5436f32 Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 6 Sep 2023 13:32:54 -0500 Subject: spi: nxp-fspi: reset the FLSHxCR1 registers Reset the FLSHxCR1 registers to default value. ROM may set the register value and it affects the SPI NAND normal functions. Signed-off-by: Han Xu Link: https://lore.kernel.org/r/20230906183254.235847-1-han.xu@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 45a4acc95661..c964f41dcc42 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -1084,6 +1084,13 @@ static int nxp_fspi_default_setup(struct nxp_fspi *f) fspi_writel(f, FSPI_AHBCR_PREF_EN | FSPI_AHBCR_RDADDROPT, base + FSPI_AHBCR); + /* Reset the FLSHxCR1 registers. */ + reg = FSPI_FLSHXCR1_TCSH(0x3) | FSPI_FLSHXCR1_TCSS(0x3); + fspi_writel(f, reg, base + FSPI_FLSHA1CR1); + fspi_writel(f, reg, base + FSPI_FLSHA2CR1); + fspi_writel(f, reg, base + FSPI_FLSHB1CR1); + fspi_writel(f, reg, base + FSPI_FLSHB2CR1); + /* AHB Read - Set lut sequence ID for all CS. */ fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2); fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2); -- cgit From 6de8a70c84ee0586fdde4e671626b9caca6aed74 Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Wed, 6 Sep 2023 15:27:35 +0200 Subject: spi: stm32: add a delay before SPI disable As explained in errata sheet, in section "2.14.5 Truncation of SPI output signals after EOT event": On STM32MP1x, EOT interrupt can be thrown before the true end of communication. So we add a delay of a half period to wait the real end of the transmission. Link: https://www.st.com/resource/en/errata_sheet/es0539-stm32mp131x3x5x-device-errata-stmicroelectronics.pdf Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20230906132735.748174-1-valentin.caron@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b6d66caba4c0..ef665f470c5b 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -277,6 +277,7 @@ struct stm32_spi_cfg { * @fifo_size: size of the embedded fifo in bytes * @cur_midi: master inter-data idleness in ns * @cur_speed: speed configured in Hz + * @cur_half_period: time of a half bit in us * @cur_bpw: number of bits in a single SPI data frame * @cur_fthlv: fifo threshold level (data frames in a single data packet) * @cur_comm: SPI communication mode @@ -304,6 +305,7 @@ struct stm32_spi { unsigned int cur_midi; unsigned int cur_speed; + unsigned int cur_half_period; unsigned int cur_bpw; unsigned int cur_fthlv; unsigned int cur_comm; @@ -468,6 +470,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, spi->cur_speed = spi->clk_rate / (1 << mbrdiv); + spi->cur_half_period = DIV_ROUND_CLOSEST(USEC_PER_SEC, 2 * spi->cur_speed); + return mbrdiv - 1; } @@ -709,6 +713,10 @@ static void stm32h7_spi_disable(struct stm32_spi *spi) return; } + /* Add a delay to make sure that transmission is ended. */ + if (spi->cur_half_period) + udelay(spi->cur_half_period); + if (spi->cur_usedma && spi->dma_tx) dmaengine_terminate_async(spi->dma_tx); if (spi->cur_usedma && spi->dma_rx) -- cgit From d929b2b7464f95ec01e47f560b1e687482ba8929 Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Mon, 21 Aug 2023 16:24:18 +0200 Subject: bus: ti-sysc: Use fsleep() instead of usleep_range() in sysc_reset() The am335x-evm started producing boot errors because of subtle timing changes: Unhandled fault: external abort on non-linefetch (0x1008) at 0xf03c1010 ... sysc_reset from sysc_probe+0xf60/0x1514 sysc_probe from platform_probe+0x5c/0xbc ... The fix consists in using the appropriate sleep function in sysc reset. For flexible sleeping, fsleep is recommended. Here, sysc delay parameter can take any value in [0 - 255] us range. As a result, fsleep() should be used, calling udelay() for a sysc delay lower than 10 us. Signed-off-by: Julien Panis Fixes: e709ed70d122 ("bus: ti-sysc: Fix missing reset delay handling") Message-ID: <20230821-fix-ti-sysc-reset-v1-1-5a0a5d8fae55@baylibre.com> Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index eb4e7bee1e20..cf09b6b88cf7 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2150,8 +2150,7 @@ static int sysc_reset(struct sysc *ddata) } if (ddata->cfg.srst_udelay) - usleep_range(ddata->cfg.srst_udelay, - ddata->cfg.srst_udelay * 2); + fsleep(ddata->cfg.srst_udelay); if (ddata->post_reset_quirk) ddata->post_reset_quirk(ddata); -- cgit From 11729caa520950e17cd81bc43ffc477c46cf791e Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 6 Sep 2023 18:34:42 -0500 Subject: bus: ti-sysc: Fix missing AM35xx SoC matching Commit feaa8baee82a ("bus: ti-sysc: Implement SoC revision handling") created a list of SoC types searching for strings based on names and wildcards which associates the SoC to different families. The OMAP34xx and OMAP35xx are treated as SOC_3430 while OMAP36xx and OMAP37xx are treated as SOC_3630, but the AM35xx isn't listed. The AM35xx is mostly an OMAP3430, and a later commit a12315d6d270 ("bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific") looks for the SOC type and behaves in a certain way if it's SOC_3430. This caused a regression on the AM3517 causing it to return two errors: ti-sysc: probe of 48318000.target-module failed with error -16 ti-sysc: probe of 49032000.target-module failed with error -16 Fix this by treating the creating SOC_AM35 and inserting it between the SOC_3430 and SOC_3630. If it is treaed the same way as the SOC_3430 when checking the status of sysc_check_active_timer, the error conditions will disappear. Fixes: a12315d6d270 ("bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific") Fixes: feaa8baee82a ("bus: ti-sysc: Implement SoC revision handling") Signed-off-by: Adam Ford Message-ID: <20230906233442.270835-1-aford173@gmail.com> Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index cf09b6b88cf7..33e8d780b04b 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -38,6 +38,7 @@ enum sysc_soc { SOC_2420, SOC_2430, SOC_3430, + SOC_AM35, SOC_3630, SOC_4430, SOC_4460, @@ -1862,7 +1863,7 @@ static void sysc_pre_reset_quirk_dss(struct sysc *ddata) dev_warn(ddata->dev, "%s: timed out %08x !+ %08x\n", __func__, val, irq_mask); - if (sysc_soc->soc == SOC_3430) { + if (sysc_soc->soc == SOC_3430 || sysc_soc->soc == SOC_AM35) { /* Clear DSS_SDI_CONTROL */ sysc_write(ddata, 0x44, 0); @@ -3024,6 +3025,7 @@ static void ti_sysc_idle(struct work_struct *work) static const struct soc_device_attribute sysc_soc_match[] = { SOC_FLAG("OMAP242*", SOC_2420), SOC_FLAG("OMAP243*", SOC_2430), + SOC_FLAG("AM35*", SOC_AM35), SOC_FLAG("OMAP3[45]*", SOC_3430), SOC_FLAG("OMAP3[67]*", SOC_3630), SOC_FLAG("OMAP443*", SOC_4430), @@ -3228,7 +3230,7 @@ static int sysc_check_active_timer(struct sysc *ddata) * can be dropped if we stop supporting old beagleboard revisions * A to B4 at some point. */ - if (sysc_soc->soc == SOC_3430) + if (sysc_soc->soc == SOC_3430 || sysc_soc->soc == SOC_AM35) error = -ENXIO; else error = -EBUSY; -- cgit From 61ba93b4353106f0f4ee5fdca2d6527e85abf884 Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Fri, 8 Sep 2023 16:10:40 +0800 Subject: selftests: ALSA: remove unused variables These variables are never referenced in the code, just remove them. Signed-off-by: Ding Xiang Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230908081040.197243-1-dingxiang@cmss.chinamobile.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/conf.c | 1 - tools/testing/selftests/alsa/mixer-test.c | 11 +++-------- tools/testing/selftests/alsa/pcm-test.c | 4 ++-- tools/testing/selftests/alsa/test-pcmtest-driver.c | 1 - 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index d7aafe5a1993..2f1685a3eae1 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -431,7 +431,6 @@ long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) { snd_config_t *cfg; - long l; int ret; if (!root) diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index c95d63e553f4..21e482b23f50 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -188,7 +188,7 @@ static int wait_for_event(struct ctl_data *ctl, int timeout) { unsigned short revents; snd_ctl_event_t *event; - int count, err; + int err; unsigned int mask = 0; unsigned int ev_id; @@ -430,7 +430,6 @@ static bool strend(const char *haystack, const char *needle) static void test_ctl_name(struct ctl_data *ctl) { bool name_ok = true; - bool check; ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem, ctl->name); @@ -863,7 +862,6 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, snd_ctl_elem_value_t *val) { int err; - long val_read; /* Ideally this will fail... */ err = snd_ctl_elem_write(ctl->card->handle, val); @@ -883,8 +881,7 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl) { - int err, i; - long val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -994,8 +991,7 @@ static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl) static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) { - int err, i; - unsigned int val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -1027,7 +1023,6 @@ static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) static void test_ctl_write_invalid(struct ctl_data *ctl) { bool pass; - int err; /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index 2f5e3c462194..c0a39818c5a4 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -257,7 +257,7 @@ static void find_pcms(void) static void test_pcm_time(struct pcm_data *data, enum test_class class, const char *test_name, snd_config_t *pcm_cfg) { - char name[64], key[128], msg[256]; + char name[64], msg[256]; const int duration_s = 2, margin_ms = 100; const int duration_ms = duration_s * 1000; const char *cs; @@ -567,7 +567,7 @@ int main(void) { struct card_data *card; struct pcm_data *pcm; - snd_config_t *global_config, *cfg, *pcm_cfg; + snd_config_t *global_config, *cfg; int num_pcm_tests = 0, num_tests, num_std_pcm_tests; int ret; void *thread_ret; diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index 357adc722cba..a52ecd43dbe3 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -313,7 +313,6 @@ TEST_F(pcmtest, ni_playback) { */ TEST_F(pcmtest, reset_ioctl) { snd_pcm_t *handle; - unsigned char *it; int test_res; struct pcmtest_test_params *params = &self->params; -- cgit From a0334bf78b95532cec54f56b53e8ae1bfe7e1ca1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 3 Sep 2023 22:23:25 +0000 Subject: acpi: Provide ia64 dummy implementation of acpi_proc_quirk_mwait_check() Commit 0a0e2ea642f6 ("ACPI: processor: Move MWAIT quirk out of acpi_processor.c") moved the MWAIT quirk code into arch/x86 but left calls to it in the ACPI PDC processor code that is shared with Itanium, breaking the latter build. Since the quirk is specific to a certain x86-based platform, stub out the function acpi_proc_quirk_mwait_check() when building for ia64. Fixes: 0a0e2ea642f6 ("ACPI: processor: Move MWAIT quirk out of acpi_processor.c") Signed-off-by: Ard Biesheuvel --- arch/ia64/kernel/acpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 15f6cfddcc08..41e8fe55cd98 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -907,3 +907,7 @@ EXPORT_SYMBOL(acpi_unregister_ioapic); * TBD when IA64 starts to support suspend... */ int acpi_suspend_lowlevel(void) { return 0; } + +void acpi_proc_quirk_mwait_check(void) +{ +} -- cgit From 57c0f4a8ea3a206c313bf6b0992f6fdc084e66e7 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 25 Aug 2023 14:05:13 +0200 Subject: xfs: fix select in config XFS_ONLINE_SCRUB_STATS Commit d7a74cad8f45 ("xfs: track usage statistics of online fsck") introduces config XFS_ONLINE_SCRUB_STATS, which selects the non-existing config FS_DEBUG. It is probably intended to select the existing config XFS_DEBUG. Fix the select in config XFS_ONLINE_SCRUB_STATS. Fixes: d7a74cad8f45 ("xfs: track usage statistics of online fsck") Signed-off-by: Lukas Bulwahn Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index c9d653168ad0..ed0bc8cbc703 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select FS_DEBUG + select XFS_DEBUG help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number -- cgit From 23a3bfd4ba7acd36abf52b78605f61b21bdac216 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Sep 2023 19:04:45 +0200 Subject: netfilter: nf_tables: disallow element removal on anonymous sets Anonymous sets need to be populated once at creation and then they are bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound anonymous set before commit phase"), otherwise transaction reports EINVAL. Userspace does not need to delete elements of anonymous sets that are not yet bound, reject this with EOPNOTSUPP. From flush command path, skip anonymous sets, they are expected to be bound already. Otherwise, EINVAL is hit at the end of this transaction for unbound sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b59311931fb..c1e485aee763 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1446,8 +1446,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -7191,8 +7190,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && - (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); -- cgit From c3638b851bc1ca0022dca9d6ca4beaa6ef03a216 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Sat, 12 Aug 2023 02:18:18 +0530 Subject: firmware: arm_scmi: Fixup perf power-cost/microwatt support The perf power scale value would currently be reported as bogowatts if the platform firmware supports microwatt power scale and meets the perf major version requirements. Fix this by populating version information in the driver private data before the call to protocol attributes is made. CC: Chandra Sekhar Lingutla Fixes: 3630cd8130ce ("firmware: arm_scmi: Add SCMI v3.1 perf power-cost in microwatts") Signed-off-by: Sibi Sankar Reviewed-by: Cristian Marussi Link: https://lore.kernel.org/r/20230811204818.30928-1-quic_sibis@quicinc.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index c0cd556fbaae..30dedd6ebfde 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -1080,6 +1080,8 @@ static int scmi_perf_protocol_init(const struct scmi_protocol_handle *ph) if (!pinfo) return -ENOMEM; + pinfo->version = version; + ret = scmi_perf_attributes_get(ph, pinfo); if (ret) return ret; @@ -1104,8 +1106,6 @@ static int scmi_perf_protocol_init(const struct scmi_protocol_handle *ph) if (ret) return ret; - pinfo->version = version; - return ph->set_priv(ph, pinfo); } -- cgit From 234249d88b091d006b82f8d570343aae5f383736 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 25 Aug 2023 03:00:55 -0400 Subject: wifi: cfg80211/mac80211: hold link BSSes when assoc fails for MLO connection When connect to MLO AP with more than one link, and the assoc response of AP is not success, then cfg80211_unhold_bss() is not called for all the links' cfg80211_bss except the primary link which means the link used by the latest successful association request. Thus the hold value of the cfg80211_bss is not reset to 0 after the assoc fail, and then the __cfg80211_unlink_bss() will not be called for the cfg80211_bss by __cfg80211_bss_expire(). Then the AP always looks exist even the AP is shutdown or reconfigured to another type, then it will lead error while connecting it again. The detail info are as below. When connect with muti-links AP, cfg80211_hold_bss() is called by cfg80211_mlme_assoc() for each cfg80211_bss of all the links. When assoc response from AP is not success(such as status_code==1), the ieee80211_link_data of non-primary link(sdata->link[link_id]) is NULL because ieee80211_assoc_success()->ieee80211_vif_update_links() is not called for the links. Then struct cfg80211_rx_assoc_resp resp in cfg80211_rx_assoc_resp() and struct cfg80211_connect_resp_params cr in __cfg80211_connect_result() will only have the data of the primary link, and finally function cfg80211_connect_result_release_bsses() only call cfg80211_unhold_bss() for the primary link. Then cfg80211_bss of the other links will never free because its hold is always > 0 now. Hence assign value for the bss and status from assoc_data since it is valid for this case. Also assign value of addr from assoc_data when the link is NULL because the addrs of assoc_data and link both represent the local link addr and they are same value for success connection. Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link") Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20230825070055.28164-1-quic_wgong@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- net/mac80211/mlme.c | 11 ++++++----- net/wireless/mlme.c | 3 ++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3a4b684f89bf..ed3bc2a78d82 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7231,7 +7231,7 @@ struct cfg80211_rx_assoc_resp { int uapsd_queues; const u8 *ap_mld_addr; struct { - const u8 *addr; + u8 addr[ETH_ALEN] __aligned(2); struct cfg80211_bss *bss; u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f93eb38ae0b8..46d46cfab6c8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5429,17 +5429,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link; - link = sdata_dereference(sdata->link[link_id], sdata); - if (!link) - continue; - if (!assoc_data->link[link_id].bss) continue; resp.links[link_id].bss = assoc_data->link[link_id].bss; - resp.links[link_id].addr = link->conf->addr; + ether_addr_copy(resp.links[link_id].addr, + assoc_data->link[link_id].addr); resp.links[link_id].status = assoc_data->link[link_id].status; + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + /* get uapsd queues configuration - same for all links */ resp.uapsd_queues = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 775cac4d6100..3e2c398abddc 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -52,7 +52,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, cr.links[link_id].bssid = data->links[link_id].bss->bssid; cr.links[link_id].addr = data->links[link_id].addr; /* need to have local link addresses for MLO connections */ - WARN_ON(cr.ap_mld_addr && !cr.links[link_id].addr); + WARN_ON(cr.ap_mld_addr && + !is_valid_ether_addr(cr.links[link_id].addr)); BUG_ON(!cr.links[link_id].bss->channel); -- cgit From 5112fa502708aaaf80acb78273fc8625f221eb11 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 5 Sep 2023 12:18:57 +0530 Subject: wifi: cfg80211: validate AP phy operation before starting it Many regulatories can have HE/EHT Operation as not permitted. In such cases, AP should not be allowed to start if it is using a channel having the no operation flag set. However, currently there is no such check in place. Fix this issue by validating such IEs sent during start AP against the channel flags. Signed-off-by: Aditya Kumar Singh Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230905064857.1503-1-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index de47838aca4f..0c989a839e56 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5909,6 +5909,21 @@ out: nlmsg_free(msg); } +static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params) +{ + struct ieee80211_channel *channel = params->chandef.chan; + + if ((params->he_cap || params->he_oper) && + (channel->flags & IEEE80211_CHAN_NO_HE)) + return -EOPNOTSUPP; + + if ((params->eht_cap || params->eht_oper) && + (channel->flags & IEEE80211_CHAN_NO_EHT)) + return -EOPNOTSUPP; + + return 0; +} + static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6178,6 +6193,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) goto out_unlock; + err = nl80211_validate_ap_phy_operation(params); + if (err) + goto out_unlock; + if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) params->flags = nla_get_u32( info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]); -- cgit From 2d4caa1dbe915654d0e8845758d9c96e721377a8 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 5 Sep 2023 16:29:57 +0300 Subject: iwlwifi: mvm: handle PS changes in vif_cfg_changed Handling of BSS_CHANGED_PS was missing in vif_cfg_changed callback. Fix it. Fixes: 22c588343529 ("wifi: iwlwifi: mvm: replace bss_info_changed() with vif_cfg/link_info_changed()") Reported-by: Sultan Alsawaf Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230905162939.5ef0c8230de6.Ieed265014988c50ec68fbff6d33821e4215f987f@changeid [note: patch looks bigger than it is due to reindentation] Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 121 +++++++++++---------- 1 file changed, 63 insertions(+), 58 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 8b6c641772ee..b719843e9457 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -731,73 +731,78 @@ static void iwl_mvm_mld_vif_cfg_changed_station(struct iwl_mvm *mvm, mvmvif->associated = vif->cfg.assoc; - if (!(changes & BSS_CHANGED_ASSOC)) - return; - - if (vif->cfg.assoc) { - /* clear statistics to get clean beacon counter */ - iwl_mvm_request_statistics(mvm, true); - iwl_mvm_sf_update(mvm, vif, false); - iwl_mvm_power_vif_assoc(mvm, vif); - - for_each_mvm_vif_valid_link(mvmvif, i) { - memset(&mvmvif->link[i]->beacon_stats, 0, - sizeof(mvmvif->link[i]->beacon_stats)); + if (changes & BSS_CHANGED_ASSOC) { + if (vif->cfg.assoc) { + /* clear statistics to get clean beacon counter */ + iwl_mvm_request_statistics(mvm, true); + iwl_mvm_sf_update(mvm, vif, false); + iwl_mvm_power_vif_assoc(mvm, vif); + + for_each_mvm_vif_valid_link(mvmvif, i) { + memset(&mvmvif->link[i]->beacon_stats, 0, + sizeof(mvmvif->link[i]->beacon_stats)); + + if (vif->p2p) { + iwl_mvm_update_smps(mvm, vif, + IWL_MVM_SMPS_REQ_PROT, + IEEE80211_SMPS_DYNAMIC, i); + } + + rcu_read_lock(); + link_conf = rcu_dereference(vif->link_conf[i]); + if (link_conf && !link_conf->dtim_period) + protect = true; + rcu_read_unlock(); + } - if (vif->p2p) { - iwl_mvm_update_smps(mvm, vif, - IWL_MVM_SMPS_REQ_PROT, - IEEE80211_SMPS_DYNAMIC, i); + if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && + protect) { + /* If we're not restarting and still haven't + * heard a beacon (dtim period unknown) then + * make sure we still have enough minimum time + * remaining in the time event, since the auth + * might actually have taken quite a while + * (especially for SAE) and so the remaining + * time could be small without us having heard + * a beacon yet. + */ + iwl_mvm_protect_assoc(mvm, vif, 0); } - rcu_read_lock(); - link_conf = rcu_dereference(vif->link_conf[i]); - if (link_conf && !link_conf->dtim_period) - protect = true; - rcu_read_unlock(); - } + iwl_mvm_sf_update(mvm, vif, false); + + /* FIXME: need to decide about misbehaving AP handling */ + iwl_mvm_power_vif_assoc(mvm, vif); + } else if (iwl_mvm_mld_vif_have_valid_ap_sta(mvmvif)) { + iwl_mvm_mei_host_disassociated(mvm); - if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && - protect) { - /* If we're not restarting and still haven't - * heard a beacon (dtim period unknown) then - * make sure we still have enough minimum time - * remaining in the time event, since the auth - * might actually have taken quite a while - * (especially for SAE) and so the remaining - * time could be small without us having heard - * a beacon yet. + /* If update fails - SF might be running in associated + * mode while disassociated - which is forbidden. */ - iwl_mvm_protect_assoc(mvm, vif, 0); + ret = iwl_mvm_sf_update(mvm, vif, false); + WARN_ONCE(ret && + !test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status), + "Failed to update SF upon disassociation\n"); + + /* If we get an assert during the connection (after the + * station has been added, but before the vif is set + * to associated), mac80211 will re-add the station and + * then configure the vif. Since the vif is not + * associated, we would remove the station here and + * this would fail the recovery. + */ + iwl_mvm_mld_vif_delete_all_stas(mvm, vif); } - iwl_mvm_sf_update(mvm, vif, false); - - /* FIXME: need to decide about misbehaving AP handling */ - iwl_mvm_power_vif_assoc(mvm, vif); - } else if (iwl_mvm_mld_vif_have_valid_ap_sta(mvmvif)) { - iwl_mvm_mei_host_disassociated(mvm); - - /* If update fails - SF might be running in associated - * mode while disassociated - which is forbidden. - */ - ret = iwl_mvm_sf_update(mvm, vif, false); - WARN_ONCE(ret && - !test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, - &mvm->status), - "Failed to update SF upon disassociation\n"); - - /* If we get an assert during the connection (after the - * station has been added, but before the vif is set - * to associated), mac80211 will re-add the station and - * then configure the vif. Since the vif is not - * associated, we would remove the station here and - * this would fail the recovery. - */ - iwl_mvm_mld_vif_delete_all_stas(mvm, vif); + iwl_mvm_bss_info_changed_station_assoc(mvm, vif, changes); } - iwl_mvm_bss_info_changed_station_assoc(mvm, vif, changes); + if (changes & BSS_CHANGED_PS) { + ret = iwl_mvm_power_update_mac(mvm); + if (ret) + IWL_ERR(mvm, "failed to update power mode\n"); + } } static void -- cgit From dc77721ea4aa1e8937e2436f230b5a69065cc508 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 13 Jun 2023 23:31:50 +0200 Subject: power: supply: ab8500: Set typing and props I had the following weird phenomena on a mobile phone: while the capacity in /sys/class/power_supply/ab8500_fg/capacity would reflect the actual charge and capacity of the battery, only 1/3 of the value was shown on the battery status indicator and warnings for low battery appeared. It turns out that UPower, the Freedesktop power daemon, will average all the power supplies of type "battery" in /sys/class/power_supply/* if there is more than one battery. For the AB8500, there was "battery" ab8500_fg, ab8500_btemp and ab8500_chargalg. The latter two don't know anything about the battery, and should not be considered. They were however averaged and with the capacity of 0. Flag ab8500_btemp and ab8500_chargalg with type "unknown" so they are not averaged as batteries. Remove the technology prop from ab8500_btemp as well, all it does is snoop in on knowledge from another supply. After this the battery indicator shows the right value. Cc: Stefan Hansson Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/ab8500_btemp.c | 9 +-------- drivers/power/supply/ab8500_chargalg.c | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c index 6f83e99d2eb7..ce36d6ca3422 100644 --- a/drivers/power/supply/ab8500_btemp.c +++ b/drivers/power/supply/ab8500_btemp.c @@ -115,7 +115,6 @@ struct ab8500_btemp { static enum power_supply_property ab8500_btemp_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, - POWER_SUPPLY_PROP_TECHNOLOGY, POWER_SUPPLY_PROP_TEMP, }; @@ -532,12 +531,6 @@ static int ab8500_btemp_get_property(struct power_supply *psy, else val->intval = 1; break; - case POWER_SUPPLY_PROP_TECHNOLOGY: - if (di->bm->bi) - val->intval = di->bm->bi->technology; - else - val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN; - break; case POWER_SUPPLY_PROP_TEMP: val->intval = ab8500_btemp_get_temp(di); break; @@ -662,7 +655,7 @@ static char *supply_interface[] = { static const struct power_supply_desc ab8500_btemp_desc = { .name = "ab8500_btemp", - .type = POWER_SUPPLY_TYPE_BATTERY, + .type = POWER_SUPPLY_TYPE_UNKNOWN, .properties = ab8500_btemp_props, .num_properties = ARRAY_SIZE(ab8500_btemp_props), .get_property = ab8500_btemp_get_property, diff --git a/drivers/power/supply/ab8500_chargalg.c b/drivers/power/supply/ab8500_chargalg.c index ea4ad61d4c7e..2205ea0834a6 100644 --- a/drivers/power/supply/ab8500_chargalg.c +++ b/drivers/power/supply/ab8500_chargalg.c @@ -1720,7 +1720,7 @@ static char *supply_interface[] = { static const struct power_supply_desc ab8500_chargalg_desc = { .name = "ab8500_chargalg", - .type = POWER_SUPPLY_TYPE_BATTERY, + .type = POWER_SUPPLY_TYPE_UNKNOWN, .properties = ab8500_chargalg_props, .num_properties = ARRAY_SIZE(ab8500_chargalg_props), .get_property = ab8500_chargalg_get_property, -- cgit From 46a9ea6681907a3be6b6b0d43776dccc62cad6cf Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Fri, 8 Sep 2023 19:06:49 -0400 Subject: mm/slab_common: fix slab_caches list corruption after kmem_cache_destroy() After the commit in Fixes:, if a module that created a slab cache does not release all of its allocated objects before destroying the cache (at rmmod time), we might end up releasing the kmem_cache object without removing it from the slab_caches list thus corrupting the list as kmem_cache_destroy() ignores the return value from shutdown_cache(), which in turn never removes the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails to release all of the cache's slabs. This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y as after that ill release the system will immediately trip on list_add, or list_del, assertions similar to the one shown below as soon as another kmem_cache gets created, or destroyed: [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268) [ 1041.219165] ------------[ cut here ]------------ [ 1041.221517] kernel BUG at lib/list_debug.c:62! [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G B W OE 6.5.0 #15 [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023 [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0 Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y, is to set slub_debug to poison the released objects and then just run cat /proc/slabinfo after removing the module that leaks slab objects, in which case the kernel will panic: [ 50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI [ 50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G B W OE 6.5.0 #15 [ 50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023 [ 50.972663] RIP: 0010:get_slabinfo+0x42/0xf0 This patch fixes this issue by properly checking shutdown_cache()'s return value before taking the kmem_cache_release() branch. Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock") Signed-off-by: Rafael Aquini Cc: stable@vger.kernel.org Reviewed-by: Waiman Long Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 01cdbf122463..e99e821065c3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s) { - int refcnt; + int err = -EBUSY; bool rcu_set; if (unlikely(!s) || !kasan_check_byte(s)) @@ -490,17 +490,17 @@ void kmem_cache_destroy(struct kmem_cache *s) rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU; - refcnt = --s->refcount; - if (refcnt) + s->refcount--; + if (s->refcount) goto out_unlock; - WARN(shutdown_cache(s), - "%s %s: Slab cache still has objects when called from %pS", + err = shutdown_cache(s); + WARN(err, "%s %s: Slab cache still has objects when called from %pS", __func__, s->name, (void *)_RET_IP_); out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); - if (!refcnt && !rcu_set) + if (!err && !rcu_set) kmem_cache_release(s); } EXPORT_SYMBOL(kmem_cache_destroy); -- cgit From e193b7955dfad68035b983a0011f4ef3590c85eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Aug 2023 13:57:27 -0700 Subject: RDMA/srp: Do not call scsi_done() from srp_abort() After scmd_eh_abort_handler() has called the SCSI LLD eh_abort_handler callback, it performs one of the following actions: * Call scsi_queue_insert(). * Call scsi_finish_command(). * Call scsi_eh_scmd_add(). Hence, SCSI abort handlers must not call scsi_done(). Otherwise all the above actions would trigger a use-after-free. Hence remove the scsi_done() call from srp_abort(). Keep the srp_free_req() call before returning SUCCESS because we may not see the command again if SUCCESS is returned. Cc: Bob Pearson Cc: Shinichiro Kawasaki Fixes: d8536670916a ("IB/srp: Avoid having aborted requests hang") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230823205727.505681-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1574218764e0..2916e77f589b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2784,7 +2784,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; - int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); @@ -2798,19 +2797,14 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "Sending SRP abort for tag %#x\n", tag); if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, - SRP_TSK_ABORT_TASK, NULL) == 0) - ret = SUCCESS; - else if (target->rport->state == SRP_RPORT_LOST) - ret = FAST_IO_FAIL; - else - ret = FAILED; - if (ret == SUCCESS) { + SRP_TSK_ABORT_TASK, NULL) == 0) { srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scsi_done(scmnd); + return SUCCESS; } + if (target->rport->state == SRP_RPORT_LOST) + return FAST_IO_FAIL; - return ret; + return FAILED; } static int srp_reset_device(struct scsi_cmnd *scmnd) -- cgit From 53a3f777049771496f791504e7dc8ef017cba590 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 5 Sep 2023 16:58:22 +0200 Subject: RDMA/siw: Fix connection failure handling In case immediate MPA request processing fails, the newly created endpoint unlinks the listening endpoint and is ready to be dropped. This special case was not handled correctly by the code handling the later TCP socket close, causing a NULL dereference crash in siw_cm_work_handler() when dereferencing a NULL listener. We now also cancel the useless MPA timeout, if immediate MPA request processing fails. This patch furthermore simplifies MPA processing in general: Scheduling a useless TCP socket read in sk_data_ready() upcall is now surpressed, if the socket is already moved out of TCP_ESTABLISHED state. Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20230905145822.446263-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index a2605178f4ed..43e776073f49 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -976,6 +976,7 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_put(cep); new_cep->listen_cep = NULL; if (rv) { + siw_cancel_mpatimer(new_cep); siw_cep_set_free(new_cep); goto error; } @@ -1100,9 +1101,12 @@ static void siw_cm_work_handler(struct work_struct *w) /* * Socket close before MPA request received. */ - siw_dbg_cep(cep, "no mpareq: drop listener\n"); - siw_cep_put(cep->listen_cep); - cep->listen_cep = NULL; + if (cep->listen_cep) { + siw_dbg_cep(cep, + "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } } } release_cep = 1; @@ -1227,7 +1231,11 @@ static void siw_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - siw_dbg_cep(cep, "state: %d\n", cep->state); + siw_dbg_cep(cep, "cep state: %d, socket state %d\n", + cep->state, sk->sk_state); + + if (sk->sk_state != TCP_ESTABLISHED) + goto out; switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: -- cgit From 8fb8a82086f5bda6893ea6557c5a458e4549c6d7 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Tue, 5 Sep 2023 15:40:48 +0300 Subject: RDMA/cxgb4: Check skb value for failure to allocate get_skb() can fail to allocate skb, so check it. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5be78ee924ae ("RDMA/cxgb4: Fix LE hash collision bug for active open connection") Signed-off-by: Artem Chernyshev Link: https://lore.kernel.org/r/20230905124048.284165-1-artem.chernyshev@red-soft.ru Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/cxgb4/cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ced615b5ea09..040ba2224f9f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1965,6 +1965,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) int win; skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + req = __skb_put_zero(skb, sizeof(*req)); req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); -- cgit From c489800e0d48097fc6afebd862c6afa039110a36 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 5 Sep 2023 18:32:58 +0800 Subject: RDMA/uverbs: Fix typo of sizeof argument Since size of 'hdr' pointer and '*hdr' structure is equal on 64-bit machines issue probably didn't cause any wrong behavior. But anyway, fixing of typo is required. Fixes: da0f60df7bd5 ("RDMA/uverbs: Prohibit write() calls with too small buffers") Co-developed-by: Ivanov Mikhail Signed-off-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Link: https://lore.kernel.org/r/20230905103258.1738246-1-konstantin.meskhidze@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index bf800f8cb3e4..495d5a5d0373 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -546,7 +546,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; - if (count < method_elm->req_size + sizeof(hdr)) { + if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't -- cgit From 2251588143f65636cf3f3f12beb009084fa2d5d7 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 21 Aug 2023 13:33:12 +0900 Subject: reiserfs: Replace 1-element array with C99 style flex-array UBSAN found the following issue: ================================================================================ UBSAN: array-index-out-of-bounds in fs/reiserfs/journal.c:4166:22 index 1 is out of range for type '__le32 [1]' This is because struct reiserfs_journal_desc uses 1-element array for dynamically sized array member, j_realblock. This patch fixes this issue by replacing the 1-element array member with C99 style flex-array. This patch also fixes the same issue in struct reiserfs_journal_commit as the same manner. Fixes: f466c6fdb3b1 ("move private bits of reiserfs_fs.h to fs/reiserfs/reiserfs.h") Signed-off-by: Shigeru Yoshida Message-Id: <20230821043312.1444068-1-syoshida@redhat.com> Signed-off-by: Christian Brauner --- fs/reiserfs/reiserfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index b81749492ef9..7d12b8c5b2fa 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2699,7 +2699,7 @@ struct reiserfs_iget_args { #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) #define journal_trans_half(blocksize) \ - ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32)) + ((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32)) /* journal.c see journal.c for all the comments here */ @@ -2711,7 +2711,7 @@ struct reiserfs_journal_desc { __le32 j_len; __le32 j_mount_id; /* mount id of this trans */ - __le32 j_realblock[1]; /* real locations for each block */ + __le32 j_realblock[]; /* real locations for each block */ }; #define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) @@ -2726,7 +2726,7 @@ struct reiserfs_journal_desc { struct reiserfs_journal_commit { __le32 j_trans_id; /* must match j_trans_id from the desc block */ __le32 j_len; /* ditto */ - __le32 j_realblock[1]; /* real locations for each block */ + __le32 j_realblock[]; /* real locations for each block */ }; #define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) -- cgit From 6223e073db78458f8846c380ccd224a7a73a3867 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 11 Sep 2023 14:42:47 +0200 Subject: regulator: Fix voltage range selection Use the correct field to fix wrong voltage range selection on regulators such as tps6287x since the blamed commit. Fixes: 269cb04b601d ("regulator: Use bitfield values for range selectors") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20230911-regulator-voltage-sel-v1-1-886eb1ade8d8@axis.com Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index 5ad5f3b3a6b5..d49268336553 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -197,7 +197,7 @@ int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev, sel += rdev->desc->linear_ranges[i].min_sel; range = rdev->desc->linear_range_selectors_bitfield[i]; - range <<= ffs(rdev->desc->vsel_mask) - 1; + range <<= ffs(rdev->desc->vsel_range_mask) - 1; if (rdev->desc->vsel_reg == rdev->desc->vsel_range_reg) { ret = regmap_update_bits(rdev->regmap, -- cgit From a6e414a4cb5ab320747b913b0897d1159fa978eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Sep 2023 10:28:50 -0300 Subject: perf tools: Update copy of libbpf's hashmap.c To pick the changes in: a3e7e6b17946f48b ("libbpf: Remove HASHMAP_INIT static initialization helper") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 0a5bf1937a7c..c12f8320e668 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -80,16 +80,6 @@ struct hashmap { size_t sz; }; -#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ - .hash_fn = (hash_fn), \ - .equal_fn = (equal_fn), \ - .ctx = (ctx), \ - .buckets = NULL, \ - .cap = 0, \ - .cap_bits = 0, \ - .sz = 0, \ -} - void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, hashmap_equal_fn equal_fn, void *ctx); struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, -- cgit From 00078e834e110a29cecd22ac5fbc49acbc839c44 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 25 Aug 2023 17:27:06 +0800 Subject: pinctrl: lantiq: Remove unsued declaration ltq_pinctrl_unregister() Commit 3f8c50c9b110 ("OF: pinctrl: MIPS: lantiq: implement lantiq/xway pinctrl support") declared but never implemented it, so can be removed. Signed-off-by: YueHaibing Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230825092706.14680-1-yuehaibing@huawei.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-lantiq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-lantiq.h b/drivers/pinctrl/pinctrl-lantiq.h index efb25fc34f14..1ac49ae638de 100644 --- a/drivers/pinctrl/pinctrl-lantiq.h +++ b/drivers/pinctrl/pinctrl-lantiq.h @@ -198,5 +198,4 @@ enum ltq_pin { extern int ltq_pinctrl_register(struct platform_device *pdev, struct ltq_pinmux_info *info); -extern int ltq_pinctrl_unregister(struct platform_device *pdev); #endif /* __PINCTRL_LANTIQ_H */ -- cgit From 424c82e8ad56756bb98b08268ffcf68d12d183eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Jun 2023 11:03:34 +0200 Subject: wifi: iwlwifi: dbg_ini: fix structure packing The iwl_fw_ini_error_dump_range structure has conflicting alignment requirements for the inner union and the outer struct: In file included from drivers/net/wireless/intel/iwlwifi/fw/dbg.c:9: drivers/net/wireless/intel/iwlwifi/fw/error-dump.h:312:2: error: field within 'struct iwl_fw_ini_error_dump_range' is less aligned than 'union iwl_fw_ini_error_dump_range::(anonymous at drivers/net/wireless/intel/iwlwifi/fw/error-dump.h:312:2)' and is usually due to 'struct iwl_fw_ini_error_dump_range' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] union { As the original intention was apparently to make the entire structure unaligned, mark the innermost members the same way so the union becomes packed as well. Fixes: 973193554cae6 ("iwlwifi: dbg_ini: dump headers cleanup") Signed-off-by: Arnd Bergmann Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/20230616090343.2454061-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/error-dump.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h index f5e08988dc7b..06d6f7f66430 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h @@ -310,9 +310,9 @@ struct iwl_fw_ini_fifo_hdr { struct iwl_fw_ini_error_dump_range { __le32 range_data_size; union { - __le32 internal_base_addr; - __le64 dram_base_addr; - __le32 page_num; + __le32 internal_base_addr __packed; + __le64 dram_base_addr __packed; + __le32 page_num __packed; struct iwl_fw_ini_fifo_hdr fifo_hdr; struct iwl_cmd_header fw_pkt_hdr; }; -- cgit From e8fbe99e87877f0412655f40d7c45bf8471470ac Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 8 Aug 2023 13:56:05 -0700 Subject: wifi: iwlwifi: Ensure ack flag is properly cleared. Debugging indicates that nothing else is clearing the info->flags, so some frames were flagged as ACKed when they should not be. Explicitly clear the ack flag to ensure this does not happen. Signed-off-by: Ben Greear Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/20230808205605.4105670-1-greearb@candelatech.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 36d70d589aed..898dca393643 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1612,6 +1612,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]); memset(&info->status, 0, sizeof(info->status)); + info->flags &= ~(IEEE80211_TX_STAT_ACK | IEEE80211_TX_STAT_TX_FILTERED); /* inform mac80211 about what happened with the frame */ switch (status & TX_STATUS_MSK) { @@ -1964,6 +1965,8 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid, */ if (!is_flush) info->flags |= IEEE80211_TX_STAT_ACK; + else + info->flags &= ~IEEE80211_TX_STAT_ACK; } /* -- cgit From 8ba438ef3cacc4808a63ed0ce24d4f0942cfe55d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 23 Jul 2023 22:24:59 +0200 Subject: wifi: iwlwifi: mvm: Fix a memory corruption issue A few lines above, space is kzalloc()'ed for: sizeof(struct iwl_nvm_data) + sizeof(struct ieee80211_channel) + sizeof(struct ieee80211_rate) 'mvm->nvm_data' is a 'struct iwl_nvm_data', so it is fine. At the end of this structure, there is the 'channels' flex array. Each element is of type 'struct ieee80211_channel'. So only 1 element is allocated in this array. When doing: mvm->nvm_data->bands[0].channels = mvm->nvm_data->channels; We point at the first element of the 'channels' flex array. So this is fine. However, when doing: mvm->nvm_data->bands[0].bitrates = (void *)((u8 *)mvm->nvm_data->channels + 1); because of the "(u8 *)" cast, we add only 1 to the address of the beginning of the flex array. It is likely that we want point at the 'struct ieee80211_rate' allocated just after. Remove the spurious casting so that the pointer arithmetic works as expected. Fixes: 8ca151b568b6 ("iwlwifi: add the MVM driver") Signed-off-by: Christophe JAILLET Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/23f0ec986ef1529055f4f93dcb3940a6cf8d9a94.1690143750.git.christophe.jaillet@wanadoo.fr Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 1f5db65a088d..1d5ee4330f29 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -802,7 +802,7 @@ out: mvm->nvm_data->bands[0].n_channels = 1; mvm->nvm_data->bands[0].n_bitrates = 1; mvm->nvm_data->bands[0].bitrates = - (void *)((u8 *)mvm->nvm_data->channels + 1); + (void *)(mvm->nvm_data->channels + 1); mvm->nvm_data->bands[0].bitrates->hw_value = 10; } -- cgit From 37c20b2effe987b806c8de6d12978e4ffeff026f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Aug 2023 15:38:04 +0200 Subject: wifi: cfg80211: fix cqm_config access race Max Schulze reports crashes with brcmfmac. The reason seems to be a race between userspace removing the CQM config and the driver calling cfg80211_cqm_rssi_notify(), where if the data is freed while cfg80211_cqm_rssi_notify() runs it will crash since it assumes wdev->cqm_config is set. This can't be fixed with a simple non-NULL check since there's nothing we can do for locking easily, so use RCU instead to protect the pointer, but that requires pulling the updates out into an asynchronous worker so they can sleep and call back into the driver. Since we need to change the free anyway, also change it to go back to the old settings if changing the settings fails. Reported-and-tested-by: Max Schulze Closes: https://lore.kernel.org/r/ac96309a-8d8d-4435-36e6-6d152eb31876@online.de Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM") Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +- net/wireless/core.c | 14 ++++---- net/wireless/core.h | 7 ++-- net/wireless/nl80211.c | 93 +++++++++++++++++++++++++++++++++----------------- 4 files changed, 75 insertions(+), 42 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed3bc2a78d82..aebfa54d547a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6013,7 +6013,8 @@ struct wireless_dev { } wext; #endif - struct cfg80211_cqm_config *cqm_config; + struct wiphy_work cqm_rssi_work; + struct cfg80211_cqm_config __rcu *cqm_config; struct list_head pmsr_list; spinlock_t pmsr_lock; diff --git a/net/wireless/core.c b/net/wireless/core.c index 25bc2e50a061..64e861617110 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1181,16 +1181,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason); -void cfg80211_cqm_config_free(struct wireless_dev *wdev) -{ - kfree(wdev->cqm_config); - wdev->cqm_config = NULL; -} - static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_cqm_config *cqm_config; unsigned int link_id; ASSERT_RTNL(); @@ -1227,7 +1222,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, kfree_sensitive(wdev->wext.keys); wdev->wext.keys = NULL; #endif - cfg80211_cqm_config_free(wdev); + wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work); + /* deleted from the list, so can't be found from nl80211 any more */ + cqm_config = rcu_access_pointer(wdev->cqm_config); + kfree_rcu(cqm_config, rcu_head); /* * Ensure that all events have been processed and @@ -1379,6 +1377,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif + wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work); + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) wdev->ps = true; else diff --git a/net/wireless/core.h b/net/wireless/core.h index 507d184b8b40..ba9c7170afa4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -295,12 +295,17 @@ struct cfg80211_beacon_registration { }; struct cfg80211_cqm_config { + struct rcu_head rcu_head; u32 rssi_hyst; s32 last_rssi_event_value; + enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; +void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, + struct wiphy_work *work); + void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ @@ -566,8 +571,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif -void cfg80211_cqm_config_free(struct wireless_dev *wdev); - void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid); void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev); void cfg80211_pmsr_free_wk(struct work_struct *work); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0c989a839e56..7a88361b3414 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12815,7 +12815,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info, } static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, + struct cfg80211_cqm_config *cqm_config) { struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; @@ -12824,7 +12825,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int err; /* RSSI reporting disabled? */ - if (!wdev->cqm_config) + if (!cqm_config) return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); /* @@ -12833,7 +12834,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, * connection is established and enough beacons received to calculate * the average. */ - if (!wdev->cqm_config->last_rssi_event_value && + if (!cqm_config->last_rssi_event_value && wdev->links[0].client.current_bss && rdev->ops->get_station) { struct station_info sinfo = {}; @@ -12847,30 +12848,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, cfg80211_sinfo_release_content(&sinfo); if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) - wdev->cqm_config->last_rssi_event_value = + cqm_config->last_rssi_event_value = (s8) sinfo.rx_beacon_signal_avg; } - last = wdev->cqm_config->last_rssi_event_value; - hyst = wdev->cqm_config->rssi_hyst; - n = wdev->cqm_config->n_rssi_thresholds; + last = cqm_config->last_rssi_event_value; + hyst = cqm_config->rssi_hyst; + n = cqm_config->n_rssi_thresholds; for (i = 0; i < n; i++) { i = array_index_nospec(i, n); - if (last < wdev->cqm_config->rssi_thresholds[i]) + if (last < cqm_config->rssi_thresholds[i]) break; } low_index = i - 1; if (low_index >= 0) { low_index = array_index_nospec(low_index, n); - low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + low = cqm_config->rssi_thresholds[low_index] - hyst; } else { low = S32_MIN; } if (i < n) { i = array_index_nospec(i, n); - high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + high = cqm_config->rssi_thresholds[i] + hyst - 1; } else { high = S32_MAX; } @@ -12883,6 +12884,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, u32 hysteresis) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_cqm_config *cqm_config = NULL, *old; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; int i, err; @@ -12900,10 +12902,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - wdev_lock(wdev); - cfg80211_cqm_config_free(wdev); - wdev_unlock(wdev); - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); @@ -12920,9 +12918,10 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, n_thresholds = 0; wdev_lock(wdev); - if (n_thresholds) { - struct cfg80211_cqm_config *cqm_config; + old = rcu_dereference_protected(wdev->cqm_config, + lockdep_is_held(&wdev->mtx)); + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), GFP_KERNEL); @@ -12937,11 +12936,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); - wdev->cqm_config = cqm_config; + rcu_assign_pointer(wdev->cqm_config, cqm_config); + } else { + RCU_INIT_POINTER(wdev->cqm_config, NULL); } - err = cfg80211_cqm_rssi_update(rdev, dev); - + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + if (err) { + rcu_assign_pointer(wdev->cqm_config, old); + kfree_rcu(cqm_config, rcu_head); + } else { + kfree_rcu(old, rcu_head); + } unlock: wdev_unlock(wdev); @@ -19092,9 +19098,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, s32 rssi_level, gfp_t gfp) { - struct sk_buff *msg; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_cqm_config *cqm_config; trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); @@ -19102,18 +19107,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) return; - if (wdev->cqm_config) { - wdev->cqm_config->last_rssi_event_value = rssi_level; + rcu_read_lock(); + cqm_config = rcu_dereference(wdev->cqm_config); + if (cqm_config) { + cqm_config->last_rssi_event_value = rssi_level; + cqm_config->last_rssi_event_type = rssi_event; + wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct wireless_dev *wdev = container_of(work, struct wireless_dev, + cqm_rssi_work); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + enum nl80211_cqm_rssi_threshold_event rssi_event; + struct cfg80211_cqm_config *cqm_config; + struct sk_buff *msg; + s32 rssi_level; - cfg80211_cqm_rssi_update(rdev, dev); + wdev_lock(wdev); + cqm_config = rcu_dereference_protected(wdev->cqm_config, + lockdep_is_held(&wdev->mtx)); + if (!wdev->cqm_config) + goto unlock; - if (rssi_level == 0) - rssi_level = wdev->cqm_config->last_rssi_event_value; - } + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); - msg = cfg80211_prepare_cqm(dev, NULL, gfp); + rssi_level = cqm_config->last_rssi_event_value; + rssi_event = cqm_config->last_rssi_event_type; + + msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL); if (!msg) - return; + goto unlock; if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, rssi_event)) @@ -19123,14 +19151,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_level)) goto nla_put_failure; - cfg80211_send_cqm(msg, gfp); + cfg80211_send_cqm(msg, GFP_KERNEL); - return; + goto unlock; nla_put_failure: nlmsg_free(msg); + unlock: + wdev_unlock(wdev); } -EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, u32 num_packets, -- cgit From 55e95bfccf6db8d26a66c46e1de50d53c59a6774 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Sep 2023 10:03:50 +0300 Subject: of: dynamic: Fix potential memory leak in of_changeset_action() Smatch complains that the error path where "action" is invalid leaks the "ce" allocation: drivers/of/dynamic.c:935 of_changeset_action() warn: possible memory leak of 'ce' Fix this by doing the validation before the allocation. Note that there is not any actual problem with upstream kernels. All callers of of_changeset_action() are static inlines with fixed action values. Fixes: 914d9d831e61 ("of: dynamic: Refactor action prints to not use "%pOF" inside devtree_lock") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202309011059.EOdr4im9-lkp@intel.com/ Signed-off-by: Dan Carpenter Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/7dfaf999-30ad-491c-9615-fb1138db121c@moroto.mountain Signed-off-by: Rob Herring --- drivers/of/dynamic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 0a3483e247a8..f63250c650ca 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -890,13 +890,13 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action, { struct of_changeset_entry *ce; + if (WARN_ON(action >= ARRAY_SIZE(action_names))) + return -EINVAL; + ce = kzalloc(sizeof(*ce), GFP_KERNEL); if (!ce) return -ENOMEM; - if (WARN_ON(action >= ARRAY_SIZE(action_names))) - return -EINVAL; - /* get a reference to the node */ ce->action = action; ce->np = of_node_get(np); -- cgit From 2f9426905a63be7ccf8cd10109caf1848aa0993a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 11 Sep 2023 14:38:07 +0800 Subject: ASoC: fsl: imx-pcm-rpmsg: Add SNDRV_PCM_INFO_BATCH flag The rpmsg pcm device is a device which should support double buffering. Found this issue with pipewire. When there is no SNDRV_PCM_INFO_BATCH flag in driver, the pipewire will set headroom to be zero, and because rpmsg pcm device don't support residue report, when the latency setting is small, the "delay" always larger than "target" in alsa-pcm.c, that reading next period data is not scheduled on time. With SNDRV_PCM_INFO_BATCH flag in driver, the pipewire will select a smaller period size for device, then the task of reading next period data will be scheduled on time. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1694414287-13291-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-pcm-rpmsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/imx-pcm-rpmsg.c b/sound/soc/fsl/imx-pcm-rpmsg.c index d63782b8bdef..bb736d45c9e0 100644 --- a/sound/soc/fsl/imx-pcm-rpmsg.c +++ b/sound/soc/fsl/imx-pcm-rpmsg.c @@ -19,6 +19,7 @@ static struct snd_pcm_hardware imx_rpmsg_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_NO_PERIOD_WAKEUP | -- cgit From 954998b60caa8f2a3bf3abe490de6f08d283687a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:37 -0400 Subject: NFS: Fix error handling for O_DIRECT write scheduling If we fail to schedule a request for transmission, there are 2 possibilities: 1) Either we hit a fatal error, and we just want to drop the remaining requests on the floor. 2) We were asked to try again, in which case we should allow the outstanding RPC calls to complete, so that we can recoalesce requests and try again. Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 47d892a1d363..ee88f0a6e7b8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode, static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; - struct nfs_page *req, *tmp; + struct nfs_page *req; LIST_HEAD(reqs); struct nfs_commit_info cinfo; - LIST_HEAD(failed); nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); @@ -549,27 +548,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); - dreq->flags = 0; - if (desc.pg_error < 0) + if (dreq->error < 0) { + desc.pg_error = dreq->error; + } else if (desc.pg_error != -EAGAIN) { + dreq->flags = 0; + if (!desc.pg_error) + desc.pg_error = -EIO; dreq->error = desc.pg_error; - else - dreq->error = -EIO; + } else + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; spin_unlock(&cinfo.inode->i_lock); + break; } nfs_release_request(req); } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) { - req = nfs_list_entry(failed.next); + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + if (desc.pg_error == -EAGAIN) + nfs_mark_request_commit(req, NULL, &cinfo, 0); + else + nfs_release_request(req); } if (put_dreq(dreq)) @@ -794,9 +802,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; + struct nfs_commit_info cinfo; ssize_t result = 0; size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + bool defer = false; trace_nfs_direct_write_schedule_iovec(dreq); @@ -837,17 +847,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; } - nfs_lock_request(req); - if (!nfs_pageio_add_request(&desc, req)) { - result = desc.pg_error; - nfs_unlock_and_release_request(req); - break; - } pgbase = 0; bytes -= req_len; requested_bytes += req_len; pos += req_len; dreq->bytes_left -= req_len; + + if (defer) { + nfs_mark_request_commit(req, NULL, &cinfo, 0); + continue; + } + + nfs_lock_request(req); + if (nfs_pageio_add_request(&desc, req)) + continue; + + /* Exit on hard errors */ + if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { + result = desc.pg_error; + nfs_unlock_and_release_request(req); + break; + } + + /* If the error is soft, defer remaining requests */ + nfs_init_cinfo_from_dreq(&cinfo, dreq); + spin_lock(&cinfo.inode->i_lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&cinfo.inode->i_lock); + nfs_unlock_request(req); + nfs_mark_request_commit(req, NULL, &cinfo, 0); + desc.pg_error = 0; + defer = true; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); -- cgit From ecd49f7a36fbccc884471f86fc43de6ca8d1f786 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:02 -0700 Subject: xfs: fix per-cpu CIL structure aggregation racing with dying cpus In commit 7c8ade2121200 ("xfs: implement percpu cil space used calculation"), the XFS committed (log) item list code was converted to use per-cpu lists and space tracking to reduce cpu contention when multiple threads are modifying different parts of the filesystem and hence end up contending on the log structures during transaction commit. Each CPU tracks its own commit items and space usage, and these do not have to be merged into the main CIL until either someone wants to push the CIL items, or we run over a soft threshold and switch to slower (but more accurate) accounting with atomics. Unfortunately, the for_each_cpu iteration suffers from the same race with cpu dying problem that was identified in commit 8b57b11cca88f ("pcpcntrs: fix dying cpu summation race") -- CPUs are removed from cpu_online_mask before the CPUHP_XFS_DEAD callback gets called. As a result, both CIL percpu structure aggregation functions fail to collect the items and accounted space usage at the correct point in time. If we're lucky, the items that are collected from the online cpus exceed the space given to those cpus, and the log immediately shuts down in xlog_cil_insert_items due to the (apparent) log reservation overrun. This happens periodically with generic/650, which exercises cpu hotplug vs. the filesystem code: smpboot: CPU 3 is now offline XFS (sda3): ctx ticket reservation ran out. Need to up reservation XFS (sda3): ticket reservation summary: XFS (sda3): unit res = 9268 bytes XFS (sda3): current res = -40 bytes XFS (sda3): original count = 1 XFS (sda3): remaining count = 1 XFS (sda3): Filesystem has been shut down due to log error (0x2). Applying the same sort of fix from 8b57b11cca88f to the CIL code seems to make the generic/650 problem go away, but I've been told that tglx was not happy when he saw: "...the only thing we actually need to care about is that percpu_counter_sum() iterates dying CPUs. That's trivial to do, and when there are no CPUs dying, it has no addition overhead except for a cpumask_or() operation." The CPU hotplug code is rather complex and difficult to understand and I don't want to try to understand the cpu hotplug locking well enough to use cpu_dying mask. Furthermore, there's a performance improvement that could be had here. Attach a private cpu mask to the CIL structure so that we can track exactly which cpus have accessed the percpu data at all. It doesn't matter if the cpu has since gone offline; log item aggregation will still find the items. Better yet, we skip cpus that have not recently logged anything. Worse yet, Ritesh Harjani and Eric Sandeen both reported today that CPU hot remove racing with an xfs mount can crash if the cpu_dead notifier tries to access the log but the mount hasn't yet set up the log. Link: https://lore.kernel.org/linux-xfs/ZOLzgBOuyWHapOyZ@dread.disaster.area/T/ Link: https://lore.kernel.org/lkml/877cuj1mt1.ffs@tglx/ Link: https://lore.kernel.org/lkml/20230414162755.281993820@linutronix.de/ Link: https://lore.kernel.org/linux-xfs/ZOVkjxWZq0YmjrJu@dread.disaster.area/T/ Cc: tglx@linutronix.de Cc: peterz@infradead.org Reported-by: ritesh.list@gmail.com Reported-by: sandeen@sandeen.net Fixes: af1c2146a50b ("xfs: introduce per-cpu CIL tracking structure") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_log_cil.c | 52 ++++++++++++++++----------------------------------- fs/xfs/xfs_log_priv.h | 14 ++++++-------- fs/xfs/xfs_super.c | 1 - 3 files changed, 22 insertions(+), 45 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index eccbfb99e894..ebc70aaa299c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -124,7 +124,7 @@ xlog_cil_push_pcp_aggregate( struct xlog_cil_pcp *cilpcp; int cpu; - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &ctx->cil_pcpmask) { cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); ctx->ticket->t_curr_res += cilpcp->space_reserved; @@ -165,7 +165,13 @@ xlog_cil_insert_pcp_aggregate( if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) return; - for_each_online_cpu(cpu) { + /* + * We can race with other cpus setting cil_pcpmask. However, we've + * atomically cleared PCP_SPACE which forces other threads to add to + * the global space used count. cil_pcpmask is a superset of cilpcp + * structures that could have a nonzero space_used. + */ + for_each_cpu(cpu, &ctx->cil_pcpmask) { int old, prev; cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); @@ -554,6 +560,7 @@ xlog_cil_insert_items( int iovhdr_res = 0, split_res = 0, ctx_res = 0; int space_used; int order; + unsigned int cpu_nr; struct xlog_cil_pcp *cilpcp; ASSERT(tp); @@ -577,7 +584,12 @@ xlog_cil_insert_items( * can't be scheduled away between split sample/update operations that * are done without outside locking to serialise them. */ - cilpcp = get_cpu_ptr(cil->xc_pcp); + cpu_nr = get_cpu(); + cilpcp = this_cpu_ptr(cil->xc_pcp); + + /* Tell the future push that there was work added by this CPU. */ + if (!cpumask_test_cpu(cpu_nr, &ctx->cil_pcpmask)) + cpumask_test_and_set_cpu(cpu_nr, &ctx->cil_pcpmask); /* * We need to take the CIL checkpoint unit reservation on the first @@ -663,7 +675,7 @@ xlog_cil_insert_items( continue; list_add_tail(&lip->li_cil, &cilpcp->log_items); } - put_cpu_ptr(cilpcp); + put_cpu(); /* * If we've overrun the reservation, dump the tx details before we move @@ -1790,38 +1802,6 @@ out_shutdown: return 0; } -/* - * Move dead percpu state to the relevant CIL context structures. - * - * We have to lock the CIL context here to ensure that nothing is modifying - * the percpu state, either addition or removal. Both of these are done under - * the CIL context lock, so grabbing that exclusively here will ensure we can - * safely drain the cilpcp for the CPU that is dying. - */ -void -xlog_cil_pcp_dead( - struct xlog *log, - unsigned int cpu) -{ - struct xfs_cil *cil = log->l_cilp; - struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); - struct xfs_cil_ctx *ctx; - - down_write(&cil->xc_ctx_lock); - ctx = cil->xc_ctx; - if (ctx->ticket) - ctx->ticket->t_curr_res += cilpcp->space_reserved; - cilpcp->space_reserved = 0; - - if (!list_empty(&cilpcp->log_items)) - list_splice_init(&cilpcp->log_items, &ctx->log_items); - if (!list_empty(&cilpcp->busy_extents)) - list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents); - atomic_add(cilpcp->space_used, &ctx->space_used); - cilpcp->space_used = 0; - up_write(&cil->xc_ctx_lock); -} - /* * Perform initial CIL structure initialisation. */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1bd2963e8fbd..af87648331d5 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -231,6 +231,12 @@ struct xfs_cil_ctx { struct work_struct discard_endio_work; struct work_struct push_work; atomic_t order_id; + + /* + * CPUs that could have added items to the percpu CIL data. Access is + * coordinated with xc_ctx_lock. + */ + struct cpumask cil_pcpmask; }; /* @@ -278,9 +284,6 @@ struct xfs_cil { wait_queue_head_t xc_push_wait; /* background push throttle */ void __percpu *xc_pcp; /* percpu CIL structures */ -#ifdef CONFIG_HOTPLUG_CPU - struct list_head xc_pcp_list; -#endif } ____cacheline_aligned_in_smp; /* xc_flags bit values */ @@ -705,9 +708,4 @@ xlog_kvmalloc( return p; } -/* - * CIL CPU dead notifier - */ -void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu); - #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1f77014c6e1a..ed29a5022e36 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2337,7 +2337,6 @@ xfs_cpu_dead( list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { spin_unlock(&xfs_mount_list_lock); xfs_inodegc_cpu_dead(mp, cpu); - xlog_cil_pcp_dead(mp->m_log, cpu); spin_lock(&xfs_mount_list_lock); } spin_unlock(&xfs_mount_list_lock); -- cgit From cfa2df68b7ceb49ac9eb2d295ab0c5974dbf17e7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:02 -0700 Subject: xfs: fix an agbno overflow in __xfs_getfsmap_datadev Dave Chinner reported that xfs/273 fails if the AG size happens to be an exact power of two. I traced this to an agbno integer overflow when the current GETFSMAP call is a continuation of a previous GETFSMAP call, and the last record returned was non-shareable space at the end of an AG. __xfs_getfsmap_datadev sets up a data device query by converting the incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno and agbno pair. In the (failing) case of where fmr_blockcount of the low key is nonzero and the record was for a non-shareable extent, it will add fmr_blockcount to start_fsb and info->low.rm_startblock. If the low key was actually the last record for that AG, then this addition causes info->low.rm_startblock to point beyond EOAG. When the rmapbt range query starts, it'll return an empty set, and fsmap moves on to the next AG. Or so I thought. Remember how we added to start_fsb? If agsize < 1<low and moves on to the next AG. If agsize == 1<high to EOFS (which is now has a lower rm_startblock than info->low), and the ranged btree query code will return -EINVAL. If it's not the last AG, we ignore all records for the intermediate AGs. Oops. Fix this by decoding start_fsb into agno and agbno only after making adjustments to start_fsb. This means that info->low.rm_startblock will always be set to a valid agbno, and we always start the rmapbt iteration in the correct AG. While we're at it, fix the predicate for determining if an fsmap record represents non-shareable space to include file data on pre-reflink filesystems. Reported-by: Dave Chinner Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_fsmap.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 10403ba9b58f..736e5545f584 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -565,6 +565,19 @@ err: } #endif /* CONFIG_XFS_RT */ +static inline bool +rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) +{ + if (!xfs_has_reflink(mp)) + return true; + if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) + return true; + if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | + XFS_RMAP_UNWRITTEN)) + return true; + return false; +} + /* Execute a getfsmap query against the regular data device. */ STATIC int __xfs_getfsmap_datadev( @@ -598,7 +611,6 @@ __xfs_getfsmap_datadev( * low to the fsmap low key and max out the high key to the end * of the AG. */ - info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); if (error) @@ -608,12 +620,9 @@ __xfs_getfsmap_datadev( /* Adjust the low key if we are continuing from where we left off. */ if (info->low.rm_blockcount == 0) { - /* empty */ - } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || - (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | - XFS_RMAP_BMBT_BLOCK | - XFS_RMAP_UNWRITTEN))) { - info->low.rm_startblock += info->low.rm_blockcount; + /* No previous record from which to continue */ + } else if (rmap_not_shareable(mp, &info->low)) { + /* Last record seen was an unshareable extent */ info->low.rm_owner = 0; info->low.rm_offset = 0; @@ -621,8 +630,10 @@ __xfs_getfsmap_datadev( if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) return 0; } else { + /* Last record seen was a shareable file data extent */ info->low.rm_offset += info->low.rm_blockcount; } + info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); info->high.rm_startblock = -1U; info->high.rm_owner = ULLONG_MAX; -- cgit From 62334fab47621dd91ab30dd5bb6c43d78a8ec279 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:03 -0700 Subject: xfs: use per-mount cpumask to track nonempty percpu inodegc lists Directly track which CPUs have contributed to the inodegc percpu lists instead of trusting the cpu online mask. This eliminates a theoretical problem where the inodegc flush functions might fail to flush a CPU's inodes if that CPU happened to be dying at exactly the same time. Most likely nobody's noticed this because the CPU dead hook moves the percpu inodegc list to another CPU and schedules that worker immediately. But it's quite possible that this is a subtle race leading to UAF if the inodegc flush were part of an unmount. Further benefits: This reduces the overhead of the inodegc flush code slightly by allowing us to ignore CPUs that have empty lists. Better yet, it reduces our dependence on the cpu online masks, which have been the cause of confusion and drama lately. Fixes: ab23a7768739 ("xfs: per-cpu deferred inode inactivation queues") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_icache.c | 78 +++++++++++++++++++---------------------------------- fs/xfs/xfs_icache.h | 1 - fs/xfs/xfs_mount.h | 6 +++-- fs/xfs/xfs_super.c | 4 +-- 4 files changed, 33 insertions(+), 56 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index e541f5c0bc25..30d7454a9b93 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -443,7 +443,7 @@ xfs_inodegc_queue_all( int cpu; bool ret = false; - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { gc = per_cpu_ptr(mp->m_inodegc, cpu); if (!llist_empty(&gc->list)) { mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); @@ -463,7 +463,7 @@ xfs_inodegc_wait_all( int error = 0; flush_workqueue(mp->m_inodegc_wq); - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { struct xfs_inodegc *gc; gc = per_cpu_ptr(mp->m_inodegc, cpu); @@ -1845,9 +1845,17 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + struct xfs_mount *mp = gc->mp; unsigned int nofs_flag; - ASSERT(gc->cpu == smp_processor_id()); + /* + * Clear the cpu mask bit and ensure that we have seen the latest + * update of the gc structure associated with this CPU. This matches + * with the release semantics used when setting the cpumask bit in + * xfs_inodegc_queue. + */ + cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask); + smp_mb__after_atomic(); WRITE_ONCE(gc->items, 0); @@ -1862,7 +1870,7 @@ xfs_inodegc_worker( nofs_flag = memalloc_nofs_save(); ip = llist_entry(node, struct xfs_inode, i_gclist); - trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); + trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits)); WRITE_ONCE(gc->shrinker_hits, 0); llist_for_each_entry_safe(ip, n, node, i_gclist) { @@ -2057,6 +2065,7 @@ xfs_inodegc_queue( struct xfs_inodegc *gc; int items; unsigned int shrinker_hits; + unsigned int cpu_nr; unsigned long queue_delay = 1; trace_xfs_inode_set_need_inactive(ip); @@ -2064,18 +2073,28 @@ xfs_inodegc_queue( ip->i_flags |= XFS_NEED_INACTIVE; spin_unlock(&ip->i_flags_lock); - gc = get_cpu_ptr(mp->m_inodegc); + cpu_nr = get_cpu(); + gc = this_cpu_ptr(mp->m_inodegc); llist_add(&ip->i_gclist, &gc->list); items = READ_ONCE(gc->items); WRITE_ONCE(gc->items, items + 1); shrinker_hits = READ_ONCE(gc->shrinker_hits); + /* + * Ensure the list add is always seen by anyone who finds the cpumask + * bit set. This effectively gives the cpumask bit set operation + * release ordering semantics. + */ + smp_mb__before_atomic(); + if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask)) + cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask); + /* * We queue the work while holding the current CPU so that the work * is scheduled to run on this CPU. */ if (!xfs_is_inodegc_enabled(mp)) { - put_cpu_ptr(gc); + put_cpu(); return; } @@ -2085,7 +2104,7 @@ xfs_inodegc_queue( trace_xfs_inodegc_queue(mp, __return_address); mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, queue_delay); - put_cpu_ptr(gc); + put_cpu(); if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { trace_xfs_inodegc_throttle(mp, __return_address); @@ -2093,47 +2112,6 @@ xfs_inodegc_queue( } } -/* - * Fold the dead CPU inodegc queue into the current CPUs queue. - */ -void -xfs_inodegc_cpu_dead( - struct xfs_mount *mp, - unsigned int dead_cpu) -{ - struct xfs_inodegc *dead_gc, *gc; - struct llist_node *first, *last; - unsigned int count = 0; - - dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); - cancel_delayed_work_sync(&dead_gc->work); - - if (llist_empty(&dead_gc->list)) - return; - - first = dead_gc->list.first; - last = first; - while (last->next) { - last = last->next; - count++; - } - dead_gc->list.first = NULL; - dead_gc->items = 0; - - /* Add pending work to current CPU */ - gc = get_cpu_ptr(mp->m_inodegc); - llist_add_batch(first, last, &gc->list); - count += READ_ONCE(gc->items); - WRITE_ONCE(gc->items, count); - - if (xfs_is_inodegc_enabled(mp)) { - trace_xfs_inodegc_queue(mp, __return_address); - mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, - 0); - } - put_cpu_ptr(gc); -} - /* * We set the inode flag atomically with the radix tree tag. Once we get tag * lookups on the radix tree, this inode flag can go away. @@ -2195,7 +2173,7 @@ xfs_inodegc_shrinker_count( if (!xfs_is_inodegc_enabled(mp)) return 0; - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { gc = per_cpu_ptr(mp->m_inodegc, cpu); if (!llist_empty(&gc->list)) return XFS_INODEGC_SHRINKER_COUNT; @@ -2220,7 +2198,7 @@ xfs_inodegc_shrinker_scan( trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address); - for_each_online_cpu(cpu) { + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { gc = per_cpu_ptr(mp->m_inodegc, cpu); if (!llist_empty(&gc->list)) { unsigned int h = READ_ONCE(gc->shrinker_hits); diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 2fa6f2e09d07..905944dafbe5 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -79,7 +79,6 @@ void xfs_inodegc_push(struct xfs_mount *mp); int xfs_inodegc_flush(struct xfs_mount *mp); void xfs_inodegc_stop(struct xfs_mount *mp); void xfs_inodegc_start(struct xfs_mount *mp); -void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu); int xfs_inodegc_register_shrinker(struct xfs_mount *mp); #endif diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a25eece3be2b..f4a8879ba0e9 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -60,6 +60,7 @@ struct xfs_error_cfg { * Per-cpu deferred inode inactivation GC lists. */ struct xfs_inodegc { + struct xfs_mount *mp; struct llist_head list; struct delayed_work work; int error; @@ -67,9 +68,7 @@ struct xfs_inodegc { /* approximate count of inodes in the list */ unsigned int items; unsigned int shrinker_hits; -#if defined(DEBUG) || defined(XFS_WARN) unsigned int cpu; -#endif }; /* @@ -249,6 +248,9 @@ typedef struct xfs_mount { unsigned int *m_errortag; struct xfs_kobj m_errortag_kobj; #endif + + /* cpus that have inodes queued for inactivation */ + struct cpumask m_inodegc_cpumask; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ed29a5022e36..3a91ba3a4c62 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1135,9 +1135,8 @@ xfs_inodegc_init_percpu( for_each_possible_cpu(cpu) { gc = per_cpu_ptr(mp->m_inodegc, cpu); -#if defined(DEBUG) || defined(XFS_WARN) gc->cpu = cpu; -#endif + gc->mp = mp; init_llist_head(&gc->list); gc->items = 0; gc->error = 0; @@ -2336,7 +2335,6 @@ xfs_cpu_dead( spin_lock(&xfs_mount_list_lock); list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { spin_unlock(&xfs_mount_list_lock); - xfs_inodegc_cpu_dead(mp, cpu); spin_lock(&xfs_mount_list_lock); } spin_unlock(&xfs_mount_list_lock); -- cgit From f5bfa695f02e02415e4bfb36bd83a8bc933a6d4f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:04 -0700 Subject: xfs: remove the all-mounts list Revert commit 0ed17f01c8540 ("xfs: introduce all-mounts list for cpu hotplug notifications") because the cpu hotplug hooks are now pointless, so we don't need this list anymore. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_mount.h | 1 - fs/xfs/xfs_super.c | 39 --------------------------------------- 2 files changed, 40 deletions(-) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f4a8879ba0e9..6e2806654e94 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -97,7 +97,6 @@ typedef struct xfs_mount { xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ - struct list_head m_mount_list; /* global mount list */ void __percpu *m_inodegc; /* percpu inodegc structures */ /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3a91ba3a4c62..5cced7713cd2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -56,28 +56,6 @@ static struct kset *xfs_kset; /* top-level xfs sysfs dir */ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #endif -#ifdef CONFIG_HOTPLUG_CPU -static LIST_HEAD(xfs_mount_list); -static DEFINE_SPINLOCK(xfs_mount_list_lock); - -static inline void xfs_mount_list_add(struct xfs_mount *mp) -{ - spin_lock(&xfs_mount_list_lock); - list_add(&mp->m_mount_list, &xfs_mount_list); - spin_unlock(&xfs_mount_list_lock); -} - -static inline void xfs_mount_list_del(struct xfs_mount *mp) -{ - spin_lock(&xfs_mount_list_lock); - list_del(&mp->m_mount_list); - spin_unlock(&xfs_mount_list_lock); -} -#else /* !CONFIG_HOTPLUG_CPU */ -static inline void xfs_mount_list_add(struct xfs_mount *mp) {} -static inline void xfs_mount_list_del(struct xfs_mount *mp) {} -#endif - enum xfs_dax_mode { XFS_DAX_INODE = 0, XFS_DAX_ALWAYS = 1, @@ -1167,7 +1145,6 @@ xfs_fs_put_super( xfs_freesb(mp); xchk_mount_stats_free(mp); free_percpu(mp->m_stats.xs_stats); - xfs_mount_list_del(mp); xfs_inodegc_free_percpu(mp); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); @@ -1576,13 +1553,6 @@ xfs_fs_fill_super( if (error) goto out_destroy_counters; - /* - * All percpu data structures requiring cleanup when a cpu goes offline - * must be allocated before adding this @mp to the cpu-dead handler's - * mount list. - */ - xfs_mount_list_add(mp); - /* Allocate stats memory before we do operations that might use it */ mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); if (!mp->m_stats.xs_stats) { @@ -1780,7 +1750,6 @@ xfs_fs_fill_super( out_free_stats: free_percpu(mp->m_stats.xs_stats); out_destroy_inodegc: - xfs_mount_list_del(mp); xfs_inodegc_free_percpu(mp); out_destroy_counters: xfs_destroy_percpu_counters(mp); @@ -2330,14 +2299,6 @@ static int xfs_cpu_dead( unsigned int cpu) { - struct xfs_mount *mp, *n; - - spin_lock(&xfs_mount_list_lock); - list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { - spin_unlock(&xfs_mount_list_lock); - spin_lock(&xfs_mount_list_lock); - } - spin_unlock(&xfs_mount_list_lock); return 0; } -- cgit From ef7d9593390a050c50eba5fc02d2cb65a1104434 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:04 -0700 Subject: xfs: remove CPU hotplug infrastructure There are no users of the cpu hotplug hooks in xfs now, so remove it. This reverts f1653c2e2831e ("xfs: introduce CPU hotplug infrastructure"). Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_super.c | 42 +----------------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5cced7713cd2..c8a2dae1dd65 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2294,39 +2294,6 @@ xfs_destroy_workqueues(void) destroy_workqueue(xfs_alloc_wq); } -#ifdef CONFIG_HOTPLUG_CPU -static int -xfs_cpu_dead( - unsigned int cpu) -{ - return 0; -} - -static int __init -xfs_cpu_hotplug_init(void) -{ - int error; - - error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, - xfs_cpu_dead); - if (error < 0) - xfs_alert(NULL, -"Failed to initialise CPU hotplug, error %d. XFS is non-functional.", - error); - return error; -} - -static void -xfs_cpu_hotplug_destroy(void) -{ - cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); -} - -#else /* !CONFIG_HOTPLUG_CPU */ -static inline int xfs_cpu_hotplug_init(void) { return 0; } -static inline void xfs_cpu_hotplug_destroy(void) {} -#endif - STATIC int __init init_xfs_fs(void) { @@ -2343,13 +2310,9 @@ init_xfs_fs(void) xfs_dir_startup(); - error = xfs_cpu_hotplug_init(); - if (error) - goto out; - error = xfs_init_caches(); if (error) - goto out_destroy_hp; + goto out; error = xfs_init_workqueues(); if (error) @@ -2433,8 +2396,6 @@ init_xfs_fs(void) xfs_destroy_workqueues(); out_destroy_caches: xfs_destroy_caches(); - out_destroy_hp: - xfs_cpu_hotplug_destroy(); out: return error; } @@ -2458,7 +2419,6 @@ exit_xfs_fs(void) xfs_destroy_workqueues(); xfs_destroy_caches(); xfs_uuid_table_free(); - xfs_cpu_hotplug_destroy(); } module_init(init_xfs_fs); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 06dda85f0424..068f7738be22 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, - CPUHP_XFS_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, -- cgit From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:19 +0200 Subject: mtd: rawnand: marvell: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: stable@vger.kernel.org Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver") Reported-by: Aviram Dali Signed-off-by: Miquel Raynal Tested-by: Ravi Chandra Minnikanti Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/marvell_nand.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 2c94da7a3b3a..b841a81cb128 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, .ndcb[2] = NDCB2_ADDR5_PAGE(page), }; unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0); + u8 status; int ret; /* NFCv2 needs more information about the operation being executed */ @@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, ret = marvell_nfc_wait_op(chip, PSEC_TO_MSEC(sdr->tPROG_max)); - return ret; + if (ret) + return ret; + + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip, @@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, int data_len = lt->data_bytes; int spare_len = lt->spare_bytes; int chunk, ret; + u8 status; marvell_nfc_select_target(chip, chip->cur_cs); @@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, if (ret) return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + return 0; } -- cgit From 6792b7fce610bcd1cf3e07af3607fe7e2c38c1d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:00:34 +0200 Subject: mtd: physmap-core: Restore map_rom fallback When the exact mapping type driver was not available, the old physmap_of_core driver fell back to mapping the region as ROM. Unfortunately this feature was lost when the DT and pdata cases were merged. Revive this useful feature. Fixes: 642b1e8dbed7bbbf ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/550e8c8c1da4c4baeb3d71ff79b14a18d4194f9e.1693407371.git.geert+renesas@glider.be --- drivers/mtd/maps/physmap-core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 78710fbc8e7f..fc8721339282 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -551,6 +551,17 @@ static int physmap_flash_probe(struct platform_device *dev) if (info->probe_type) { info->mtds[i] = do_map_probe(info->probe_type, &info->maps[i]); + + /* Fall back to mapping region as ROM */ + if (!info->mtds[i] && IS_ENABLED(CONFIG_MTD_ROM) && + strcmp(info->probe_type, "map_rom")) { + dev_warn(&dev->dev, + "map_probe() failed for type %s\n", + info->probe_type); + + info->mtds[i] = do_map_probe("map_rom", + &info->maps[i]); + } } else { int j; -- cgit From 9836a987860e33943945d4b257729a4f94eae576 Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Tue, 5 Sep 2023 17:56:37 +0300 Subject: mtd: spinand: micron: correct bitmask for ecc status Valid bitmask is 0x70 in the status register. Fixes: a508e8875e13 ("mtd: spinand: Add initial support for Micron MT29F2G01ABAGD") Signed-off-by: Martin Kurbanov Reviewed-by: Frieder Schrempf Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230905145637.139068-1-mmkurbanov@sberdevices.ru --- drivers/mtd/nand/spi/micron.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c index 50b7295bc922..12601bc4227a 100644 --- a/drivers/mtd/nand/spi/micron.c +++ b/drivers/mtd/nand/spi/micron.c @@ -12,7 +12,7 @@ #define SPINAND_MFR_MICRON 0x2c -#define MICRON_STATUS_ECC_MASK GENMASK(7, 4) +#define MICRON_STATUS_ECC_MASK GENMASK(6, 4) #define MICRON_STATUS_ECC_NO_BITFLIPS (0 << 4) #define MICRON_STATUS_ECC_1TO3_BITFLIPS (1 << 4) #define MICRON_STATUS_ECC_4TO6_BITFLIPS (3 << 4) -- cgit From 779873ec81306d2c40c459fa7c91a5d40655510d Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 6 Sep 2023 01:48:15 -0700 Subject: power: supply: mt6370: Fix missing error code in mt6370_chg_toggle_cfo() When mt6370_chg_field_get() suceeds, ret is set to zero and returning zero when flash led is still in strobe mode looks incorrect. Fixes: 233cb8a47d65 ("power: supply: mt6370: Add MediaTek MT6370 charger driver") Signed-off-by: Harshit Mogalapalli Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: ChiaEn Wu Link: https://lore.kernel.org/r/20230906084815.2827930-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/mt6370-charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/mt6370-charger.c b/drivers/power/supply/mt6370-charger.c index f27dae5043f5..a9641bd3d8cf 100644 --- a/drivers/power/supply/mt6370-charger.c +++ b/drivers/power/supply/mt6370-charger.c @@ -324,7 +324,7 @@ static int mt6370_chg_toggle_cfo(struct mt6370_priv *priv) if (fl_strobe) { dev_err(priv->dev, "Flash led is still in strobe mode\n"); - return ret; + return -EINVAL; } /* cfo off */ -- cgit From 9a85653ed3b9a9b7b31d95a34b64b990c3d33ca1 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Thu, 3 Aug 2023 16:56:23 -0300 Subject: iio: dac: ad3552r: Correct device IDs Device IDs for AD3542R and AD3552R were swapped leading to unintended collection of DAC output ranges being used for each design. Change device ID values so they are correct for each DAC chip. Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Marcelo Schmitt Reported-by: Chandrakant Minajigi Link: https://lore.kernel.org/r/011f480220799fbfabdd53896f8a2f251ad995ad.1691091324.git.marcelo.schmitt1@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index d5ea1a1be122..a492e8f2fc0f 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -140,8 +140,8 @@ enum ad3552r_ch_vref_select { }; enum ad3542r_id { - AD3542R_ID = 0x4008, - AD3552R_ID = 0x4009, + AD3542R_ID = 0x4009, + AD3552R_ID = 0x4008, }; enum ad3552r_ch_output_range { -- cgit From 85dfb43bf69281adb1f345dfd9a39faf2e5a718d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Fri, 11 Aug 2023 16:58:29 +0100 Subject: iio: pressure: bmp280: Fix NULL pointer exception The bmp085 EOC IRQ support is optional, but the driver's common probe function queries the IRQ properties whether or not it exists, which can trigger a NULL pointer exception. Avoid any exception by making the query conditional on the possession of a valid IRQ. Fixes: aae953949651 ("iio: pressure: bmp280: add support for BMP085 EOC interrupt") Signed-off-by: Phil Elwell Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20230811155829.51208-1-phil@raspberrypi.com Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 6089f3f9d8f4..a2ef1373a274 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -2179,7 +2179,7 @@ int bmp280_common_probe(struct device *dev, * however as it happens, the BMP085 shares the chip ID of BMP180 * so we look for an IRQ if we have that. */ - if (irq > 0 || (chip_id == BMP180_CHIP_ID)) { + if (irq > 0 && (chip_id == BMP180_CHIP_ID)) { ret = bmp085_fetch_eoc_irq(dev, name, irq, data); if (ret) return ret; -- cgit From 287d998af24326b009ae0956820a3188501b34a0 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 7 Aug 2023 17:38:05 +0300 Subject: iio: admv1013: add mixer_vgate corner cases Include the corner cases in the computation of the MIXER_VGATE register value. According to the datasheet: The MIXER_VGATE values follows the VCM such as, that for a 0V to 1.8V VCM, MIXER_VGATE = 23.89 VCM + 81, and for a > 1.8V to 2.6V VCM, MIXER_VGATE = 23.75 VCM + 1.25. Fixes: da35a7b526d9 ("iio: frequency: admv1013: add support for ADMV1013") Signed-off-by: Antoniu Miclaus Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230807143806.6954-1-antoniu.miclaus@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/admv1013.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index 6355c1f28423..92923074f930 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -351,9 +351,9 @@ static int admv1013_update_mixer_vgate(struct admv1013_state *st) if (vcm < 0) return vcm; - if (vcm < 1800000) + if (vcm <= 1800000) mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100; - else if (vcm > 1800000 && vcm < 2600000) + else if (vcm > 1800000 && vcm <= 2600000) mixer_vgate = (2375 * vcm / 1000000 + 125) / 100; else return -EINVAL; -- cgit From 9855d60cfc720ff32355484c119acafd3c4dc806 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 11 Sep 2023 10:46:16 +0300 Subject: spi: intel-pci: Add support for Granite Rapids SPI serial flash Intel Granite Rapids has a flash controller that is compatible with the other Cannon Lake derivatives. Add Granite Rapids PCI ID to the driver list of supported devices. Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20230911074616.3473347-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index a7381e774b95..57d767a68e7b 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -72,6 +72,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, -- cgit From d52b59315bf5e86e83c00bfae47cedd388dad6a8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:20 +0800 Subject: bpf: Adjust size_index according to the value of KMALLOC_MIN_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following warning was reported when running "./test_progs -a link_api -a linked_list" on a RISC-V QEMU VM: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 261 at kernel/bpf/memalloc.c:342 bpf_mem_refill Modules linked in: bpf_testmod(OE) CPU: 3 PID: 261 Comm: test_progs- ... 6.5.0-rc5-01743-gdcb152bb8328 #2 Hardware name: riscv-virtio,qemu (DT) epc : bpf_mem_refill+0x1fc/0x206 ra : irq_work_single+0x68/0x70 epc : ffffffff801b1bc4 ra : ffffffff8015fe84 sp : ff2000000001be20 gp : ffffffff82d26138 tp : ff6000008477a800 t0 : 0000000000046600 t1 : ffffffff812b6ddc t2 : 0000000000000000 s0 : ff2000000001be70 s1 : ff5ffffffffe8998 a0 : ff5ffffffffe8998 a1 : ff600003fef4b000 a2 : 000000000000003f a3 : ffffffff80008250 a4 : 0000000000000060 a5 : 0000000000000080 a6 : 0000000000000000 a7 : 0000000000735049 s2 : ff5ffffffffe8998 s3 : 0000000000000022 s4 : 0000000000001000 s5 : 0000000000000007 s6 : ff5ffffffffe8570 s7 : ffffffff82d6bd30 s8 : 000000000000003f s9 : ffffffff82d2c5e8 s10: 000000000000ffff s11: ffffffff82d2c5d8 t3 : ffffffff81ea8f28 t4 : 0000000000000000 t5 : ff6000008fd28278 t6 : 0000000000040000 [] bpf_mem_refill+0x1fc/0x206 [] irq_work_single+0x68/0x70 [] irq_work_run_list+0x28/0x36 [] irq_work_run+0x38/0x66 [] handle_IPI+0x3a/0xb4 [] handle_percpu_devid_irq+0xa4/0x1f8 [] generic_handle_domain_irq+0x28/0x36 [] ipi_mux_process+0xac/0xfa [] sbi_ipi_handle+0x2e/0x88 [] generic_handle_domain_irq+0x28/0x36 [] riscv_intc_irq+0x36/0x4e [] handle_riscv_irq+0x54/0x86 [] do_irq+0x66/0x98 ---[ end trace 0000000000000000 ]--- The warning is due to WARN_ON_ONCE(tgt->unit_size != c->unit_size) in free_bulk(). The direct reason is that a object is allocated and freed by bpf_mem_caches with different unit_size. The root cause is that KMALLOC_MIN_SIZE is 64 and there is no 96-bytes slab cache in the specific VM. When linked_list test allocates a 72-bytes object through bpf_obj_new(), bpf_global_ma will allocate it from a bpf_mem_cache with 96-bytes unit_size, but this bpf_mem_cache is backed by 128-bytes slab cache. When the object is freed, bpf_mem_free() uses ksize() to choose the corresponding bpf_mem_cache. Because the object is allocated from 128-bytes slab cache, ksize() returns 128, bpf_mem_free() chooses a 128-bytes bpf_mem_cache to free the object and triggers the warning. A similar warning will also be reported when using CONFIG_SLAB instead of CONFIG_SLUB in a x86-64 kernel. Because CONFIG_SLUB defines KMALLOC_MIN_SIZE as 8 but CONFIG_SLAB defines KMALLOC_MIN_SIZE as 32. An alternative fix is to use kmalloc_size_round() in bpf_mem_alloc() to choose a bpf_mem_cache which has the same unit_size with the backing slab cache, but it may introduce performance degradation, so fix the warning by adjusting the indexes in size_index according to the value of KMALLOC_MIN_SIZE just like setup_kmalloc_cache_index_table() does. Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.") Reported-by: Björn Töpel Closes: https://lore.kernel.org/bpf/87jztjmmy4.fsf@all.your.base.are.belong.to.us Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 9c49ae53deaf..98d9e96fba3c 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -916,3 +916,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } + +/* Most of the logic is taken from setup_kmalloc_cache_index_table() */ +static __init int bpf_mem_cache_adjust_size(void) +{ + unsigned int size, index; + + /* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be + * up-to 256-bytes. + */ + size = KMALLOC_MIN_SIZE; + if (size <= 192) + index = size_index[(size - 1) / 8]; + else + index = fls(size - 1) - 1; + for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + + /* The minimal alignment is 64-bytes, so disable 96-bytes cache and + * use 128-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 64) { + index = size_index[(128 - 1) / 8]; + for (size = 64 + 8; size <= 96; size += 8) + size_index[(size - 1) / 8] = index; + } + + /* The minimal alignment is 128-bytes, so disable 192-bytes cache and + * use 256-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 128) { + index = fls(256 - 1) - 1; + for (size = 128 + 8; size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + } + + return 0; +} +subsys_initcall(bpf_mem_cache_adjust_size); -- cgit From b1d53958b69312e43c118d4093d8f93d3f6f80af Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:21 +0800 Subject: bpf: Don't prefill for unused bpf_mem_cache When the unit_size of a bpf_mem_cache is unmatched with the object_size of the underlying slab cache, the bpf_mem_cache will not be used, and the allocation will be redirected to a bpf_mem_cache with a bigger unit_size instead, so there is no need to prefill for these unused bpf_mem_caches. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 98d9e96fba3c..90c1ed8210a2 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c) * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. */ - -static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); if (c->unit_size <= 256) { @@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) c->high_watermark = max(96 * 256 / c->unit_size, 3); } c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1); +} +static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +{ /* To avoid consuming memory assume that 1st run of bpf * prog won't be doing more than 4 map_update_elem from * irq disabled region @@ -521,6 +523,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; + init_refill_work(c); prefill_mem_cache(c, cpu); } ma->cache = pc; @@ -544,6 +547,15 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->unit_size = sizes[i]; c->objcg = objcg; c->tgt = c; + + init_refill_work(c); + /* Another bpf_mem_cache will be used when allocating + * c->unit_size in bpf_mem_alloc(), so doesn't prefill + * for the bpf_mem_cache because these free objects will + * never be used. + */ + if (i != bpf_mem_cache_idx(c->unit_size)) + continue; prefill_mem_cache(c, cpu); } } -- cgit From c930472552022bd09aab3cd946ba3f243070d5c7 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:22 +0800 Subject: bpf: Ensure unit_size is matched with slab cache object size Add extra check in bpf_mem_alloc_init() to ensure the unit_size of bpf_mem_cache is matched with the object_size of underlying slab cache. If these two sizes are unmatched, print a warning once and return -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and the potential issue can be prevented. Suggested-by: Alexei Starovoitov Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 90c1ed8210a2..1c22b90e754a 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); } +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) +{ + struct llist_node *first; + unsigned int obj_size; + + first = c->free_llist.first; + if (!first) + return 0; + + obj_size = ksize(first); + if (obj_size != c->unit_size) { + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", + idx, obj_size, c->unit_size); + return -EINVAL; + } + return 0; +} + /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * @@ -496,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; - int cpu, i, unit_size, percpu_size = 0; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); @@ -537,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; + err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif @@ -557,10 +576,20 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) if (i != bpf_mem_cache_idx(c->unit_size)) continue; prefill_mem_cache(c, cpu); + err = check_obj_size(c, i); + if (err) + goto out; } } + +out: ma->caches = pcc; - return 0; + /* refill_work is either zeroed or initialized, so it is safe to + * call irq_work_sync(). + */ + if (err) + bpf_mem_alloc_destroy(ma); + return err; } static void drain_mem_cache(struct bpf_mem_cache *c) -- cgit From f0a42ab5890f749626b35f9fddd8d0704fc89524 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 8 Sep 2023 21:39:23 +0800 Subject: selftests/bpf: Test all valid alloc sizes for bpf mem allocator Add a test to test all possible and valid allocation size for bpf memory allocator. For each possible allocation size, the test uses the following two steps to test the alloc and free path: 1) allocate N (N > high_watermark) objects to trigger the refill executed in irq_work. 2) free N objects to trigger the freeing executed in irq_work. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230908133923.2675053-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_bpf_ma.c | 50 +++++++++ tools/testing/selftests/bpf/progs/test_bpf_ma.c | 123 +++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_ma.c diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c new file mode 100644 index 000000000000..0cca4e8ae38e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_bpf_ma.skel.h" + +void test_test_bpf_ma(void) +{ + struct test_bpf_ma *skel; + struct btf *btf; + int i, err; + + skel = test_bpf_ma__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + btf = bpf_object__btf(skel->obj); + if (!ASSERT_OK_PTR(btf, "btf")) + goto out; + + for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { + char name[32]; + int id; + + snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, "bin_data")) + goto out; + skel->rodata->data_btf_ids[i] = id; + } + + err = test_bpf_ma__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + err = test_bpf_ma__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_OK(skel->bss->err, "test error"); +out: + test_bpf_ma__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c new file mode 100644 index 000000000000..ecde41ae0fc8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct generic_map_value { + void *data; +}; + +char _license[] SEC("license") = "GPL"; + +const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + +int err = 0; +int pid = 0; + +#define DEFINE_ARRAY_WITH_KPTR(_size) \ + struct bin_data_##_size { \ + char data[_size - sizeof(void *)]; \ + }; \ + struct map_value_##_size { \ + struct bin_data_##_size __kptr * data; \ + /* To emit BTF info for bin_data_xx */ \ + struct bin_data_##_size not_used; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct map_value_##_size); \ + __uint(max_entries, 128); \ + } array_##_size SEC(".maps"); + +static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + new = bpf_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) { + err = 2; + return; + } + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_obj_drop(old); + err = 3; + return; + } + } + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 4; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) { + err = 5; + return; + } + bpf_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ + batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + +DEFINE_ARRAY_WITH_KPTR(8); +DEFINE_ARRAY_WITH_KPTR(16); +DEFINE_ARRAY_WITH_KPTR(32); +DEFINE_ARRAY_WITH_KPTR(64); +DEFINE_ARRAY_WITH_KPTR(96); +DEFINE_ARRAY_WITH_KPTR(128); +DEFINE_ARRAY_WITH_KPTR(192); +DEFINE_ARRAY_WITH_KPTR(256); +DEFINE_ARRAY_WITH_KPTR(512); +DEFINE_ARRAY_WITH_KPTR(1024); +DEFINE_ARRAY_WITH_KPTR(2048); +DEFINE_ARRAY_WITH_KPTR(4096); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int test_bpf_mem_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free 128 8-bytes objects in batch to trigger freeing. + */ + CALL_BATCH_ALLOC_FREE(8, 128, 0); + CALL_BATCH_ALLOC_FREE(16, 128, 1); + CALL_BATCH_ALLOC_FREE(32, 128, 2); + CALL_BATCH_ALLOC_FREE(64, 128, 3); + CALL_BATCH_ALLOC_FREE(96, 128, 4); + CALL_BATCH_ALLOC_FREE(128, 128, 5); + CALL_BATCH_ALLOC_FREE(192, 128, 6); + CALL_BATCH_ALLOC_FREE(256, 128, 7); + CALL_BATCH_ALLOC_FREE(512, 64, 8); + CALL_BATCH_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_ALLOC_FREE(4096, 8, 11); + + return 0; +} -- cgit From 1aa2a9f27627447da247997c34c71af9402fa237 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Aug 2023 11:48:09 +0100 Subject: dt-bindings: clock: versaclock3: Add description for #clock-cells property Add description for "#clock-cells" property to map indexes to the clock output in the Table 3. ("Output Source") in the 5P35023 datasheet (ie: {REF,SE1,SE2,SE3,DIFF1,DIFF2}. Also update the "assigned-clock-rates" in the example. While at it, replace clocks phandle in the example from x1_x2->x1 as X2 is a different 32768 kHz crystal. Suggested-by: Geert Uytterhoeven Signed-off-by: Biju Das Acked-by: Conor Dooley Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230824104812.147775-2-biju.das.jz@bp.renesas.com Reviewed-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/renesas,5p35023.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml b/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml index 839648e753d4..42b6f80613f3 100644 --- a/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml @@ -37,6 +37,9 @@ properties: maxItems: 1 '#clock-cells': + description: + The index in the assigned-clocks is mapped to the output clock as below + 0 - REF, 1 - SE1, 2 - SE2, 3 - SE3, 4 - DIFF1, 5 - DIFF2. const: 1 clocks: @@ -68,7 +71,7 @@ examples: reg = <0x68>; #clock-cells = <1>; - clocks = <&x1_x2>; + clocks = <&x1>; renesas,settings = [ 80 00 11 19 4c 02 23 7f 83 19 08 a9 5f 25 24 bf @@ -79,8 +82,8 @@ examples: assigned-clocks = <&versa3 0>, <&versa3 1>, <&versa3 2>, <&versa3 3>, <&versa3 4>, <&versa3 5>; - assigned-clock-rates = <12288000>, <25000000>, - <12000000>, <11289600>, - <11289600>, <24000000>; + assigned-clock-rates = <24000000>, <11289600>, + <11289600>, <12000000>, + <25000000>, <12288000>; }; }; -- cgit From 576418e3417267e93ffee09c46f56434108c4548 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Aug 2023 11:48:10 +0100 Subject: clk: vc3: Fix 64 by 64 division Fix the below cocci warnings by replacing do_div()->div64_ul() and bound the result with a max value of U16_MAX. cocci warnings: drivers/clk/clk-versaclock3.c:404:2-8: WARNING: do_div() does a 64-by-32 division, please consider using div64_ul instead. Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202307270841.yr5HxYIl-lkp@intel.com/ Fixes: 6e9aff555db7 ("clk: Add support for versa3 clock driver") Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230824104812.147775-3-biju.das.jz@bp.renesas.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock3.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-versaclock3.c b/drivers/clk/clk-versaclock3.c index 7ab2447bd203..b1a94db1f3c9 100644 --- a/drivers/clk/clk-versaclock3.c +++ b/drivers/clk/clk-versaclock3.c @@ -401,11 +401,10 @@ static long vc3_pll_round_rate(struct clk_hw *hw, unsigned long rate, /* Determine best fractional part, which is 16 bit wide */ div_frc = rate % *parent_rate; div_frc *= BIT(16) - 1; - do_div(div_frc, *parent_rate); - vc3->div_frc = (u32)div_frc; + vc3->div_frc = min_t(u64, div64_ul(div_frc, *parent_rate), U16_MAX); rate = (*parent_rate * - (vc3->div_int * VC3_2_POW_16 + div_frc) / VC3_2_POW_16); + (vc3->div_int * VC3_2_POW_16 + vc3->div_frc) / VC3_2_POW_16); } else { rate = *parent_rate * vc3->div_int; } -- cgit From 6dcf03bcac31dec528867180f96580652fc3ac5b Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Aug 2023 11:48:11 +0100 Subject: clk: vc3: Fix output clock mapping According to Table 3. ("Output Source") in the 5P35023 datasheet, the output clock mapping should be 0=REF, 1=SE1, 2=SE2, 3=SE3, 4=DIFF1, 5=DIFF2. But the code uses inverse. Fix this mapping issue. Suggested-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdUHD+bEco=WYTYWsTAyRt3dTQQt4Xpaejss0Y2ZpLCMNg@mail.gmail.com/ Fixes: 6e9aff555db7 ("clk: Add support for versa3 clock driver") Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230824104812.147775-4-biju.das.jz@bp.renesas.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock3.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/clk/clk-versaclock3.c b/drivers/clk/clk-versaclock3.c index b1a94db1f3c9..157cf510b23a 100644 --- a/drivers/clk/clk-versaclock3.c +++ b/drivers/clk/clk-versaclock3.c @@ -119,20 +119,20 @@ enum vc3_div { }; enum vc3_clk_mux { - VC3_DIFF2_MUX, - VC3_DIFF1_MUX, - VC3_SE3_MUX, - VC3_SE2_MUX, VC3_SE1_MUX, + VC3_SE2_MUX, + VC3_SE3_MUX, + VC3_DIFF1_MUX, + VC3_DIFF2_MUX, }; enum vc3_clk { - VC3_DIFF2, - VC3_DIFF1, - VC3_SE3, - VC3_SE2, - VC3_SE1, VC3_REF, + VC3_SE1, + VC3_SE2, + VC3_SE3, + VC3_DIFF1, + VC3_DIFF2, }; struct vc3_clk_data { @@ -896,33 +896,33 @@ static struct vc3_hw_data clk_div[] = { }; static struct vc3_hw_data clk_mux[] = { - [VC3_DIFF2_MUX] = { + [VC3_SE1_MUX] = { .data = &(struct vc3_clk_data) { - .offs = VC3_DIFF2_CTRL_REG, - .bitmsk = VC3_DIFF2_CTRL_REG_DIFF2_CLK_SEL + .offs = VC3_SE1_DIV4_CTRL, + .bitmsk = VC3_SE1_DIV4_CTRL_SE1_CLK_SEL }, .hw.init = &(struct clk_init_data){ - .name = "diff2_mux", + .name = "se1_mux", .ops = &vc3_clk_mux_ops, .parent_hws = (const struct clk_hw *[]) { - &clk_div[VC3_DIV1].hw, - &clk_div[VC3_DIV3].hw + &clk_div[VC3_DIV5].hw, + &clk_div[VC3_DIV4].hw }, .num_parents = 2, .flags = CLK_SET_RATE_PARENT } }, - [VC3_DIFF1_MUX] = { + [VC3_SE2_MUX] = { .data = &(struct vc3_clk_data) { - .offs = VC3_DIFF1_CTRL_REG, - .bitmsk = VC3_DIFF1_CTRL_REG_DIFF1_CLK_SEL + .offs = VC3_SE2_CTRL_REG0, + .bitmsk = VC3_SE2_CTRL_REG0_SE2_CLK_SEL }, .hw.init = &(struct clk_init_data){ - .name = "diff1_mux", + .name = "se2_mux", .ops = &vc3_clk_mux_ops, .parent_hws = (const struct clk_hw *[]) { - &clk_div[VC3_DIV1].hw, - &clk_div[VC3_DIV3].hw + &clk_div[VC3_DIV5].hw, + &clk_div[VC3_DIV4].hw }, .num_parents = 2, .flags = CLK_SET_RATE_PARENT @@ -944,33 +944,33 @@ static struct vc3_hw_data clk_mux[] = { .flags = CLK_SET_RATE_PARENT } }, - [VC3_SE2_MUX] = { + [VC3_DIFF1_MUX] = { .data = &(struct vc3_clk_data) { - .offs = VC3_SE2_CTRL_REG0, - .bitmsk = VC3_SE2_CTRL_REG0_SE2_CLK_SEL + .offs = VC3_DIFF1_CTRL_REG, + .bitmsk = VC3_DIFF1_CTRL_REG_DIFF1_CLK_SEL }, .hw.init = &(struct clk_init_data){ - .name = "se2_mux", + .name = "diff1_mux", .ops = &vc3_clk_mux_ops, .parent_hws = (const struct clk_hw *[]) { - &clk_div[VC3_DIV5].hw, - &clk_div[VC3_DIV4].hw + &clk_div[VC3_DIV1].hw, + &clk_div[VC3_DIV3].hw }, .num_parents = 2, .flags = CLK_SET_RATE_PARENT } }, - [VC3_SE1_MUX] = { + [VC3_DIFF2_MUX] = { .data = &(struct vc3_clk_data) { - .offs = VC3_SE1_DIV4_CTRL, - .bitmsk = VC3_SE1_DIV4_CTRL_SE1_CLK_SEL + .offs = VC3_DIFF2_CTRL_REG, + .bitmsk = VC3_DIFF2_CTRL_REG_DIFF2_CLK_SEL }, .hw.init = &(struct clk_init_data){ - .name = "se1_mux", + .name = "diff2_mux", .ops = &vc3_clk_mux_ops, .parent_hws = (const struct clk_hw *[]) { - &clk_div[VC3_DIV5].hw, - &clk_div[VC3_DIV4].hw + &clk_div[VC3_DIV1].hw, + &clk_div[VC3_DIV3].hw }, .num_parents = 2, .flags = CLK_SET_RATE_PARENT @@ -1109,7 +1109,7 @@ static int vc3_probe(struct i2c_client *client) name, 0, CLK_SET_RATE_PARENT, 1, 1); else clk_out[i] = devm_clk_hw_register_fixed_factor_parent_hw(dev, - name, &clk_mux[i].hw, CLK_SET_RATE_PARENT, 1, 1); + name, &clk_mux[i - 1].hw, CLK_SET_RATE_PARENT, 1, 1); if (IS_ERR(clk_out[i])) return PTR_ERR(clk_out[i]); -- cgit From 7cb779a6867fea00b4209bcf6de2f178a743247d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 11 Sep 2023 12:47:30 -0700 Subject: bpf: Clarify error expectations from bpf_clone_redirect Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped packets") exposed the fact that bpf_clone_redirect is capable of returning raw NET_XMIT_XXX return codes. This is in the conflict with its UAPI doc which says the following: "0 on success, or a negative error in case of failure." Update the UAPI to reflect the fact that bpf_clone_redirect can return positive error numbers, but don't explicitly define their meaning. Reported-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230911194731.286342-1-sdf@google.com --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8790b3962e4b..0448700890f7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1962,7 +1962,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8790b3962e4b..0448700890f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1962,7 +1962,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Description -- cgit From b772b70b69046c5b76e3f2eda680f692dee5e6d5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 11 Sep 2023 12:47:31 -0700 Subject: selftests/bpf: Update bpf_clone_redirect expected return code Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped packets") started propagating proper NET_XMIT_DROP error to the caller which means it's now possible to get positive error code when calling bpf_clone_redirect() in this particular test. Update the test to reflect that. Reported-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230911194731.286342-2-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/empty_skb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 3b77d8a422db..261228eb68e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -24,6 +24,7 @@ void test_empty_skb(void) int *ifindex; int err; int ret; + int lwt_egress_ret; /* expected retval at lwt/egress */ bool success_on_tc; } tests[] = { /* Empty packets are always rejected. */ @@ -57,6 +58,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &veth_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, .success_on_tc = true, }, { @@ -70,6 +72,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &ipip_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, }, /* ETH_HLEN+1-sized packet should be redirected. */ @@ -79,6 +82,7 @@ void test_empty_skb(void) .data_in = eth_hlen_pp, .data_size_in = sizeof(eth_hlen_pp), .ifindex = &veth_ifindex, + .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */ }, { .msg = "ipip ETH_HLEN+1 packet ingress", @@ -108,8 +112,12 @@ void test_empty_skb(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { bpf_object__for_each_program(prog, bpf_obj->obj) { - char buf[128]; + bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL; bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + int expected_ret; + char buf[128]; + + expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret; tattr.data_in = tests[i].data_in; tattr.data_size_in = tests[i].data_size_in; @@ -128,7 +136,7 @@ void test_empty_skb(void) if (at_tc && tests[i].success_on_tc) ASSERT_GE(bpf_obj->bss->ret, 0, buf); else - ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf); } } -- cgit From eec11486d191c6247e6ffdc898bc31da3cfadcce Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Aug 2023 11:48:12 +0100 Subject: clk: vc3: Make vc3_clk_mux enum values based on vc3_clk enum values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make vc3_clk_mux enum values depend upon vc3_clk enum values to avoid any accidental breakage in the future. Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230824104812.147775-5-biju.das.jz@bp.renesas.com Reviewed-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd --- drivers/clk/clk-versaclock3.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/clk/clk-versaclock3.c b/drivers/clk/clk-versaclock3.c index 157cf510b23a..3d7de355f8f6 100644 --- a/drivers/clk/clk-versaclock3.c +++ b/drivers/clk/clk-versaclock3.c @@ -118,14 +118,6 @@ enum vc3_div { VC3_DIV5, }; -enum vc3_clk_mux { - VC3_SE1_MUX, - VC3_SE2_MUX, - VC3_SE3_MUX, - VC3_DIFF1_MUX, - VC3_DIFF2_MUX, -}; - enum vc3_clk { VC3_REF, VC3_SE1, @@ -135,6 +127,14 @@ enum vc3_clk { VC3_DIFF2, }; +enum vc3_clk_mux { + VC3_SE1_MUX = VC3_SE1 - 1, + VC3_SE2_MUX = VC3_SE2 - 1, + VC3_SE3_MUX = VC3_SE3 - 1, + VC3_DIFF1_MUX = VC3_DIFF1 - 1, + VC3_DIFF2_MUX = VC3_DIFF2 - 1, +}; + struct vc3_clk_data { u8 offs; u8 bitmsk; -- cgit From b7b20cfe6f849c2682c5f7d3f50ede6321a5d04c Mon Sep 17 00:00:00 2001 From: Zhifeng Tang Date: Thu, 24 Aug 2023 17:26:24 +0800 Subject: clk: sprd: Fix thm_parents incorrect configuration The thm*_clk have two clock sources 32k and 250k,excluding 32m. Fixes: af3bd36573e3 ("clk: sprd: Add clocks support for UMS512") Signed-off-by: Zhifeng Tang Acked-by: Chunyan Zhang Reviewed-by: Baolin Wang Link: https://lore.kernel.org/r/20230824092624.20020-1-zhifeng.tang@unisoc.com Signed-off-by: Stephen Boyd --- drivers/clk/sprd/ums512-clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sprd/ums512-clk.c b/drivers/clk/sprd/ums512-clk.c index 8f4441dd572b..9384ecc6c741 100644 --- a/drivers/clk/sprd/ums512-clk.c +++ b/drivers/clk/sprd/ums512-clk.c @@ -800,7 +800,7 @@ static SPRD_MUX_CLK_DATA(uart1_clk, "uart1-clk", uart_parents, 0x250, 0, 3, UMS512_MUX_FLAG); static const struct clk_parent_data thm_parents[] = { - { .fw_name = "ext-32m" }, + { .fw_name = "ext-32k" }, { .hw = &clk_250k.hw }, }; static SPRD_MUX_CLK_DATA(thm0_clk, "thm0-clk", thm_parents, -- cgit From f03a562450eef35b785a814005ed164a89dfb2db Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 31 Aug 2023 20:16:55 +0200 Subject: clk: si521xx: Use REGCACHE_FLAT instead of NONE In order to reload registers into the clock generator on resume using regcache_sync(), it is necessary to select one of the regcache types which are not NONE. Since this device has some 7 registers, use the simplest one, FLAT. The regcache code complains about REGCACHE_NONE being selected and generates a WARNING, this fixes that warning. Fixes: edc12763a3a2 ("clk: si521xx: Clock driver for Skyworks Si521xx I2C PCIe clock generators") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20230831181656.154750-1-marex@denx.de Signed-off-by: Stephen Boyd --- drivers/clk/clk-si521xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-si521xx.c b/drivers/clk/clk-si521xx.c index 4eaf1b53f06b..0b9e2edbbe67 100644 --- a/drivers/clk/clk-si521xx.c +++ b/drivers/clk/clk-si521xx.c @@ -146,7 +146,7 @@ static int si521xx_regmap_i2c_read(void *context, unsigned int reg, static const struct regmap_config si521xx_regmap_config = { .reg_bits = 8, .val_bits = 8, - .cache_type = REGCACHE_NONE, + .cache_type = REGCACHE_FLAT, .max_register = SI521XX_REG_DA, .rd_table = &si521xx_readable_table, .wr_table = &si521xx_writeable_table, -- cgit From 83df5bf010eb5ccc11ce95f2d076515ec216c99c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 31 Aug 2023 20:16:56 +0200 Subject: clk: si521xx: Fix regmap write accessor Rework the write operation such that the Byte Count register is written with a single raw i2c write outside of regmap using transfer which does specify the number of bytes to be transfered, one in this case, and which makes the expected subsequent write transfer look like address+register+data, and then make use of this method. Without this change, the Byte Count register write in probe() would succeed as it would provide the byte count as part of its write payload, but any subsequent writes would fail due to this Byte Count register programming. Such failing writes happens e.g. during resume, when restoring the regmap content. Fixes: edc12763a3a2 ("clk: si521xx: Clock driver for Skyworks Si521xx I2C PCIe clock generators") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20230831181656.154750-2-marex@denx.de Signed-off-by: Stephen Boyd --- drivers/clk/clk-si521xx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-si521xx.c b/drivers/clk/clk-si521xx.c index 0b9e2edbbe67..ef4ba467e747 100644 --- a/drivers/clk/clk-si521xx.c +++ b/drivers/clk/clk-si521xx.c @@ -96,7 +96,7 @@ static int si521xx_regmap_i2c_write(void *context, unsigned int reg, unsigned int val) { struct i2c_client *i2c = context; - const u8 data[3] = { reg, 1, val }; + const u8 data[2] = { reg, val }; const int count = ARRAY_SIZE(data); int ret; @@ -281,9 +281,10 @@ static int si521xx_probe(struct i2c_client *client) { const u16 chip_info = (u16)(uintptr_t)device_get_match_data(&client->dev); const struct clk_parent_data clk_parent_data = { .index = 0 }; - struct si521xx *si; + const u8 data[3] = { SI521XX_REG_BC, 1, 1 }; unsigned char name[6] = "DIFF0"; struct clk_init_data init = {}; + struct si521xx *si; int i, ret; if (!chip_info) @@ -308,7 +309,7 @@ static int si521xx_probe(struct i2c_client *client) "Failed to allocate register map\n"); /* Always read back 1 Byte via I2C */ - ret = regmap_write(si->regmap, SI521XX_REG_BC, 1); + ret = i2c_master_send(client, data, ARRAY_SIZE(data)); if (ret < 0) return ret; -- cgit From 0339dc39a521ead3dbcf101acd8c028c61db57dc Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:03 +0000 Subject: cxl/pci: Fix appropriate checking for _OSC while handling CXL RAS registers cxl_pci fails to unmask CXL protocol errors when CXL memory error reporting is not granted native control. Given that CXL memory error reporting uses the event interface and protocol errors use AER, unmask protocol errors based only on the native AER setting. Without this change end user deployments will fail to report protocol errors in the case where native memory error handling is not granted to Linux. Also, return zero instead of an error code to not block the communication with the cxl device when in native memory error reporting mode. Fixes: 248529edc86f ("cxl: add RAS status unmasking for CXL") Cc: Signed-off-by: Smita Koralahalli Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1cb1494c28fe..2323169b6e5f 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -541,9 +541,9 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev) return 0; } - /* BIOS has CXL error control */ - if (!host_bridge->native_cxl_error) - return -ENXIO; + /* BIOS has PCIe AER error control */ + if (!host_bridge->native_aer) + return 0; rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); if (rc) -- cgit From 49f776724e64c27dd861e7ac8da9d42f01d9d172 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:04 +0000 Subject: PCI/AER: Export pcie_aer_is_native() Export and move the declaration of pcie_aer_is_native() to a common header file to be reused by cxl/pci module. Signed-off-by: Smita Koralahalli Acked-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/pci/pcie/aer.c | 1 + drivers/pci/pcie/portdrv.h | 2 -- include/linux/aer.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e85ff946e8c8..9c8fd69ae5ad 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -229,6 +229,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 58a2b1a1cae4..1f3803bde7ee 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -29,10 +29,8 @@ extern bool pcie_ports_dpc_native; #ifdef CONFIG_PCIEAER int pcie_aer_init(void); -int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pcie_aer_init(void) { return 0; } -static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_HOTPLUG_PCI_PCIE diff --git a/include/linux/aer.h b/include/linux/aer.h index 2dd175f5debd..29cc10220952 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,11 +42,13 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif void cper_print_aer(struct pci_dev *dev, int aer_severity, -- cgit From 55b8ff06a0c70e9a6a1696c69f52c0240167d23f Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:05 +0000 Subject: cxl/pci: Replace host_bridge->native_aer with pcie_aer_is_native() Use pcie_aer_is_native() to determine the native AER ownership as the usage of host_bride->native_aer does not cover command line override of AER ownership. Signed-off-by: Smita Koralahalli Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2323169b6e5f..44a21ab7add5 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -529,7 +529,6 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, static int cxl_pci_ras_unmask(struct pci_dev *pdev) { - struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 orig_val, val, mask; @@ -542,7 +541,7 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev) } /* BIOS has PCIe AER error control */ - if (!host_bridge->native_aer) + if (!pcie_aer_is_native(pdev)) return 0; rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); -- cgit From a34a9f1a19afe9c60ca0ea61dfeee63a1c2baac8 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 11 Sep 2023 15:28:14 +0200 Subject: bpf: Avoid deadlock when using queue and stack maps from NMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sysbot discovered that the queue and stack maps can deadlock if they are being used from a BPF program that can be called from NMI context (such as one that is attached to a perf HW counter event). To fix this, add an in_nmi() check and use raw_spin_trylock() in NMI context, erroring out if grabbing the lock fails. Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Reported-by: Hsin-Wei Hung Tested-by: Hsin-Wei Hung Co-developed-by: Hsin-Wei Hung Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20230911132815.717240-1-toke@redhat.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 8d2ddcb7566b..d869f51ea93a 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -98,7 +98,12 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) int err = 0; void *ptr; - raw_spin_lock_irqsave(&qs->lock, flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, flags); + } if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -128,7 +133,12 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) void *ptr; u32 index; - raw_spin_lock_irqsave(&qs->lock, flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, flags); + } if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -193,7 +203,12 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, if (flags & BPF_NOEXIST || flags > BPF_EXIST) return -EINVAL; - raw_spin_lock_irqsave(&qs->lock, irq_flags); + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) + return -EBUSY; + } else { + raw_spin_lock_irqsave(&qs->lock, irq_flags); + } if (queue_stack_map_is_full(qs)) { if (!replace) { -- cgit From 1a49f4195d3498fe458a7f5ff7ec5385da70d92e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 12 Sep 2023 03:55:37 +0300 Subject: bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init Fix for a bug observable under the following sequence of events: 1. Create a network device that does not support XDP offload. 2. Load a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag (such programs are not offloaded). 3. Load a device bound XDP program with zero flags (such programs are offloaded). At step (2) __bpf_prog_dev_bound_init() associates with device (1) a dummy bpf_offload_netdev struct with .offdev field set to NULL. At step (3) __bpf_prog_dev_bound_init() would reuse dummy struct allocated at step (2). However, downstream usage of the bpf_offload_netdev assumes that .offdev field can't be NULL, e.g. in bpf_prog_offload_verifier_prep(). Adjust __bpf_prog_dev_bound_init() to require bpf_offload_netdev with non-NULL .offdev for offloaded BPF programs. Fixes: 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") Reported-by: syzbot+291100dcb32190ec02a8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912005539.2248244-2-eddyz87@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/offload.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 3e4f2ec1af06..87d6693d8233 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -199,12 +199,14 @@ static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *n offload->netdev = netdev; ondev = bpf_offload_find_netdev(offload->netdev); + /* When program is offloaded require presence of "true" + * bpf_offload_netdev, avoid the one created for !ondev case below. + */ + if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) { + err = -EINVAL; + goto err_free; + } if (!ondev) { - if (bpf_prog_is_offloaded(prog->aux)) { - err = -EINVAL; - goto err_free; - } - /* When only binding to the device, explicitly * create an entry in the hashtable. */ -- cgit From e4c31164737e9a00de1be6455e2c667ac5478b3c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 12 Sep 2023 03:55:38 +0300 Subject: selftests/bpf: Offloaded prog after non-offloaded should not cause BUG Check what happens if non-offloaded dev bound BPF program is followed by offloaded dev bound program. Test case adapated from syzbot report [1]. [1] https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912005539.2248244-3-eddyz87@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/xdp_dev_bound_only.c | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c new file mode 100644 index 000000000000..7dd18c6d06c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#define LOCAL_NETNS "xdp_dev_bound_only_netns" + +static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags) +{ + struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.prog_flags = flags; + opts.prog_ifindex = ifindex; + return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +/* A test case for bpf_offload_netdev->offload handling bug: + * - create a veth device (does not support offload); + * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag + * (such programs are not offloaded); + * - create a device bound XDP program without flags (such programs are offloaded). + * This might lead to 'BUG: kernel NULL pointer dereference'. + */ +void test_xdp_dev_bound_only_offdev(void) +{ + struct nstoken *tok = NULL; + __u32 ifindex; + int fd1 = -1; + int fd2 = -1; + + SYS(out, "ip netns add " LOCAL_NETNS); + tok = open_netns(LOCAL_NETNS); + if (!ASSERT_OK_PTR(tok, "open_netns")) + goto out; + SYS(out, "ip link add eth42 type veth"); + ifindex = if_nametoindex("eth42"); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) { + perror("if_nametoindex"); + goto out; + } + fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) { + perror("load_dummy_prog #1"); + goto out; + } + /* Program with ifindex is considered offloaded, however veth + * does not support offload => error should be reported. + */ + fd2 = load_dummy_prog("dummy2", ifindex, 0); + ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)"); + +out: + close(fd1); + close(fd2); + close_netns(tok); + /* eth42 was added inside netns, removing the netns will + * also remove eth42 veth pair. + */ + SYS_NOFAIL("ip netns del " LOCAL_NETNS); +} -- cgit From 87d315a34133edcb29c4cadbf196ec6c30dfd47b Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Fri, 25 Aug 2023 13:15:28 +0300 Subject: pinctrl: nuvoton: wpcm450: fix out of bounds write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write into 'pctrl->gpio_bank' happens before the check for GPIO index validity, so out of bounds write may happen. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: a1d1e0e3d80a ("pinctrl: nuvoton: Add driver for WPCM450") Signed-off-by: Mikhail Kobuk Reviewed-by: Alexey Khoroshilov Reviewed-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20230825101532.6624-1-m.kobuk@ispras.ru Signed-off-by: Linus Walleij --- drivers/pinctrl/nuvoton/pinctrl-wpcm450.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c index 2d1c1652cfd9..8a9961ac8712 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c +++ b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c @@ -1062,13 +1062,13 @@ static int wpcm450_gpio_register(struct platform_device *pdev, if (ret < 0) return ret; - gpio = &pctrl->gpio_bank[reg]; - gpio->pctrl = pctrl; - if (reg >= WPCM450_NUM_BANKS) return dev_err_probe(dev, -EINVAL, "GPIO index %d out of range!\n", reg); + gpio = &pctrl->gpio_bank[reg]; + gpio->pctrl = pctrl; + bank = &wpcm450_banks[reg]; gpio->bank = bank; -- cgit From caaaa34eff2a3fc4e61bfccde9e15ae5dba49a7d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 8 Sep 2023 11:12:23 +0100 Subject: ALSA: hda: cs35l56: Call pm_runtime_dont_use_autosuspend() Driver remove() must call pm_runtime_dont_use_autosuspend(). Drivers that call pm_runtime_use_autosuspend() must disable it in driver remove(). Unfortunately until recently this was only mentioned in 1 line in a 900+ line document so most people hadn't noticed this. It has only recently been added to the kerneldoc of pm_runtime_use_autosuspend(). Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://lore.kernel.org/r/20230908101223.2656901-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 76b9c685560b..9e4976bdb5e0 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1003,6 +1003,7 @@ void cs35l56_hda_remove(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_get_sync(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); -- cgit From 60edec9beffebd01a49c005221230f3a61fe6587 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Sep 2023 09:59:44 +0200 Subject: ALSA: docs: Fix a typo of midi2_ump_probe option for snd-usb-audio A simple typo fix: midi2_probe => midi2_ump_probe. Fixes: febdfa0e9c8a ("ALSA: docs: Update MIDI 2.0 documentation for UMP 1.1 enhancement") Link: https://lore.kernel.org/r/20230912075944.14032-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- Documentation/sound/designs/midi-2.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst index 45987f256b97..086487ca7ab1 100644 --- a/Documentation/sound/designs/midi-2.0.rst +++ b/Documentation/sound/designs/midi-2.0.rst @@ -74,8 +74,8 @@ topology based on those information. When the device is older and doesn't respond to the new UMP inquiries, the driver falls back and builds the topology based on Group Terminal Block (GTB) information from the USB descriptor. Some device might be screwed up by the -unexpected UMP command; in such a case, pass `midi2_probe=0` option to -snd-usb-audio driver for skipping the UMP v1.1 inquiries. +unexpected UMP command; in such a case, pass `midi2_ump_probe=0` +option to snd-usb-audio driver for skipping the UMP v1.1 inquiries. When the MIDI 2.0 device is probed, the kernel creates a rawmidi device for each UMP Endpoint of the device. Its device name is -- cgit From 8406d6b5916663b4edc604b3effbf4935b61c2da Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Tue, 5 Sep 2023 20:21:04 +0800 Subject: pinctrl: starfive: jh7110: Fix failure to set irq after CONFIG_PM is enabled The issue was found when we enabled CONFIG_PM and tested edge events using libgpiod. > # gpiomon -r gpiochip0 55 > gpiomon: error waiting for events: Permission denied `gpiomon` will call irq_chip_pm_get() and then call pm_runtime_resume_and_get() if (IS_ENABLED(CONFIG_PM) && sfp->gc.irq.domain->pm_dev). pm_runtime_resume_and_get() will fail if the runtime pm of pinctrl device is disabled. As we expect the pinctrl driver can be always working and never suspend during runtime, unset sfp->gc.irq.domain->pm_dev to make sure pm_runtime_resume_and_get() won't be called when setting irq. Fixes: 447976ab62c5 ("pinctrl: starfive: Add StarFive JH7110 sys controller driver") Signed-off-by: Hal Feng Link: https://lore.kernel.org/r/20230905122105.117000-2-hal.feng@starfivetech.com Signed-off-by: Linus Walleij --- drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index b9081805c8f6..ac6a0abe5534 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -967,8 +967,6 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "could not register gpiochip\n"); - irq_domain_set_pm_device(sfp->gc.irq.domain, dev); - dev_info(dev, "StarFive GPIO chip registered %d GPIOs\n", sfp->gc.ngpio); return pinctrl_enable(sfp->pctl); -- cgit From 64061b67335e958e6328bcb5bb2b5490d57f3f59 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Tue, 5 Sep 2023 20:21:05 +0800 Subject: pinctrl: starfive: jh7110: Add system pm ops to save and restore context Add system pm ops to save and restore pinctrl registers when suspending and resuming the driver, respectively. Signed-off-by: Hal Feng Link: https://lore.kernel.org/r/20230905122105.117000-3-hal.feng@starfivetech.com Signed-off-by: Linus Walleij --- MAINTAINERS | 1 + .../pinctrl/starfive/pinctrl-starfive-jh7110-aon.c | 4 +++ .../pinctrl/starfive/pinctrl-starfive-jh7110-sys.c | 4 +++ drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c | 40 ++++++++++++++++++++++ drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h | 4 +++ 5 files changed, 53 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..cbc16ca7a1fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20489,6 +20489,7 @@ F: include/dt-bindings/clock/starfive?jh71*.h STARFIVE JH71X0 PINCTRL DRIVERS M: Emil Renner Berthing M: Jianlong Huang +M: Hal Feng L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pinctrl/starfive,jh71*.yaml diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c index 4bfe3aa57f8a..cf42e204cbf0 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c @@ -31,6 +31,8 @@ #define JH7110_AON_NGPIO 4 #define JH7110_AON_GC_BASE 64 +#define JH7110_AON_REGS_NUM 37 + /* registers */ #define JH7110_AON_DOEN 0x0 #define JH7110_AON_DOUT 0x4 @@ -145,6 +147,7 @@ static const struct jh7110_pinctrl_soc_info jh7110_aon_pinctrl_info = { .gpi_mask = GENMASK(3, 0), .gpioin_reg_base = JH7110_AON_GPIOIN, .irq_reg = &jh7110_aon_irq_reg, + .nsaved_regs = JH7110_AON_REGS_NUM, .jh7110_set_one_pin_mux = jh7110_aon_set_one_pin_mux, .jh7110_get_padcfg_base = jh7110_aon_get_padcfg_base, .jh7110_gpio_irq_handler = jh7110_aon_irq_handler, @@ -165,6 +168,7 @@ static struct platform_driver jh7110_aon_pinctrl_driver = { .driver = { .name = "starfive-jh7110-aon-pinctrl", .of_match_table = jh7110_aon_pinctrl_of_match, + .pm = pm_sleep_ptr(&jh7110_pinctrl_pm_ops), }, }; module_platform_driver(jh7110_aon_pinctrl_driver); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c index 20c85db1cd3a..03c2ad808d61 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c @@ -31,6 +31,8 @@ #define JH7110_SYS_NGPIO 64 #define JH7110_SYS_GC_BASE 0 +#define JH7110_SYS_REGS_NUM 174 + /* registers */ #define JH7110_SYS_DOEN 0x000 #define JH7110_SYS_DOUT 0x040 @@ -417,6 +419,7 @@ static const struct jh7110_pinctrl_soc_info jh7110_sys_pinctrl_info = { .gpi_mask = GENMASK(6, 0), .gpioin_reg_base = JH7110_SYS_GPIOIN, .irq_reg = &jh7110_sys_irq_reg, + .nsaved_regs = JH7110_SYS_REGS_NUM, .jh7110_set_one_pin_mux = jh7110_sys_set_one_pin_mux, .jh7110_get_padcfg_base = jh7110_sys_get_padcfg_base, .jh7110_gpio_irq_handler = jh7110_sys_irq_handler, @@ -437,6 +440,7 @@ static struct platform_driver jh7110_sys_pinctrl_driver = { .driver = { .name = "starfive-jh7110-sys-pinctrl", .of_match_table = jh7110_sys_pinctrl_of_match, + .pm = pm_sleep_ptr(&jh7110_pinctrl_pm_ops), }, }; module_platform_driver(jh7110_sys_pinctrl_driver); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index ac6a0abe5534..640f827a9b2c 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -872,6 +872,13 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) if (!sfp) return -ENOMEM; +#if IS_ENABLED(CONFIG_PM_SLEEP) + sfp->saved_regs = devm_kcalloc(dev, info->nsaved_regs, + sizeof(*sfp->saved_regs), GFP_KERNEL); + if (!sfp->saved_regs) + return -ENOMEM; +#endif + sfp->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sfp->base)) return PTR_ERR(sfp->base); @@ -973,6 +980,39 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(jh7110_pinctrl_probe); +static int jh7110_pinctrl_suspend(struct device *dev) +{ + struct jh7110_pinctrl *sfp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + raw_spin_lock_irqsave(&sfp->lock, flags); + for (i = 0 ; i < sfp->info->nsaved_regs ; i++) + sfp->saved_regs[i] = readl_relaxed(sfp->base + 4 * i); + + raw_spin_unlock_irqrestore(&sfp->lock, flags); + return 0; +} + +static int jh7110_pinctrl_resume(struct device *dev) +{ + struct jh7110_pinctrl *sfp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + raw_spin_lock_irqsave(&sfp->lock, flags); + for (i = 0 ; i < sfp->info->nsaved_regs ; i++) + writel_relaxed(sfp->saved_regs[i], sfp->base + 4 * i); + + raw_spin_unlock_irqrestore(&sfp->lock, flags); + return 0; +} + +const struct dev_pm_ops jh7110_pinctrl_pm_ops = { + LATE_SYSTEM_SLEEP_PM_OPS(jh7110_pinctrl_suspend, jh7110_pinctrl_resume) +}; +EXPORT_SYMBOL_GPL(jh7110_pinctrl_pm_ops); + MODULE_DESCRIPTION("Pinctrl driver for the StarFive JH7110 SoC"); MODULE_AUTHOR("Emil Renner Berthing "); MODULE_AUTHOR("Jianlong Huang "); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h index 3f20b7ff96dd..a33d0d4e1382 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h @@ -21,6 +21,7 @@ struct jh7110_pinctrl { /* register read/write mutex */ struct mutex mutex; const struct jh7110_pinctrl_soc_info *info; + u32 *saved_regs; }; struct jh7110_gpio_irq_reg { @@ -50,6 +51,8 @@ struct jh7110_pinctrl_soc_info { const struct jh7110_gpio_irq_reg *irq_reg; + unsigned int nsaved_regs; + /* generic pinmux */ int (*jh7110_set_one_pin_mux)(struct jh7110_pinctrl *sfp, unsigned int pin, @@ -66,5 +69,6 @@ void jh7110_set_gpiomux(struct jh7110_pinctrl *sfp, unsigned int pin, unsigned int din, u32 dout, u32 doen); int jh7110_pinctrl_probe(struct platform_device *pdev); struct jh7110_pinctrl *jh7110_from_irq_desc(struct irq_desc *desc); +extern const struct dev_pm_ops jh7110_pinctrl_pm_ops; #endif /* __PINCTRL_STARFIVE_JH7110_H__ */ -- cgit From 22eefaeab03fe968ab7786fb3d5c5abd203a8bab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Sep 2023 10:51:44 +0200 Subject: ALSA: seq: Avoid delivery of events for disabled UMP groups ALSA sequencer core still delivers events to the disabled UMP Group, leaving this handling to the device. But it's rather risky and it's easy to imagine that such an unexpected event may screw up the device firmware. This patch avoids the superfluous event deliveries by setting the group_filter of the UMP client as default, and evaluate the group_filter properly at delivery from non-UMP clients. The grouop_filter is updated upon the dynamic UMP Function Block updates, so that it follows the change of the disabled UMP Groups, too. Fixes: d2b706077792 ("ALSA: seq: Add UMP group filter") Link: https://lore.kernel.org/r/20230912085144.32534-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 22 ++++++++++++++++++++++ sound/core/seq/seq_ump_convert.c | 2 ++ 2 files changed, 24 insertions(+) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index f26a1812dfa7..a60e3f069a80 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -416,6 +416,25 @@ static void setup_client_midi_version(struct seq_ump_client *client) snd_seq_kernel_client_put(cptr); } +/* set up client's group_filter bitmap */ +static void setup_client_group_filter(struct seq_ump_client *client) +{ + struct snd_seq_client *cptr; + unsigned int filter; + int p; + + cptr = snd_seq_kernel_client_get(client->seq_client); + if (!cptr) + return; + filter = ~(1U << 0); /* always allow groupless messages */ + for (p = 0; p < SNDRV_UMP_MAX_GROUPS; p++) { + if (client->groups[p].active) + filter &= ~(1U << (p + 1)); + } + cptr->group_filter = filter; + snd_seq_kernel_client_put(cptr); +} + /* UMP group change notification */ static void handle_group_notify(struct work_struct *work) { @@ -424,6 +443,7 @@ static void handle_group_notify(struct work_struct *work) update_group_attrs(client); update_port_infos(client); + setup_client_group_filter(client); } /* UMP FB change notification */ @@ -492,6 +512,8 @@ static int snd_seq_ump_probe(struct device *_dev) goto error; } + setup_client_group_filter(client); + err = create_ump_endpoint_port(client); if (err < 0) goto error; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index 7cc84e137999..b141024830ec 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1197,6 +1197,8 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, struct snd_seq_event *event, int atomic, int hop) { + if (dest->group_filter & (1U << dest_port->ump_group)) + return 0; /* group filtered - skip the event */ if (event->type == SNDRV_SEQ_EVENT_SYSEX) return cvt_sysex_to_ump(dest, dest_port, event, atomic, hop); else if (snd_seq_client_is_midi2(dest)) -- cgit From fb6254df09bba303db2a1002085f6c0b90a456ed Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 12 Sep 2023 15:31:49 +0800 Subject: ALSA: hda/realtek - Fixed two speaker platform If system has two speakers and one connect to 0x14 pin, use this function will disable it. Fixes: e43252db7e20 ("ALSA: hda/realtek - ALC287 I2S speaker platform support") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/e3f2aac3fe6a47079d728a6443358cc2@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b7e78bfcffd8..887c1b163865 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7073,8 +7073,10 @@ static void alc287_fixup_bind_dacs(struct hda_codec *codec, snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); spec->gen.preferred_dacs = preferred_pairs; spec->gen.auto_mute_via_amp = 1; - snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - 0x0); /* Make sure 0x14 was disable */ + if (spec->gen.autocfg.speaker_pins[0] != 0x14) { + snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, + 0x0); /* Make sure 0x14 was disable */ + } } -- cgit From 3579dc742f76207a4854a87a8e3ce44434d7e308 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Aug 2023 15:46:49 +0100 Subject: KVM: arm64: Properly return allocated EL2 VA from hyp_alloc_private_va_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marek reports that his RPi4 spits out a warning at boot time, right at the point where the GICv2 virtual CPU interface gets mapped. Upon investigation, it seems that we never return the allocated VA and use whatever was on the stack at this point. Yes, this is good stuff, and Marek was pretty lucky that he ended-up with a VA that intersected with something that was already mapped. On my setup, this random value is plausible enough for the mapping to take place. Who knows what happens... Fixes: f156a7d13fc3 ("KVM: arm64: Remove size-order align in the nVHE hyp private VA range") Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Vincent Donnefort Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/79b0ad6e-0c2a-f777-d504-e40e8123d81d@samsung.com Link: https://lore.kernel.org/r/20230828153121.4179627-1-maz@kernel.org --- arch/arm64/kvm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 587a104f66c3..482280fe22d7 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -652,6 +652,9 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) mutex_unlock(&kvm_hyp_pgd_mutex); + if (!ret) + *haddr = base; + return ret; } -- cgit From 373beef00f7d781a000b12c31fb17a5a9c25969c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 11 Sep 2023 15:52:57 +0100 Subject: KVM: arm64: nvhe: Ignore SVE hint in SMCCC function ID When SVE is enabled, the host may set bit 16 in SMCCC function IDs, a hint that indicates an unused SVE state. At the moment NVHE doesn't account for this bit when inspecting the function ID, and rejects most calls. Clear the hint bit before comparing function IDs. About version compatibility: the host's PSCI driver initially probes the firmware for a SMCCC version number. If the firmware implements a protocol recent enough (1.3), subsequent SMCCC calls have the hint bit set. Since the hint bit was reserved in earlier versions of the protocol, clearing it is fine regardless of the version in use. When a new hint is added to the protocol in the future, it will be added to ARM_SMCCC_CALL_HINTS and NVHE will handle it straight away. This patch only clears known hints and leaves reserved bits as is, because future SMCCC versions could use reserved bits as modifiers for the function ID, rather than hints. Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Reported-by: Ben Horgan Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230911145254.934414-4-jean-philippe@linaro.org --- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/ffa.h | 2 +- arch/arm64/kvm/hyp/nvhe/ffa.c | 3 +-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 8 ++++++-- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +-- include/linux/arm-smccc.h | 2 ++ 7 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b7238c72a04c..66efd67ea7e8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,7 +118,7 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); u64 __guest_enter(struct kvm_vcpu *vcpu); -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h index 1becb10ecd80..d9fd5e6c7d3c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -12,6 +12,6 @@ #define FFA_MAX_FUNC_NUM 0x7F int hyp_ffa_init(void *pages); -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #endif /* __KVM_HYP_FFA_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index ab4f5d160c58..6e4dba9eadef 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -634,9 +634,8 @@ out_handled: return true; } -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); struct arm_smccc_res res; /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 90fade1b032e..1cc06e6797bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,6 +57,7 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc + bic x0, x0, #ARM_SMCCC_CALL_HINTS mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) cmp x0, x3 b.eq 1f diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 857d9bc04fd4..2385fd03ed87 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -368,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) @@ -392,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { + DECLARE_REG(u64, func_id, host_ctxt, 0); bool handled; - handled = kvm_host_psci_handler(host_ctxt); + func_id &= ~ARM_SMCCC_CALL_HINTS; + + handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) - handled = kvm_host_ffa_handler(host_ctxt); + handled = kvm_host_ffa_handler(host_ctxt, func_id); if (!handled) default_host_smc_handler(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 24543d2a3490..d57bcb6ab94d 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ } } -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; switch (kvm_host_psci_config.version) { diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7c67c17321d4..083f85653716 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -67,6 +67,8 @@ #define ARM_SMCCC_VERSION_1_3 0x10003 #define ARM_SMCCC_1_3_SVE_HINT 0x10000 +#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT + #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit From 1263cc0f414d212129c0f1289b49b7df77f92084 Mon Sep 17 00:00:00 2001 From: August Wikerfors Date: Mon, 11 Sep 2023 23:34:09 +0200 Subject: ASoC: amd: yc: Fix non-functional mic on Lenovo 82QF and 82UG Like the Lenovo 82TL and 82V2, the Lenovo 82QF (Yoga 7 14ARB7) and 82UG (Legion S7 16ARHA7) both need a quirk entry for the internal microphone to function. Commit c008323fe361 ("ASoC: amd: yc: Fix a non-functional mic on Lenovo 82SJ") restricted the quirk that previously matched "82" to "82V2", breaking microphone functionality on these devices. Fix this by adding specific quirks for these models, as was done for the Lenovo 82TL. Fixes: c008323fe361 ("ASoC: amd: yc: Fix a non-functional mic on Lenovo 82SJ") Closes: https://github.com/tomsom/yoga-linux/issues/51 Link: https://bugzilla.kernel.org/show_bug.cgi?id=208555#c780 Cc: stable@vger.kernel.org Signed-off-by: August Wikerfors Link: https://lore.kernel.org/r/20230911213409.6106-1-git@augustwikerfors.se Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 59aa2e9d3a79..94e9eb8e73f2 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -213,6 +213,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -220,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82TL"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UG"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit From 5873d380f4c0ff23ec7d0d1780107e46a4637c0e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 5 Sep 2023 15:19:25 +0200 Subject: irqchip/qcom-pdc: Add support for v3.2 HW Starting from HW version 3.2 the IRQ_ENABLE bit has moved to the IRQ_i_CFG register and requires a change of the driver to avoid writing into an undefined register address. Get the HW version from registers and set the IRQ_ENABLE bit to the correct register depending on the HW version. Signed-off-by: Dmitry Baryshkov Reviewed-by: Maulik Shah Signed-off-by: Neil Armstrong Acked-by: Konrad Dybcio Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230905-topic-sm8x50-upstream-pdc-ver-v4-1-fc633c7df84b@linaro.org --- drivers/irqchip/qcom-pdc.c | 69 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index a32c0d28d038..74b2f124116e 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -22,9 +22,20 @@ #define PDC_MAX_GPIO_IRQS 256 +/* Valid only on HW version < 3.2 */ #define IRQ_ENABLE_BANK 0x10 #define IRQ_i_CFG 0x110 +/* Valid only on HW version >= 3.2 */ +#define IRQ_i_CFG_IRQ_ENABLE 3 + +#define IRQ_i_CFG_TYPE_MASK GENMASK(2, 0) + +#define PDC_VERSION_REG 0x1000 + +/* Notable PDC versions */ +#define PDC_VERSION_3_2 0x30200 + struct pdc_pin_region { u32 pin_base; u32 parent_base; @@ -37,6 +48,7 @@ static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; static struct pdc_pin_region *pdc_region; static int pdc_region_cnt; +static unsigned int pdc_version; static void pdc_reg_write(int reg, u32 i, u32 val) { @@ -48,20 +60,32 @@ static u32 pdc_reg_read(int reg, u32 i) return readl_relaxed(pdc_base + reg + i * sizeof(u32)); } -static void pdc_enable_intr(struct irq_data *d, bool on) +static void __pdc_enable_intr(int pin_out, bool on) { - int pin_out = d->hwirq; unsigned long enable; - unsigned long flags; - u32 index, mask; - index = pin_out / 32; - mask = pin_out % 32; + if (pdc_version < PDC_VERSION_3_2) { + u32 index, mask; + + index = pin_out / 32; + mask = pin_out % 32; + + enable = pdc_reg_read(IRQ_ENABLE_BANK, index); + __assign_bit(mask, &enable, on); + pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + } else { + enable = pdc_reg_read(IRQ_i_CFG, pin_out); + __assign_bit(IRQ_i_CFG_IRQ_ENABLE, &enable, on); + pdc_reg_write(IRQ_i_CFG, pin_out, enable); + } +} + +static void pdc_enable_intr(struct irq_data *d, bool on) +{ + unsigned long flags; raw_spin_lock_irqsave(&pdc_lock, flags); - enable = pdc_reg_read(IRQ_ENABLE_BANK, index); - __assign_bit(mask, &enable, on); - pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + __pdc_enable_intr(d->hwirq, on); raw_spin_unlock_irqrestore(&pdc_lock, flags); } @@ -142,6 +166,7 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type) } old_pdc_type = pdc_reg_read(IRQ_i_CFG, d->hwirq); + pdc_type |= (old_pdc_type & ~IRQ_i_CFG_TYPE_MASK); pdc_reg_write(IRQ_i_CFG, d->hwirq, pdc_type); ret = irq_chip_set_type_parent(d, type); @@ -246,7 +271,6 @@ static const struct irq_domain_ops qcom_pdc_ops = { static int pdc_setup_pin_mapping(struct device_node *np) { int ret, n, i; - u32 irq_index, reg_index, val; n = of_property_count_elems_of_size(np, "qcom,pdc-ranges", sizeof(u32)); if (n <= 0 || n % 3) @@ -276,29 +300,38 @@ static int pdc_setup_pin_mapping(struct device_node *np) if (ret) return ret; - for (i = 0; i < pdc_region[n].cnt; i++) { - reg_index = (i + pdc_region[n].pin_base) >> 5; - irq_index = (i + pdc_region[n].pin_base) & 0x1f; - val = pdc_reg_read(IRQ_ENABLE_BANK, reg_index); - val &= ~BIT(irq_index); - pdc_reg_write(IRQ_ENABLE_BANK, reg_index, val); - } + for (i = 0; i < pdc_region[n].cnt; i++) + __pdc_enable_intr(i + pdc_region[n].pin_base, 0); } return 0; } +#define QCOM_PDC_SIZE 0x30000 + static int qcom_pdc_init(struct device_node *node, struct device_node *parent) { struct irq_domain *parent_domain, *pdc_domain; + resource_size_t res_size; + struct resource res; int ret; - pdc_base = of_iomap(node, 0); + /* compat with old sm8150 DT which had very small region for PDC */ + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + res_size = max_t(resource_size_t, resource_size(&res), QCOM_PDC_SIZE); + if (res_size > resource_size(&res)) + pr_warn("%pOF: invalid reg size, please fix DT\n", node); + + pdc_base = ioremap(res.start, res_size); if (!pdc_base) { pr_err("%pOF: unable to map PDC registers\n", node); return -ENXIO; } + pdc_version = pdc_reg_read(PDC_VERSION_REG, 0); + parent_domain = irq_find_host(parent); if (!parent_domain) { pr_err("%pOF: unable to find PDC's parent domain\n", node); -- cgit From cf5716acbfc6190b3f97f4614affdf5991aed7b2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 5 Sep 2023 15:19:26 +0200 Subject: arm64: dts: qcom: sm8150: extend the size of the PDC resource Follow the example of other platforms and extend the PDC resource region to 0x30000, so that the PDC driver can read the PDC_VERSION register. Fixes: 397ad94668c1 ("arm64: dts: qcom: sm8150: Add pdc interrupt controller node") Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230905-topic-sm8x50-upstream-pdc-ver-v4-2-fc633c7df84b@linaro.org --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index a7c3020a5de4..06c53000bb74 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3958,7 +3958,7 @@ pdc: interrupt-controller@b220000 { compatible = "qcom,sm8150-pdc", "qcom,pdc"; - reg = <0 0x0b220000 0 0x400>; + reg = <0 0x0b220000 0 0x30000>; qcom,pdc-ranges = <0 480 94>, <94 609 31>, <125 63 1>; #interrupt-cells = <2>; -- cgit From cfa1f9db6d6088118ef311c0927c66072665b47e Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 22 Jul 2022 16:11:54 +0100 Subject: dt-bindings: interrupt-controller: renesas,rzg2l-irqc: Update description for '#interrupt-cells' property Update description for '#interrupt-cells' property to utilize the RZG2L_{NMI,IRQX} for the first cell defined in the include/dt-bindings/interrupt-controller/irqc-rzg2l.h file. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Fixes: 96fed779d3d4cb3c ("dt-bindings: interrupt-controller: Add Renesas RZ/G2L Interrupt Controller") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220722151155.21100-3-prabhakar.mahadev-lad.rj@bp.renesas.com --- .../devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml index 33b90e975e33..ea7db3618b23 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml @@ -31,8 +31,9 @@ properties: - const: renesas,rzg2l-irqc '#interrupt-cells': - description: The first cell should contain external interrupt number (IRQ0-7) and the - second cell is used to specify the flag. + description: The first cell should contain a macro RZG2L_{NMI,IRQX} included in the + include/dt-bindings/interrupt-controller/irqc-rzg2l.h and the second + cell is used to specify the flag. const: 2 '#address-cells': -- cgit From 091c2848b0f7643eeb44abc1e7ba8f9ef5eb366f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 12 Sep 2023 14:01:13 +0300 Subject: ALSA: core: Use dev_name of card_dev as debugfs directory name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use temporary string for the debugfs directory name as we can use the device name of the card. This change will also fixes the following compiler warning/error (W=1): sound/core/init.c: In function ‘snd_card_init’: sound/core/init.c:367:28: error: ‘%d’ directive writing between 1 and 10 bytes into a region of size 4 [-Werror=format-overflow=] 367 | sprintf(name, "card%d", idx); | ^~ sound/core/init.c:367:23: note: directive argument in the range [0, 2147483646] 367 | sprintf(name, "card%d", idx); | ^~~~~~~~ sound/core/init.c:367:9: note: ‘sprintf’ output between 6 and 15 bytes into a destination of size 8 367 | sprintf(name, "card%d", idx); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The idx is guarantied to be less than SNDRV_CARDS (max 256 or 8) by the code in snd_card_init(), however the compiler does not see that. The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Suggested-by: Arnd Bergmann Suggested-by: Takashi Iwai Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230912110113.3166-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/core/init.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index d61bde1225f2..22c0d217b860 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -278,9 +278,6 @@ static int snd_card_init(struct snd_card *card, struct device *parent, size_t extra_size) { int err; -#ifdef CONFIG_SND_DEBUG - char name[8]; -#endif if (extra_size > 0) card->private_data = (char *)card + sizeof(struct snd_card); @@ -364,8 +361,8 @@ static int snd_card_init(struct snd_card *card, struct device *parent, } #ifdef CONFIG_SND_DEBUG - sprintf(name, "card%d", idx); - card->debugfs_root = debugfs_create_dir(name, sound_debugfs_root); + card->debugfs_root = debugfs_create_dir(dev_name(&card->card_dev), + sound_debugfs_root); #endif return 0; -- cgit From fa6a0c0c1dd53b3949ca56bf7213648dfd6a62ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:40 +0200 Subject: ASoC: rt5640: Revert "Fix sleep in atomic context" Commit 70a6404ff610 ("ASoC: rt5640: Fix sleep in atomic context") not only switched from request_irq() to request_threaded_irq(), to fix the sleep in atomic context issue, but it also added devm management of the IRQ by actually switching to devm_request_threaded_irq() (without any explanation in the commit message for this change). This is wrong since the IRQ was already explicitly managed by the driver. On unbind the ASoC core will call rt5640_set_jack(NULL) which in turn will call rt5640_disable_jack_detect() which frees the IRQ already. So now we have a double free. Besides the unexplained switch to devm being wrong, the actual fix for the sleep in atomic context issue also is not the best solution. The only thing which rt5640_irq() does is cancel + (re-)queue the jack_work delayed_work. This can be done in a single non sleeping call by replacing queue_delayed_work() with mod_delayed_work(), which does not sleep. Using mod_delayed_work() is a much better fix then adding a thread which does nothing other then queuing a work-item. This patch is a straight revert of the troublesome changes, the switch to mod_delayed_work() is done in a separate follow-up patch. Fixes: 70a6404ff610 ("ASoC: rt5640: Fix sleep in atomic context") Cc: Sameer Pujar Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 15e1a62b9e57..05ff8066171b 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2565,10 +2565,9 @@ static void rt5640_enable_jack_detect(struct snd_soc_component *component, if (jack_data && jack_data->use_platform_clock) rt5640->use_platform_clock = jack_data->use_platform_clock; - ret = devm_request_threaded_irq(component->dev, rt5640->irq, - NULL, rt5640_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "rt5640", rt5640); + ret = request_irq(rt5640->irq, rt5640_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); rt5640_disable_jack_detect(component); @@ -2621,9 +2620,8 @@ static void rt5640_enable_hda_jack_detect( rt5640->jack = jack; - ret = devm_request_threaded_irq(component->dev, rt5640->irq, - NULL, rt5640_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT, - "rt5640", rt5640); + ret = request_irq(rt5640->irq, rt5640_irq, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); rt5640->irq = -ENXIO; -- cgit From df7d595f6bd9dc96cc275cc4b0f313fcfa423c58 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:41 +0200 Subject: ASoC: rt5640: Fix sleep in atomic context Following prints are observed while testing audio on Jetson AGX Orin which has onboard RT5640 audio codec: BUG: sleeping function called from invalid context at kernel/workqueue.c:3027 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/0 preempt_count: 10001, expected: 0 RCU nest depth: 0, expected: 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x1e0/0x270 ---[ end trace ad1c64905aac14a6 ]- The IRQ handler rt5640_irq() runs in interrupt context and can sleep during cancel_delayed_work_sync(). The only thing which rt5640_irq() does is cancel + (re-)queue the jack_work delayed_work. This can be done in a single non sleeping call by replacing queue_delayed_work() with mod_delayed_work(), avoiding the sleep in atomic context. Fixes: 051dade34695 ("ASoC: rt5640: Fix the wrong state of JD1 and JD2") Reported-by: Sameer Pujar Closes: https://lore.kernel.org/r/1688015537-31682-4-git-send-email-spujar@nvidia.com Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 05ff8066171b..5c34c045d396 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2403,13 +2403,11 @@ static irqreturn_t rt5640_irq(int irq, void *data) struct rt5640_priv *rt5640 = data; int delay = 0; - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { - cancel_delayed_work_sync(&rt5640->jack_work); + if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) delay = 100; - } if (rt5640->jack) - queue_delayed_work(system_long_wq, &rt5640->jack_work, delay); + mod_delayed_work(system_long_wq, &rt5640->jack_work, delay); return IRQ_HANDLED; } -- cgit From 786120ebb649b166021f0212250e8627e53d068a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:42 +0200 Subject: ASoC: rt5640: Do not disable/enable IRQ twice on suspend/resume When jack-detect was originally added disabling the IRQ during suspend was done by the sound/soc/intel/boards/bytcr_rt5640.c driver calling snd_soc_component_set_jack(NULL) on suspend, which calls rt5640_disable_jack_detect(), which calls free_irq() which also disables it. Commit 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") added disable_irq() / enable_irq() calls on suspend/resume for machine drivers which do not call snd_soc_component_set_jack(NULL) on suspend. The new disable_irq() / enable_irq() are made conditional by "if (rt5640->irq)" statements, but this is true for the machine drivers which do call snd_soc_component_set_jack(NULL) on suspend too, causing a disable_irq() call there on the already free-ed IRQ. Change the "if (rt5640->irq)" condition to "if (rt5640->jack)" to fix this, rt5640->jack is only set if the jack-detect IRQ handler is still active when rt5640_suspend() runs. And adjust rt5640_enable_hda_jack_detect()'s request_irq() error handling to set rt5640->jack to NULL to match (note that the old setting of irq to -ENOXIO still resulted in disable_irq(-ENOXIO) calls on suspend). Fixes: 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-4-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 5c34c045d396..1bc281d42ca8 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2622,7 +2622,7 @@ static void rt5640_enable_hda_jack_detect( IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640); if (ret) { dev_warn(component->dev, "Failed to request IRQ %d: %d\n", rt5640->irq, ret); - rt5640->irq = -ENXIO; + rt5640->jack = NULL; return; } @@ -2797,7 +2797,7 @@ static int rt5640_suspend(struct snd_soc_component *component) { struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component); - if (rt5640->irq) { + if (rt5640->jack) { /* disable jack interrupts during system suspend */ disable_irq(rt5640->irq); } @@ -2825,10 +2825,9 @@ static int rt5640_resume(struct snd_soc_component *component) regcache_cache_only(rt5640->regmap, false); regcache_sync(rt5640->regmap); - if (rt5640->irq) + if (rt5640->jack) { enable_irq(rt5640->irq); - if (rt5640->jack) { if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { snd_soc_component_update_bits(component, RT5640_DUMMY2, 0x1100, 0x1100); -- cgit From b5e85e535551bf82242aa5896e14a136ed3c156d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:43 +0200 Subject: ASoC: rt5640: Enable the IRQ on resume after configuring jack-detect The jack-detect IRQ should be enabled *after* the jack-detect related configuration registers have been programmed. Move the enable_irq() call for this to after the register setup. Fixes: 5fabcc90e79b ("ASoC: rt5640: Fix Jack work after system suspend") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-5-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 1bc281d42ca8..03c866c04c7a 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2826,8 +2826,6 @@ static int rt5640_resume(struct snd_soc_component *component) regcache_sync(rt5640->regmap); if (rt5640->jack) { - enable_irq(rt5640->irq); - if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { snd_soc_component_update_bits(component, RT5640_DUMMY2, 0x1100, 0x1100); @@ -2854,6 +2852,7 @@ static int rt5640_resume(struct snd_soc_component *component) } } + enable_irq(rt5640->irq); queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); } -- cgit From 8c8bf3df6b7c0ed1c4dd373b23eb0ce13a63f452 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:44 +0200 Subject: ASoC: rt5640: Fix IRQ not being free-ed for HDA jack detect mode Set "rt5640->irq_requested = true" after a successful request_irq() in rt5640_enable_hda_jack_detect(), so that rt5640_disable_jack_detect() properly frees the IRQ. This fixes the IRQ not being freed on rmmod / driver unbind. Fixes: 2b9c8d2b3c89 ("ASoC: rt5640: Add the HDA header support") Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-6-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 03c866c04c7a..a4a11407ab10 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2625,6 +2625,7 @@ static void rt5640_enable_hda_jack_detect( rt5640->jack = NULL; return; } + rt5640->irq_requested = true; /* sync initial jack state */ queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); -- cgit From 8fc7cc507d61fc655172836c74fb7fcc8b7a978b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 13:32:45 +0200 Subject: ASoC: rt5640: Only cancel jack-detect work on suspend if active If jack-detection is not used; or has already been disabled then there is no need to call rt5640_cancel_work(). Move the rt5640_cancel_work() inside the "if (rt5640->jack) {}" block, grouping it together with the disabling of the IRQ which queues the work in the first place. This also makes suspend() symetrical with resume() which re-queues the work in an "if (rt5640->jack) {}" block. Cc: Oder Chiou Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230912113245.320159-7-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index a4a11407ab10..e8cdc166bdaa 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2801,9 +2801,9 @@ static int rt5640_suspend(struct snd_soc_component *component) if (rt5640->jack) { /* disable jack interrupts during system suspend */ disable_irq(rt5640->irq); + rt5640_cancel_work(rt5640); } - rt5640_cancel_work(rt5640); snd_soc_component_force_bias_level(component, SND_SOC_BIAS_OFF); rt5640_reset(component); regcache_cache_only(rt5640->regmap, true); -- cgit From 02d89917ef68acbe65c7cc2323f1db4429879878 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 4 Aug 2023 17:52:11 +0200 Subject: rtla/timerlat_aa: Zero thread sum after every sample analysis The thread thread_thread_sum accounts for thread interference during a single activation. It was not being zeroed, so it was accumulating thread interference over all activations. It was not that visible when timerlat was the highest priority. Link: https://lore.kernel.org/lkml/97bff55b0141f2d01b47d9450a5672fde147b89a.1691162043.git.bristot@kernel.org Fixes: 27e348b221f6 ("rtla/timerlat: Add auto-analysis core") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/timerlat_aa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index e0ffe69c271c..dec5b4c4511e 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -159,6 +159,7 @@ static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, taa_data->thread_nmi_sum = 0; taa_data->thread_irq_sum = 0; taa_data->thread_softirq_sum = 0; + taa_data->thread_thread_sum = 0; taa_data->thread_blocking_duration = 0; taa_data->timer_irq_start_time = 0; taa_data->timer_irq_duration = 0; -- cgit From 6c73daf26420b97fb8b4a620e4ffee5c1f9d44d1 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 4 Aug 2023 17:52:12 +0200 Subject: rtla/timerlat_aa: Fix negative IRQ delay When estimating the IRQ timer delay, we are dealing with two different clock sources: the external clock source that timerlat uses as a reference and the clock used by the tracer. There are also two moments: the time reading the clock and the timer in which the event is placed in the buffer (the trace event timestamp). If the processor is slow or there is some hardware noise, the difference between the timestamp and the external clock, read can be longer than the IRQ handler delay, resulting in a negative time. If so, set IRQ to start delay as 0. In the end, it is less near-zero and relevant then the noise. Link: https://lore.kernel.org/lkml/a066fb667c7136d86dcddb3c7ccd72587db3e7c7.1691162043.git.bristot@kernel.org Fixes: 27e348b221f6 ("rtla/timerlat: Add auto-analysis core") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/timerlat_aa.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index dec5b4c4511e..baf1efda0581 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -338,7 +338,23 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor taa_data->timer_irq_start_time = start; taa_data->timer_irq_duration = duration; - taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + /* + * We are dealing with two different clock sources: the + * external clock source that timerlat uses as a reference + * and the clock used by the tracer. There are also two + * moments: the time reading the clock and the timer in + * which the event is placed in the buffer (the trace + * event timestamp). If the processor is slow or there + * is some hardware noise, the difference between the + * timestamp and the external clock read can be longer + * than the IRQ handler delay, resulting in a negative + * time. If so, set IRQ start delay as 0. In the end, + * it is less relevant than the noise. + */ + if (expected_start < taa_data->timer_irq_start_time) + taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + else + taa_data->timer_irq_start_delay = 0; /* * not exit from idle. -- cgit From 301deca09b254965661d3e971f1a60ac2ce41f5f Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 4 Aug 2023 17:52:13 +0200 Subject: rtla/timerlat_aa: Fix previous IRQ delay for IRQs that happens after thread sample timerlat auto-analysis takes note of all IRQs, before or after the execution of the timerlat thread. Because we cannot go backward in the trace (we will fix it when moving to trace-cmd lib?), timerlat aa take note of the last IRQ execution in the waiting for the IRQ state, and then print it if it is executed after the expected timer IRQ starting time. After the thread sample, the timerlat starts recording the next IRQs as "previous" irq for the next occurrence. However, if an IRQ happens after the thread measurement but before the tracing stops, it is classified as a previous IRQ. That is not wrong, as it can be "previous" for the subsequent activation. What is wrong is considering it as a potential source for the last activation. Ignore the IRQ interference that happens after the IRQ starting time for now. A future improvement for timerlat can be either keeping a list of previous IRQ execution or using the trace-cmd library. Still, it requires further investigation - it is a new feature. Link: https://lore.kernel.org/lkml/a44a3f5c801dcc697bacf7325b65d4a5b0460537.1691162043.git.bristot@kernel.org Fixes: 27e348b221f6 ("rtla/timerlat: Add auto-analysis core") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/timerlat_aa.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index baf1efda0581..7093fd5333be 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -545,7 +545,7 @@ static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_rec static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, int irq_thresh, int thread_thresh) { - unsigned long long exp_irq_ts; + long long exp_irq_ts; int total; int irq; @@ -562,12 +562,15 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, /* * Expected IRQ arrival time using the trace clock as the base. + * + * TODO: Add a list of previous IRQ, and then run the list backwards. */ exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; - - if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) - printf(" Previous IRQ interference: \t\t up to %9.2f us\n", - ns_to_usf(taa_data->prev_irq_duration)); + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) + printf(" Previous IRQ interference: \t\t up to %9.2f us\n", + ns_to_usf(taa_data->prev_irq_duration)); + } /* * The delay that the IRQ suffered before starting. -- cgit From 4eb94a7793074f799b1f558471019e9a21fa9546 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Sep 2023 22:59:28 -0700 Subject: selftests/bpf: ensure all CI arches set CONFIG_BPF_KPROBE_OVERRIDE=y Turns out CONFIG_BPF_KPROBE_OVERRIDE=y is only enabled in x86-64 CI, but is not set on aarch64, causing CI failures ([0]). Move CONFIG_BPF_KPROBE_OVERRIDE=y to arch-agnostic CI config. [0] https://github.com/kernel-patches/bpf/actions/runs/6122324047/job/16618390535 Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230912055928.1704269-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/config.x86_64 | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 1c7584e8dd9e..e41eb33b2704 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,6 +4,7 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b650b2e617b8..2e70a6048278 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -20,7 +20,6 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y CONFIG_BPFILTER=y -- cgit From 3e8bd1ba29f5e64c7ff2cb29f737f8510db1092f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sat, 26 Aug 2023 20:27:02 +0200 Subject: riscv: dts: starfive: fix NOR flash reserved-data partition size The Starfive VisionFive 2 has a 16MiB NOR flash, while the reserved-data partition is declared starting at address 0x600000 with a size of 0x1000000. This causes the kernel to output the following warning: [ 22.156589] mtd: partition "reserved-data" extends beyond the end of device "13010000.spi.0" -- size truncated to 0xa00000 It seems to be a confusion between the size of the partition and the end address. Fix that by specifying the right size. Fixes: 8384087a4223 ("riscv: dts: starfive: Add QSPI controller node for StarFive JH7110 SoC") Signed-off-by: Aurelien Jarno Reviewed-by: Emil Renner Berthing Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi index d79f94432b27..d4ceda901f33 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -262,7 +262,7 @@ reg = <0x100000 0x400000>; }; reserved-data@600000 { - reg = <0x600000 0x1000000>; + reg = <0x600000 0xa00000>; }; }; }; -- cgit From 37d1a624cb7934687dd9775f7fea771ae5aadd29 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Aug 2023 15:42:01 -0700 Subject: power: supply: rt5033_charger: recognize EXTCON setting CHARGER_RT5033 should honor the EXTCON setting to prevent these build errors: riscv64-linux-ld: drivers/power/supply/rt5033_charger.o: in function `.L33': rt5033_charger.c:(.text.rt5033_charger_probe+0x578): undefined reference to `extcon_find_edev_by_node' riscv64-linux-ld: drivers/power/supply/rt5033_charger.o: in function `.L0 ': rt5033_charger.c:(.text.rt5033_charger_probe+0x64e): undefined reference to `devm_extcon_register_notifier_all' riscv64-linux-ld: drivers/power/supply/rt5033_charger.o: in function `.L96': rt5033_charger.c:(.text.rt5033_charger_extcon_work+0x32): undefined reference to `extcon_get_state' Fixes: 12cc585f36b8 ("power: supply: rt5033_charger: Add cable detection and USB OTG supply") Signed-off-by: Randy Dunlap Cc: Jakob Hauser Cc: Sebastian Reichel Cc: Lee Jones Cc: linux-pm@vger.kernel.org Link: https://lore.kernel.org/r/20230828224201.26823-1-rdunlap@infradead.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index 663a1c423806..a61bb1283e19 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -769,6 +769,7 @@ config BATTERY_RT5033 config CHARGER_RT5033 tristate "RT5033 battery charger support" depends on MFD_RT5033 + depends on EXTCON || !EXTCON help This adds support for battery charger in Richtek RT5033 PMIC. The device supports pre-charge mode, fast charge mode and -- cgit From 4aa8cdd5e523d2d8ec8df29dcd696bf207d7a494 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Sep 2023 10:05:48 -0700 Subject: iomap: handle error conditions more gracefully in iomap_to_bh iomap_to_bh currently BUG()s when the passed in block number is not in the iomap. For file systems that have proper synchronization this should never happen and so far hasn't in mainline, but for block devices size changes aren't fully synchronized against ongoing I/O. Instead of BUG()ing in this case, return -EIO to the caller, which already has proper error handling. While we're at it, also return -EIO for an unknown iomap state instead of returning garbage. Fixes: 487c607df790 ("block: use iomap for writes to block devices") Reported-by: syzbot+4a08ffdf3667b36650a1@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Damien Le Moal --- fs/buffer.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 2379564e5aea..a6785cd07081 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2011,7 +2011,7 @@ void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to) } EXPORT_SYMBOL(folio_zero_new_buffers); -static void +static int iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, const struct iomap *iomap) { @@ -2025,7 +2025,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, * current block, then do not map the buffer and let the caller * handle it. */ - BUG_ON(offset >= iomap->offset + iomap->length); + if (offset >= iomap->offset + iomap->length) + return -EIO; switch (iomap->type) { case IOMAP_HOLE: @@ -2037,7 +2038,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); - break; + return 0; case IOMAP_DELALLOC: if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) @@ -2045,7 +2046,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, set_buffer_uptodate(bh); set_buffer_mapped(bh); set_buffer_delay(bh); - break; + return 0; case IOMAP_UNWRITTEN: /* * For unwritten regions, we always need to ensure that regions @@ -2062,7 +2063,10 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; set_buffer_mapped(bh); - break; + return 0; + default: + WARN_ON_ONCE(1); + return -EIO; } } @@ -2103,13 +2107,12 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, clear_buffer_new(bh); if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); - if (get_block) { + if (get_block) err = get_block(inode, block, bh, 1); - if (err) - break; - } else { - iomap_to_bh(inode, block, bh, iomap); - } + else + err = iomap_to_bh(inode, block, bh, iomap); + if (err) + break; if (buffer_new(bh)) { clean_bdev_bh_alias(bh); -- cgit From f12b96683d6976a3a07fdf3323277c79dbe8f6ab Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:07 -0700 Subject: xfs: use i_prev_unlinked to distinguish inodes that are not on the unlinked list Alter the definition of i_prev_unlinked slightly to make it more obvious when an inode with 0 link count is not part of the iunlink bucket lists rooted in the AGI. This distinction is necessary because it is not sufficient to check inode.i_nlink to decide if an inode is on the unlinked list. Updates to i_nlink can happen while holding only ILOCK_EXCL, but updates to an inode's position in the AGI unlinked list (which happen after the nlink update) requires both ILOCK_EXCL and the AGI buffer lock. The next few patches will make it possible to reload an entire unlinked bucket list when we're walking the inode table or performing handle operations and need more than the ability to iget the last inode in the chain. The upcoming directory repair code also needs to be able to make this distinction to decide if a zero link count directory should be moved to the orphanage or allowed to inactivate. An upcoming enhancement to the online AGI fsck code will need this distinction to check and rebuild the AGI unlinked buckets. Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 3 ++- fs/xfs/xfs_inode.h | 20 +++++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 30d7454a9b93..3c210ac83713 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -113,7 +113,7 @@ xfs_inode_alloc( INIT_LIST_HEAD(&ip->i_ioend_list); spin_lock_init(&ip->i_ioend_lock); ip->i_next_unlinked = NULLAGINO; - ip->i_prev_unlinked = NULLAGINO; + ip->i_prev_unlinked = 0; return ip; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7b11059067f7..475de8f919be 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2014,6 +2014,7 @@ xfs_iunlink_insert_inode( } /* Point the head of the list to point to this inode. */ + ip->i_prev_unlinked = NULLAGINO; return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); } @@ -2116,7 +2117,7 @@ xfs_iunlink_remove_inode( } ip->i_next_unlinked = NULLAGINO; - ip->i_prev_unlinked = NULLAGINO; + ip->i_prev_unlinked = 0; return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7547caf2f2ab..65aae8925509 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -68,8 +68,21 @@ typedef struct xfs_inode { uint64_t i_diflags2; /* XFS_DIFLAG2_... */ struct timespec64 i_crtime; /* time created */ - /* unlinked list pointers */ + /* + * Unlinked list pointers. These point to the next and previous inodes + * in the AGI unlinked bucket list, respectively. These fields can + * only be updated with the AGI locked. + * + * i_next_unlinked caches di_next_unlinked. + */ xfs_agino_t i_next_unlinked; + + /* + * If the inode is not on an unlinked list, this field is zero. If the + * inode is the first element in an unlinked list, this field is + * NULLAGINO. Otherwise, i_prev_unlinked points to the previous inode + * in the unlinked list. + */ xfs_agino_t i_prev_unlinked; /* VFS inode */ @@ -81,6 +94,11 @@ typedef struct xfs_inode { struct list_head i_ioend_list; } xfs_inode_t; +static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip) +{ + return ip->i_prev_unlinked != 0; +} + static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip) { return ip->i_forkoff > 0; -- cgit From 76e589013fec672c3587d6314f2d1f0aeddc26d9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:42:34 -0700 Subject: xfs: allow inode inactivation during a ro mount log recovery In the next patch, we're going to prohibit log recovery if the primary superblock contains an unrecognized rocompat feature bit even on readonly mounts. This requires removing all the code in the log mounting process that temporarily disables the readonly state. Unfortunately, inode inactivation disables itself on readonly mounts. Clearing the iunlinked lists after log recovery needs inactivation to run to free the unreferenced inodes, which (AFAICT) is the only reason why log mounting plays games with the readonly state in the first place. Therefore, change the inactivation predicates to allow inactivation during log recovery of a readonly mount. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_inode.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 360fe83a334f..f7f8292347ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1642,8 +1642,11 @@ xfs_inode_needs_inactive( if (VFS_I(ip)->i_mode == 0) return false; - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) return false; /* If the log isn't running, push inodes straight to reclaim. */ @@ -1703,8 +1706,11 @@ xfs_inactive( mp = ip->i_mount; ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) goto out; /* Metadata inodes require explicit resource cleanup. */ -- cgit From 83771c50e42b92de6740a63e152c96c052d37736 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:07 -0700 Subject: xfs: reload entire unlinked bucket lists The previous patch to reload unrecovered unlinked inodes when adding a newly created inode to the unlinked list is missing a key piece of functionality. It doesn't handle the case that someone calls xfs_iget on an inode that is not the last item in the incore list. For example, if at mount time the ondisk iunlink bucket looks like this: AGI -> 7 -> 22 -> 3 -> NULL None of these three inodes are cached in memory. Now let's say that someone tries to open inode 3 by handle. We need to walk the list to make sure that inodes 7 and 22 get loaded cold, and that the i_prev_unlinked of inode 3 gets set to 22. Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_export.c | 6 ++++ fs/xfs/xfs_inode.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode.h | 9 +++++ fs/xfs/xfs_itable.c | 9 +++++ fs/xfs/xfs_trace.h | 20 +++++++++++ 5 files changed, 144 insertions(+) diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1064c2342876..f71ea786a6d2 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -146,6 +146,12 @@ xfs_nfs_get_inode( return ERR_PTR(error); } + error = xfs_inode_reload_unlinked(ip); + if (error) { + xfs_irele(ip); + return ERR_PTR(error); + } + if (VFS_I(ip)->i_generation != generation) { xfs_irele(ip); return ERR_PTR(-ESTALE); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 475de8f919be..2fd22db528b1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3606,3 +3606,103 @@ xfs_iunlock2_io_mmap( if (ip1 != ip2) inode_unlock(VFS_I(ip1)); } + +/* + * Reload the incore inode list for this inode. Caller should ensure that + * the link count cannot change, either by taking ILOCK_SHARED or otherwise + * preventing other threads from executing. + */ +int +xfs_inode_reload_unlinked_bucket( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *agibp; + struct xfs_agi *agi; + struct xfs_perag *pag; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + xfs_agino_t prev_agino, next_agino; + unsigned int bucket; + bool foundit = false; + int error; + + /* Grab the first inode in the list */ + pag = xfs_perag_get(mp, agno); + error = xfs_ialloc_read_agi(pag, tp, &agibp); + xfs_perag_put(pag); + if (error) + return error; + + bucket = agino % XFS_AGI_UNLINKED_BUCKETS; + agi = agibp->b_addr; + + trace_xfs_inode_reload_unlinked_bucket(ip); + + xfs_info_ratelimited(mp, + "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.", + agino, agno); + + prev_agino = NULLAGINO; + next_agino = be32_to_cpu(agi->agi_unlinked[bucket]); + while (next_agino != NULLAGINO) { + struct xfs_inode *next_ip = NULL; + + if (next_agino == agino) { + /* Found this inode, set its backlink. */ + next_ip = ip; + next_ip->i_prev_unlinked = prev_agino; + foundit = true; + } + if (!next_ip) { + /* Inode already in memory. */ + next_ip = xfs_iunlink_lookup(pag, next_agino); + } + if (!next_ip) { + /* Inode not in memory, reload. */ + error = xfs_iunlink_reload_next(tp, agibp, prev_agino, + next_agino); + if (error) + break; + + next_ip = xfs_iunlink_lookup(pag, next_agino); + } + if (!next_ip) { + /* No incore inode at all? We reloaded it... */ + ASSERT(next_ip != NULL); + error = -EFSCORRUPTED; + break; + } + + prev_agino = next_agino; + next_agino = next_ip->i_next_unlinked; + } + + xfs_trans_brelse(tp, agibp); + /* Should have found this inode somewhere in the iunlinked bucket. */ + if (!error && !foundit) + error = -EFSCORRUPTED; + return error; +} + +/* Decide if this inode is missing its unlinked list and reload it. */ +int +xfs_inode_reload_unlinked( + struct xfs_inode *ip) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(ip->i_mount, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_unlinked_incomplete(ip)) + error = xfs_inode_reload_unlinked_bucket(tp, ip); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_trans_cancel(tp); + + return error; +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65aae8925509..a111b5551ecd 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -593,4 +593,13 @@ void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); +static inline bool +xfs_inode_unlinked_incomplete( + struct xfs_inode *ip) +{ + return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); +} +int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); +int xfs_inode_reload_unlinked(struct xfs_inode *ip); + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c2093cb56092..ccf0c4ff4490 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -80,6 +80,15 @@ xfs_bulkstat_one_int( if (error) goto out; + if (xfs_inode_unlinked_incomplete(ip)) { + error = xfs_inode_reload_unlinked_bucket(tp, ip); + if (error) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_irele(ip); + return error; + } + } + ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); inode = VFS_I(ip); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7b1cb5d59d8f..3926cf7f2a6e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3849,6 +3849,26 @@ TRACE_EVENT(xfs_iunlink_reload_next, __entry->next_agino) ); +TRACE_EVENT(xfs_inode_reload_unlinked_bucket, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, agino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); + __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); + ), + TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agino, + __entry->agino % XFS_AGI_UNLINKED_BUCKETS) +); + DECLARE_EVENT_CLASS(xfs_ag_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), -- cgit From 74ad4693b6473950e971b3dc525b5ee7570e05d0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:42:35 -0700 Subject: xfs: fix log recovery when unknown rocompat bits are set Log recovery has always run on read only mounts, even where the primary superblock advertises unknown rocompat bits. Due to a misunderstanding between Eric and Darrick back in 2018, we accidentally changed the superblock write verifier to shutdown the fs over that exact scenario. As a result, the log cleaning that occurs at the end of the mounting process fails if there are unknown rocompat bits set. As we now allow writing of the superblock if there are unknown rocompat bits set on a RO mount, we no longer want to turn off RO state to allow log recovery to succeed on a RO mount. Hence we also remove all the (now unnecessary) RO state toggling from the log recovery path. Fixes: 9e037cb7972f ("xfs: check for unknown v5 feature bits in superblock write verifier" Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_sb.c | 3 ++- fs/xfs/xfs_log.c | 17 ----------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5e174685a77c..6264daaab37b 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -266,7 +266,8 @@ xfs_validate_sb_write( return -EFSCORRUPTED; } - if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + if (!xfs_is_readonly(mp) && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 79004d193e54..51c100c86177 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -715,15 +715,7 @@ xfs_log_mount( * just worked. */ if (!xfs_has_norecovery(mp)) { - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. - */ - bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, - &mp->m_opstate); error = xlog_recover(log); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); @@ -772,7 +764,6 @@ xfs_log_mount_finish( struct xfs_mount *mp) { struct xlog *log = mp->m_log; - bool readonly; int error = 0; if (xfs_has_norecovery(mp)) { @@ -780,12 +771,6 @@ xfs_log_mount_finish( return 0; } - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. - */ - readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - /* * During the second phase of log recovery, we need iget and * iput to behave like they do for an active filesystem. @@ -835,8 +820,6 @@ xfs_log_mount_finish( xfs_buftarg_drain(mp->m_ddev_targp); clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* Make sure the log is dead if we're returning failure. */ ASSERT(!error || xlog_is_shutdown(log)); -- cgit From 3c919b0910906cc69d76dea214776f0eac73358b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:05 -0700 Subject: xfs: reserve less log space when recovering log intent items Wengang Wang reports that a customer's system was running a number of truncate operations on a filesystem with a very small log. Contention on the reserve heads lead to other threads stalling on smaller updates (e.g. mtime updates) long enough to result in the node being rebooted on account of the lack of responsivenes. The node failed to recover because log recovery of an EFI became stuck waiting for a grant of reserve space. From Wengang's report: "For the file deletion, log bytes are reserved basing on xfs_mount->tr_itruncate which is: tr_logres = 175488, tr_logcount = 2, tr_logflags = XFS_TRANS_PERM_LOG_RES, "You see it's a permanent log reservation with two log operations (two transactions in rolling mode). After calculation (xlog_calc_unit_res() adds space for various log headers), the final log space needed per transaction changes from 175488 to 180208 bytes. So the total log space needed is 360416 bytes (180208 * 2). [That quantity] of log space (360416 bytes) needs to be reserved for both run time inode removing (xfs_inactive_truncate()) and EFI recover (xfs_efi_item_recover())." In other words, runtime pre-reserves 360K of space in anticipation of running a chain of two transactions in which each transaction gets a 180K reservation. Now that we've allocated the transaction, we delete the bmap mapping, log an EFI to free the space, and roll the transaction as part of finishing the deferops chain. Rolling creates a new xfs_trans which shares its ticket with the old transaction. Next, xfs_trans_roll calls __xfs_trans_commit with regrant == true, which calls xlog_cil_commit with the same regrant parameter. xlog_cil_commit calls xfs_log_ticket_regrant, which decrements t_cnt and subtracts t_curr_res from the reservation and write heads. If the filesystem is fresh and the first transaction only used (say) 20K, then t_curr_res will be 160K, and we give that much reservation back to the reservation head. Or if the file is really fragmented and the first transaction actually uses 170K, then t_curr_res will be 10K, and that's what we give back to the reservation. Having done that, we're now headed into the second transaction with an EFI and 180K of reservation. Other threads apparently consumed all the reservation for smaller transactions, such as timestamp updates. Now let's say the first transaction gets written to disk and we crash without ever completing the second transaction. Now we remount the fs, log recovery finds the unfinished EFI, and calls xfs_efi_recover to finish the EFI. However, xfs_efi_recover starts a new tr_itruncate tranasction, which asks for 360K log reservation. This is a lot more than the 180K that we had reserved at the time of the crash. If the first EFI to be recovered is also pinning the tail of the log, we will be unable to free any space in the log, and recovery livelocks. Wengang confirmed this: "Now we have the second transaction which has 180208 log bytes reserved too. The second transaction is supposed to process intents including extent freeing. With my hacking patch, I blocked the extent freeing 5 hours. So in that 5 hours, 180208 (NOT 360416) log bytes are reserved. "With my test case, other transactions (update timestamps) then happen. As my hacking patch pins the journal tail, those timestamp-updating transactions finally use up (almost) all the left available log space (in memory in on disk). And finally the on disk (and in memory) available log space goes down near to 180208 bytes. Those 180208 bytes are reserved by [the] second (extent-free) transaction [in the chain]." Wengang and I noticed that EFI recovery starts a transaction, completes one step of the chain, and commits the transaction without completing any other steps of the chain. Those subsequent steps are completed by xlog_finish_defer_ops, which allocates yet another transaction to finish the rest of the chain. That transaction gets the same tr_logres as the head transaction, but with tr_logcount = 1 to force regranting with every roll to avoid livelocks. In other words, we already figured this out in commit 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation"), but should have applied that logic to each intent item's recovery function. For Wengang's case, the xfs_trans_alloc call in the EFI recovery function should only be asking for a single transaction's worth of log reservation -- 180K, not 360K. Quoting Wengang again: "With log recovery, during EFI recovery, we use tr_itruncate again to reserve two transactions that needs 360416 log bytes. Reserving 360416 bytes fails [stalls] because we now only have about 180208 available. "Actually during the EFI recover, we only need one transaction to free the extents just like the 2nd transaction at RUNTIME. So it only needs to reserve 180208 rather than 360416 bytes. We have (a bit) more than 180208 available log bytes on disk, so [if we decrease the reservation to 180K] the reservation goes and the recovery [finishes]. That is to say: we can fix the log recover part to fix the issue. We can introduce a new xfs_trans_res xfs_mount->tr_ext_free { tr_logres = 175488, tr_logcount = 0, tr_logflags = 0, } "and use tr_ext_free instead of tr_itruncate in EFI recover." However, I don't think it quite makes sense to create an entirely new transaction reservation type to handle single-stepping during log recovery. Instead, we should copy the transaction reservation information in the xfs_mount, change tr_logcount to 1, and pass that into xfs_trans_alloc. We know this won't risk changing the min log size computation since we always ask for a fraction of the reservation for all known transaction types. This looks like it's been lurking in the codebase since commit 3d3c8b5222b92, which changed the xfs_trans_reserve call in xlog_recover_process_efi to use the tr_logcount in tr_itruncate. That changed the EFI recovery transaction from making a non-XFS_TRANS_PERM_LOG_RES request for one transaction's worth of log space to a XFS_TRANS_PERM_LOG_RES request for two transactions worth. Fixes: 3d3c8b5222b92 ("xfs: refactor xfs_trans_reserve() interface") Complements: 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation") Suggested-by: Wengang Wang Cc: Srikanth C S [djwong: apply the same transformation to all log intent recovery] Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_recover.h | 22 ++++++++++++++++++++++ fs/xfs/xfs_attr_item.c | 7 ++++--- fs/xfs/xfs_bmap_item.c | 4 +++- fs/xfs/xfs_extfree_item.c | 4 +++- fs/xfs/xfs_refcount_item.c | 6 ++++-- fs/xfs/xfs_rmap_item.c | 6 ++++-- 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 2420865f3007..a5100a11faf9 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct xlog *log); #define xlog_check_buf_cancel_table(log) do { } while (0) #endif +/* + * Transform a regular reservation into one suitable for recovery of a log + * intent item. + * + * Intent recovery only runs a single step of the transaction chain and defers + * the rest to a separate transaction. Therefore, we reduce logcount to 1 here + * to avoid livelocks if the log grant space is nearly exhausted due to the + * recovered intent pinning the tail. Keep the same logflags to avoid tripping + * asserts elsewhere. Struct copies abound below. + */ +static inline struct xfs_trans_res +xlog_recover_resv(const struct xfs_trans_res *r) +{ + struct xfs_trans_res ret = { + .tr_logres = r->tr_logres, + .tr_logcount = 1, + .tr_logflags = r->tr_logflags, + }; + + return ret; +} + #endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 2788a6f2edcd..36fe2abb16e6 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -547,7 +547,7 @@ xfs_attri_item_recover( struct xfs_inode *ip; struct xfs_da_args *args; struct xfs_trans *tp; - struct xfs_trans_res tres; + struct xfs_trans_res resv; struct xfs_attri_log_format *attrp; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; int error; @@ -618,8 +618,9 @@ xfs_attri_item_recover( goto out; } - xfs_init_attr_trans(args, &tres, &total); - error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp); + xfs_init_attr_trans(args, &resv, &total); + resv = xlog_recover_resv(&resv); + error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 7551c3ec4ea5..e736a0844c89 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -490,6 +490,7 @@ xfs_bui_item_recover( struct list_head *capture_list) { struct xfs_bmap_intent fake = { }; + struct xfs_trans_res resv; struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; @@ -515,7 +516,8 @@ xfs_bui_item_recover( return error; /* Allocate transaction and do the work. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) goto err_rele; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f1a5ecf099aa..3fa8789820ad 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -660,6 +660,7 @@ xfs_efi_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_efi_log_item *efip = EFI_ITEM(lip); struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_efd_log_item *efdp; @@ -683,7 +684,8 @@ xfs_efi_item_recover( } } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); if (error) return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index edd8587658d5..2d4444d61e98 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -477,6 +477,7 @@ xfs_cui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_cui_log_item *cuip = CUI_ITEM(lip); struct xfs_cud_log_item *cudp; struct xfs_trans *tp; @@ -514,8 +515,9 @@ xfs_cui_item_recover( * doesn't fit. We need to reserve enough blocks to handle a * full btree split on either end of the refcount range. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, - mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 520c7ebdfed8..0e0e747028da 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -507,6 +507,7 @@ xfs_rui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_rui_log_item *ruip = RUI_ITEM(lip); struct xfs_rud_log_item *rudp; struct xfs_trans *tp; @@ -530,8 +531,9 @@ xfs_rui_item_recover( } } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, - mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; rudp = xfs_trans_get_rud(tp, ruip); -- cgit From 68b957f64fca1930164bfc6d6d379acdccd547d7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:06 -0700 Subject: xfs: load uncached unlinked inodes into memory on demand shrikanth hegde reports that filesystems fail shortly after mount with the following failure: WARNING: CPU: 56 PID: 12450 at fs/xfs/xfs_inode.c:1839 xfs_iunlink_lookup+0x58/0x80 [xfs] This of course is the WARN_ON_ONCE in xfs_iunlink_lookup: ip = radix_tree_lookup(&pag->pag_ici_root, agino); if (WARN_ON_ONCE(!ip || !ip->i_ino)) { ... } From diagnostic data collected by the bug reporters, it would appear that we cleanly mounted a filesystem that contained unlinked inodes. Unlinked inodes are only processed as a final step of log recovery, which means that clean mounts do not process the unlinked list at all. Prior to the introduction of the incore unlinked lists, this wasn't a problem because the unlink code would (very expensively) traverse the entire ondisk metadata iunlink chain to keep things up to date. However, the incore unlinked list code complains when it realizes that it is out of sync with the ondisk metadata and shuts down the fs, which is bad. Ritesh proposed to solve this problem by unconditionally parsing the unlinked lists at mount time, but this imposes a mount time cost for every filesystem to catch something that should be very infrequent. Instead, let's target the places where we can encounter a next_unlinked pointer that refers to an inode that is not in cache, and load it into cache. Note: This patch does not address the problem of iget loading an inode from the middle of the iunlink list and needing to set i_prev_unlinked correctly. Reported-by: shrikanth hegde Triaged-by: Ritesh Harjani Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_inode.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_trace.h | 25 +++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f7f8292347ab..7b11059067f7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1828,12 +1828,17 @@ xfs_iunlink_lookup( rcu_read_lock(); ip = radix_tree_lookup(&pag->pag_ici_root, agino); + if (!ip) { + /* Caller can handle inode not being in memory. */ + rcu_read_unlock(); + return NULL; + } /* - * Inode not in memory or in RCU freeing limbo should not happen. - * Warn about this and let the caller handle the failure. + * Inode in RCU freeing limbo should not happen. Warn about this and + * let the caller handle the failure. */ - if (WARN_ON_ONCE(!ip || !ip->i_ino)) { + if (WARN_ON_ONCE(!ip->i_ino)) { rcu_read_unlock(); return NULL; } @@ -1842,7 +1847,10 @@ xfs_iunlink_lookup( return ip; } -/* Update the prev pointer of the next agino. */ +/* + * Update the prev pointer of the next agino. Returns -ENOLINK if the inode + * is not in cache. + */ static int xfs_iunlink_update_backref( struct xfs_perag *pag, @@ -1857,7 +1865,8 @@ xfs_iunlink_update_backref( ip = xfs_iunlink_lookup(pag, next_agino); if (!ip) - return -EFSCORRUPTED; + return -ENOLINK; + ip->i_prev_unlinked = prev_agino; return 0; } @@ -1901,6 +1910,62 @@ xfs_iunlink_update_bucket( return 0; } +/* + * Load the inode @next_agino into the cache and set its prev_unlinked pointer + * to @prev_agino. Caller must hold the AGI to synchronize with other changes + * to the unlinked list. + */ +STATIC int +xfs_iunlink_reload_next( + struct xfs_trans *tp, + struct xfs_buf *agibp, + xfs_agino_t prev_agino, + xfs_agino_t next_agino) +{ + struct xfs_perag *pag = agibp->b_pag; + struct xfs_mount *mp = pag->pag_mount; + struct xfs_inode *next_ip = NULL; + xfs_ino_t ino; + int error; + + ASSERT(next_agino != NULLAGINO); + +#ifdef DEBUG + rcu_read_lock(); + next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino); + ASSERT(next_ip == NULL); + rcu_read_unlock(); +#endif + + xfs_info_ratelimited(mp, + "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.", + next_agino, pag->pag_agno); + + /* + * Use an untrusted lookup just to be cautious in case the AGI has been + * corrupted and now points at a free inode. That shouldn't happen, + * but we'd rather shut down now since we're already running in a weird + * situation. + */ + ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); + error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip); + if (error) + return error; + + /* If this is not an unlinked inode, something is very wrong. */ + if (VFS_I(next_ip)->i_nlink != 0) { + error = -EFSCORRUPTED; + goto rele; + } + + next_ip->i_prev_unlinked = prev_agino; + trace_xfs_iunlink_reload_next(next_ip); +rele: + ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE)); + xfs_irele(next_ip); + return error; +} + static int xfs_iunlink_insert_inode( struct xfs_trans *tp, @@ -1932,6 +1997,8 @@ xfs_iunlink_insert_inode( * inode. */ error = xfs_iunlink_update_backref(pag, agino, next_agino); + if (error == -ENOLINK) + error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); if (error) return error; @@ -2026,6 +2093,9 @@ xfs_iunlink_remove_inode( */ error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, ip->i_next_unlinked); + if (error == -ENOLINK) + error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, + ip->i_next_unlinked); if (error) return error; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 902c7f67a117..7b1cb5d59d8f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3824,6 +3824,31 @@ TRACE_EVENT(xfs_iunlink_update_dinode, __entry->new_ptr) ); +TRACE_EVENT(xfs_iunlink_reload_next, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, agino) + __field(xfs_agino_t, prev_agino) + __field(xfs_agino_t, next_agino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); + __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); + __entry->prev_agino = ip->i_prev_unlinked; + __entry->next_agino = ip->i_next_unlinked; + ), + TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agino, + __entry->prev_agino, + __entry->next_agino) +); + DECLARE_EVENT_CLASS(xfs_ag_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), -- cgit From 49813a21ed57895b73ec4ed3b99d4beec931496f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:08 -0700 Subject: xfs: make inode unlinked bucket recovery work with quotacheck Teach quotacheck to reload the unlinked inode lists when walking the inode table. This requires extra state handling, since it's possible that a reloaded inode will get inactivated before quotacheck tries to scan it; in this case, we need to ensure that the reloaded inode does not have dquots attached when it is freed. Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_attr_inactive.c | 1 - fs/xfs/xfs_inode.c | 12 +++++++++--- fs/xfs/xfs_inode.h | 5 ++++- fs/xfs/xfs_mount.h | 10 +++++++++- fs/xfs/xfs_qm.c | 7 +++++++ 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 5db87b34fb6e..89c7a9f4f930 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -333,7 +333,6 @@ xfs_attr_inactive( int error = 0; mp = dp->i_mount; - ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); xfs_ilock(dp, lock_mode); if (!xfs_inode_has_attr_fork(dp)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2fd22db528b1..f94f7b374041 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1742,9 +1742,13 @@ xfs_inactive( ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) truncate = 1; - error = xfs_qm_dqattach(ip); - if (error) - goto out; + if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { + xfs_qm_dqdetach(ip); + } else { + error = xfs_qm_dqattach(ip); + if (error) + goto out; + } if (S_ISLNK(VFS_I(ip)->i_mode)) error = xfs_inactive_symlink(ip); @@ -1962,6 +1966,8 @@ xfs_iunlink_reload_next( trace_xfs_iunlink_reload_next(next_ip); rele: ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE)); + if (xfs_is_quotacheck_running(mp) && next_ip) + xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED); xfs_irele(next_ip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a111b5551ecd..0c5bdb91152e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -344,6 +344,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) */ #define XFS_INACTIVATING (1 << 13) +/* Quotacheck is running but inode has not been added to quota counts. */ +#define XFS_IQUOTAUNCHECKED (1 << 14) + /* All inode state flags related to inode reclaim. */ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ @@ -358,7 +361,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ - XFS_INACTIVATING) + XFS_INACTIVATING | XFS_IQUOTAUNCHECKED) /* * Flags for inode locking. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 6e2806654e94..d19cca099bc3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -405,6 +405,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID) #define XFS_OPSTATE_WARNED_SHRINK 8 /* Kernel has logged a warning about logged xattr updates being used. */ #define XFS_OPSTATE_WARNED_LARP 9 +/* Mount time quotacheck is running */ +#define XFS_OPSTATE_QUOTACHECK_RUNNING 10 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ @@ -427,6 +429,11 @@ __XFS_IS_OPSTATE(inode32, INODE32) __XFS_IS_OPSTATE(readonly, READONLY) __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) +#ifdef CONFIG_XFS_QUOTA +__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING) +#else +# define xfs_is_quotacheck_running(mp) (false) +#endif static inline bool xfs_should_warn(struct xfs_mount *mp, long nr) @@ -444,7 +451,8 @@ xfs_should_warn(struct xfs_mount *mp, long nr) { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \ { (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \ { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \ - { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" } + { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \ + { (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" } /* * Max and min values for mount-option defined I/O diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6abcc34fafd8..7256090c3895 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1160,6 +1160,10 @@ xfs_qm_dqusage_adjust( if (error) return error; + error = xfs_inode_reload_unlinked(ip); + if (error) + goto error0; + ASSERT(ip->i_delayed_blks == 0); if (XFS_IS_REALTIME_INODE(ip)) { @@ -1173,6 +1177,7 @@ xfs_qm_dqusage_adjust( } nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks; + xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED); /* * Add the (disk blocks and inode) resources occupied by this @@ -1319,8 +1324,10 @@ xfs_qm_quotacheck( flags |= XFS_PQUOTA_CHKD; } + xfs_set_quotacheck_running(mp); error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, NULL); + xfs_clear_quotacheck_running(mp); /* * On error, the inode walk may have partially populated the dquot -- cgit From 34389616a963480b20ea7f997533380ae3946fba Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:09 -0700 Subject: xfs: require a relatively recent V5 filesystem for LARP mode While reviewing the FIEXCHANGE code in XFS, I realized that the function that enables logged xattrs doesn't actually check that the superblock has a LOG_INCOMPAT feature bit field. Add a check to refuse the operation if we don't have a V5 filesystem... ...but on second though, let's require either reflink or rmap so that we only have to deal with LARP mode on relatively /modern/ kernel. 4.14 is about as far back as I feel like going. Seeing as LARP is a debugging-only option anyway, this isn't likely to affect any real users. Fixes: d9c61ccb3b09 ("xfs: move xfs_attr_use_log_assist out of xfs_log.c") Really-Fixes: f3f36c893f26 ("xfs: Add xfs_attr_set_deferred and xfs_attr_remove_deferred") Signed-off-by: Darrick J. Wong Reviewed-by: Bill O'Donnell --- fs/xfs/xfs_xattr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 43e5c219aaed..a3975f325f4e 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -46,6 +46,17 @@ xfs_attr_grab_log_assist( if (xfs_sb_version_haslogxattrs(&mp->m_sb)) return 0; + /* + * Check if the filesystem featureset is new enough to set this log + * incompat feature bit. Strictly speaking, the minimum requirement is + * a V5 filesystem for the superblock field, but we'll require rmap + * or reflink to avoid having to deal with really old kernels. + */ + if (!xfs_has_reflink(mp) && !xfs_has_rmapbt(mp)) { + error = -EOPNOTSUPP; + goto drop_incompat; + } + /* Enable log-assisted xattrs. */ error = xfs_add_incompat_log_feature(mp, XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); -- cgit From e03192820002feb064cc4fd0df9b8f0a94675c7d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:06 -0700 Subject: xfs: only call xchk_stats_merge after validating scrub inputs Harshit Mogalapalli slogged through several reports from our internal syzbot instance and observed that they all had a common stack trace: BUG: KASAN: user-memory-access in instrument_atomic_read_write include/linux/instrumented.h:96 [inline] BUG: KASAN: user-memory-access in atomic_try_cmpxchg_acquire include/linux/atomic/atomic-instrumented.h:1294 [inline] BUG: KASAN: user-memory-access in queued_spin_lock include/asm-generic/qspinlock.h:111 [inline] BUG: KASAN: user-memory-access in do_raw_spin_lock include/linux/spinlock.h:187 [inline] BUG: KASAN: user-memory-access in __raw_spin_lock include/linux/spinlock_api_smp.h:134 [inline] BUG: KASAN: user-memory-access in _raw_spin_lock+0x76/0xe0 kernel/locking/spinlock.c:154 Write of size 4 at addr 0000001dd87ee280 by task syz-executor365/1543 CPU: 2 PID: 1543 Comm: syz-executor365 Not tainted 6.5.0-syzk #1 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module+el8.3.0+7860+a7792d29 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x83/0xb0 lib/dump_stack.c:106 print_report+0x3f8/0x620 mm/kasan/report.c:478 kasan_report+0xb0/0xe0 mm/kasan/report.c:588 check_region_inline mm/kasan/generic.c:181 [inline] kasan_check_range+0x139/0x1e0 mm/kasan/generic.c:187 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_try_cmpxchg_acquire include/linux/atomic/atomic-instrumented.h:1294 [inline] queued_spin_lock include/asm-generic/qspinlock.h:111 [inline] do_raw_spin_lock include/linux/spinlock.h:187 [inline] __raw_spin_lock include/linux/spinlock_api_smp.h:134 [inline] _raw_spin_lock+0x76/0xe0 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] xchk_stats_merge_one.isra.1+0x39/0x650 fs/xfs/scrub/stats.c:191 xchk_stats_merge+0x5f/0xe0 fs/xfs/scrub/stats.c:225 xfs_scrub_metadata+0x252/0x14e0 fs/xfs/scrub/scrub.c:599 xfs_ioc_scrub_metadata+0xc8/0x160 fs/xfs/xfs_ioctl.c:1646 xfs_file_ioctl+0x3fd/0x1870 fs/xfs/xfs_ioctl.c:1955 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x199/0x220 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3e/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7ff155af753d Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1b 79 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc006e2568 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ff155af753d RDX: 00000000200000c0 RSI: 00000000c040583c RDI: 0000000000000003 RBP: 00000000ffffffff R08: 00000000004010c0 R09: 00000000004010c0 R10: 00000000004010c0 R11: 0000000000000246 R12: 0000000000400cb0 R13: 00007ffc006e2670 R14: 0000000000000000 R15: 0000000000000000 The root cause here is that xchk_stats_merge_one walks off the end of the xchk_scrub_stats.cs_stats array because it has been fed a garbage value in sm->sm_type. That occurs because I put the xchk_stats_merge in the wrong place -- it should have been after the last xchk_teardown call on our way out of xfs_scrub_metadata because we only call the teardown function if we called the setup function, and we don't call the setup functions if the inputs are obviously garbage. Thanks to Harshit for triaging the bug reports and bringing this to my attention. Fixes: d7a74cad8f45 ("xfs: track usage statistics of online fsck") Reported-by: Harshit Mogalapalli Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/scrub.c | 4 ++-- fs/xfs/scrub/stats.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 7d3aa14d81b5..4849efcaa33a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -588,6 +588,8 @@ out_nofix: out_teardown: error = xchk_teardown(sc, error); out_sc: + if (error != -ENOENT) + xchk_stats_merge(mp, sm, &run); kfree(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); @@ -595,8 +597,6 @@ out: sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; error = 0; } - if (error != -ENOENT) - xchk_stats_merge(mp, sm, &run); return error; need_drain: error = xchk_teardown(sc, 0); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index aeb92624176b..cd91db4a5548 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -185,7 +185,10 @@ xchk_stats_merge_one( { struct xchk_scrub_stats *css; - ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) { + ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); + return; + } css = &cs->cs_stats[sm->sm_type]; spin_lock(&css->css_lock); -- cgit From 18789be8e0d9fbb78b2290dcf93f500726ed19f0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 14:38:41 +0100 Subject: ASoC: cs35l56: Disable low-power hibernation mode Do not allow the CS35L56 to be put into its lowest power "hibernation" mode. This only affects I2C because "hibernation" is already disabled on SPI and SoundWire. Recent firmwares need a different wake-up sequence. Until that sequence has been specified, the chip "hibernation" mode must be disabled otherwise it can intermittently fail to wake. THIS WILL NOT APPLY CLEANLY TO 6.5 AND EARLIER: We will send a separate backport patch to stable. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230912133841.3480466-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-i2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-i2c.c b/sound/soc/codecs/cs35l56-i2c.c index 9f4f2f4f23f5..d10e0e2380e8 100644 --- a/sound/soc/codecs/cs35l56-i2c.c +++ b/sound/soc/codecs/cs35l56-i2c.c @@ -27,7 +27,6 @@ static int cs35l56_i2c_probe(struct i2c_client *client) return -ENOMEM; cs35l56->base.dev = dev; - cs35l56->base.can_hibernate = true; i2c_set_clientdata(client, cs35l56); cs35l56->base.regmap = devm_regmap_init_i2c(client, regmap_config); -- cgit From a47b44fbb13f5e7a981b4515dcddc93a321ae89c Mon Sep 17 00:00:00 2001 From: Timo Alho Date: Tue, 12 Sep 2023 14:29:50 +0300 Subject: clk: tegra: fix error return case for recalc_rate tegra-bpmp clocks driver makes implicit conversion of signed error code to unsigned value in recalc_rate operation. The behavior for recalc_rate, according to it's specification, should be that "If the driver cannot figure out a rate for this clock, it must return 0." Fixes: ca6f2796eef7 ("clk: tegra: Add BPMP clock driver") Signed-off-by: Timo Alho Signed-off-by: Mikko Perttunen Link: https://lore.kernel.org/r/20230912112951.2330497-1-cyndis@kapsi.fi Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-bpmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c index a9f3fb448de6..7bfba0afd778 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -159,7 +159,7 @@ static unsigned long tegra_bpmp_clk_recalc_rate(struct clk_hw *hw, err = tegra_bpmp_clk_transfer(clk->bpmp, &msg); if (err < 0) - return err; + return 0; return response.rate; } -- cgit From 82f07f1acf417b81e793145c167dd5e156024de4 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 23 Aug 2023 09:37:43 -0400 Subject: pwr-mlxbf: extend Kconfig to include gpio-mlxbf3 dependency The BlueField power handling driver (pwr-mlxbf.c) provides functionality for both BlueField-2 and BlueField-3 based platforms. This driver also depends on the SoC-specific BlueField GPIO driver, whether gpio-mlxbf2 or gpio-mlxbf3. This patch extends the Kconfig definition to include the dependency on the gpio-mlxbf3 driver, if applicable. Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Link: https://lore.kernel.org/r/20230823133743.31275-1-davthompson@nvidia.com Signed-off-by: Sebastian Reichel --- drivers/power/reset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 59e1ebb7842e..411e00b255d6 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -300,7 +300,7 @@ config NVMEM_REBOOT_MODE config POWER_MLXBF tristate "Mellanox BlueField power handling driver" - depends on (GPIO_MLXBF2 && ACPI) + depends on (GPIO_MLXBF2 || GPIO_MLXBF3) && ACPI help This driver supports reset or low power mode handling for Mellanox BlueField. -- cgit From 926ce6ba25101ccc659475e01ce5748374ab5856 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 23 Aug 2023 10:56:01 +0200 Subject: power: reset: use capital "OR" for multiple licenses in SPDX Documentation/process/license-rules.rst and checkpatch expect the SPDX identifier syntax for multiple licenses to use capital "OR". Correct it to keep consistent format and avoid copy-paste issues. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230823085601.116562-1-krzysztof.kozlowski@linaro.org Signed-off-by: Sebastian Reichel --- drivers/power/reset/pwr-mlxbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/pwr-mlxbf.c b/drivers/power/reset/pwr-mlxbf.c index 12dedf841a44..de35d24bb7ef 100644 --- a/drivers/power/reset/pwr-mlxbf.c +++ b/drivers/power/reset/pwr-mlxbf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause /* * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. -- cgit From 4ec7b666fb4247bc6b9cdc84fa753d8dc2994d25 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 14 Aug 2023 22:21:51 +0000 Subject: power: vexpress: fix -Wvoid-pointer-to-enum-cast warning When building with clang 18 I see the following warning: | drivers/power/reset/vexpress-poweroff.c:124:10: warning: cast to smaller integer type 'enum vexpress_reset_func' from 'const void *' [-Wvoid-pointer-to-enum-cast] | 124 | switch ((enum vexpress_reset_func)match->data) { This is due to the fact that `match->data` is a void* while `enum vexpress_reset_func` has the size of an int. This leads to truncation and possible data loss. Link: https://github.com/ClangBuiltLinux/linux/issues/1910 Reported-by: Nathan Chancellor Signed-off-by: Justin Stitt Acked-by: Sudeep Holla Signed-off-by: Sebastian Reichel --- drivers/power/reset/vexpress-poweroff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c index 447ffdacddf9..17064d7b19f6 100644 --- a/drivers/power/reset/vexpress-poweroff.c +++ b/drivers/power/reset/vexpress-poweroff.c @@ -121,7 +121,7 @@ static int vexpress_reset_probe(struct platform_device *pdev) return PTR_ERR(regmap); dev_set_drvdata(&pdev->dev, regmap); - switch ((enum vexpress_reset_func)match->data) { + switch ((uintptr_t)match->data) { case FUNC_SHUTDOWN: vexpress_power_off_device = &pdev->dev; pm_power_off = vexpress_power_off; -- cgit From a8f12572860ad8ba659d96eee9cf09e181f6ebcc Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 8 Sep 2023 18:33:35 +0200 Subject: bpf: Fix a erroneous check after snprintf() snprintf() does not return negative error code on error, it returns the number of characters which *would* be generated for the given input. Fix the error handling check. Fixes: 57539b1c0ac2 ("bpf: Enable annotating trusted nested pointers") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/393bdebc87b22563c08ace094defa7160eb7a6c0.1694190795.git.christophe.jaillet@wanadoo.fr Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1095bbe29859..8090d7fb11ef 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, tname = btf_name_by_offset(btf, walk_type->name_off); ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); - if (ret < 0) + if (ret >= sizeof(safe_tname)) return false; safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info)); -- cgit From d128860dbb29cafc3c65ca2d22082745a32829dd Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 12 Sep 2023 14:06:31 +0200 Subject: selftests/bpf: fix unpriv_disabled check in test_verifier Commit 1d56ade032a49 changed the function get_unpriv_disabled() to return its results as a bool instead of updating a global variable, but test_verifier was not updated to keep in line with these changes. Thus unpriv_disabled is always false in test_verifier and unprivileged tests are not properly skipped on systems with unprivileged bpf disabled. Fixes: 1d56ade032a49 ("selftests/bpf: Unprivileged tests for test_loader.c") Signed-off-by: Artem Savkov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230912120631.213139-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 31f1c935cd07..98107e0452d3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1880,7 +1880,7 @@ int main(int argc, char **argv) } } - get_unpriv_disabled(); + unpriv_disabled = get_unpriv_disabled(); if (unpriv && unpriv_disabled) { printf("Cannot run as unprivileged user with sysctl %s.\n", UNPRIV_SYSCTL); -- cgit From 214bfd267f4929722b374b43fda456c21cd6f016 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2023 23:08:12 -0700 Subject: bpf, cgroup: fix multiple kernel-doc warnings Fix missing or extra function parameter kernel-doc warnings in cgroup.c: kernel/bpf/cgroup.c:1359: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_skb' kernel/bpf/cgroup.c:1359: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_skb' kernel/bpf/cgroup.c:1439: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sk' kernel/bpf/cgroup.c:1439: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sk' kernel/bpf/cgroup.c:1467: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_addr' kernel/bpf/cgroup.c:1467: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sock_addr' kernel/bpf/cgroup.c:1512: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_ops' kernel/bpf/cgroup.c:1512: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sock_ops' kernel/bpf/cgroup.c:1685: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sysctl' kernel/bpf/cgroup.c:1685: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sysctl' kernel/bpf/cgroup.c:795: warning: Excess function parameter 'type' description in '__cgroup_bpf_replace' kernel/bpf/cgroup.c:795: warning: Function parameter or member 'new_prog' not described in '__cgroup_bpf_replace' Signed-off-by: Randy Dunlap Cc: Martin KaFai Lau Cc: bpf@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Link: https://lore.kernel.org/r/20230912060812.1715-1-rdunlap@infradead.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5b2741aa0d9b..03b3d4492980 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -785,7 +785,8 @@ found: * to descendants * @cgrp: The cgroup which descendants to traverse * @link: A link for which to replace BPF program - * @type: Type of attach operation + * @new_prog: &struct bpf_prog for the target BPF program with its refcnt + * incremented * * Must be called with cgroup_mutex held. */ @@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socket sending or receiving traffic * @skb: The skb that is being sent or received - * @type: The type of program to be executed + * @atype: The type of program to be executed * * If no socket is passed, or the socket is not of type INET or INET6, * this function does nothing and returns 0. @@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); /** * __cgroup_bpf_run_filter_sk() - Run a program on a sock * @sk: sock structure to manipulate - * @type: The type of program to be executed + * @atype: The type of program to be executed * * socket is passed is expected to be of type INET or INET6. * @@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user - * @type: The type of program to be executed + * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). @@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains * sk with connection information (IP addresses, etc.) May not contain * cgroup info if it is a req sock. - * @type: The type of program to be executed + * @atype: The type of program to be executed * * socket passed is expected to be of type INET or INET6. * @@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * @ppos: value-result argument: value is position at which read from or write * to sysctl is happening, result is new position if program overrode it, * initial value otherwise - * @type: type of program to be executed + * @atype: type of program to be executed * * Program is run when sysctl is being accessed, either read or written, and * can allow or deny such access. -- cgit From 8b010acb3154b669e52f0eef4a6d925e3cc1db2f Mon Sep 17 00:00:00 2001 From: Wang Jianchao Date: Wed, 13 Sep 2023 09:38:01 +0800 Subject: xfs: use roundup_pow_of_two instead of ffs during xlog_find_tail In our production environment, we find that mounting a 500M /boot which is umount cleanly needs ~6s. One cause is that ffs() is used by xlog_write_log_records() to decide the buffer size. It can cause a lot of small IO easily when xlog_clear_stale_blocks() needs to wrap around the end of log area and log head block is not power of two. Things are similar in xlog_find_verify_cycle(). The code is able to handed bigger buffer very well, we can use roundup_pow_of_two() to replace ffs() directly to avoid small and sychronous IOs. Reviewed-by: Dave Chinner Signed-off-by: Wang Jianchao Signed-off-by: Chandan Babu R --- fs/xfs/xfs_log_recover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82c81d20459d..13b94d2e605b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -329,7 +329,7 @@ xlog_find_verify_cycle( * try a smaller size. We need to be able to read at least * a log sector, or we're out of luck. */ - bufblks = 1 << ffs(nbblks); + bufblks = roundup_pow_of_two(nbblks); while (bufblks > log->l_logBBsize) bufblks >>= 1; while (!(buffer = xlog_alloc_buffer(log, bufblks))) { @@ -1528,7 +1528,7 @@ xlog_write_log_records( * a smaller size. We need to be able to write at least a * log sector, or we're out of luck. */ - bufblks = 1 << ffs(blocks); + bufblks = roundup_pow_of_two(blocks); while (bufblks > log->l_logBBsize) bufblks >>= 1; while (!(buffer = xlog_alloc_buffer(log, bufblks))) { -- cgit From 069969d6c5264d2348fd6cf0cedc00fd87ff3cee Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 10 Aug 2023 22:49:43 +0800 Subject: tee: Remove unused declarations Commit 4fb0a5eb364d ("tee: add OP-TEE driver") declared but never implemented optee_supp_read()/optee_supp_write(). Commit 967c9cca2cc5 ("tee: generic TEE subsystem") never implemented tee_shm_init(). Signed-off-by: Yue Haibing Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/optee_private.h | 2 -- drivers/tee/tee_private.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 72685ee0d53f..6bb5cae09688 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -238,8 +238,6 @@ int optee_notif_send(struct optee *optee, u_int key); u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, struct tee_param *param); -int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len); -int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len); void optee_supp_init(struct optee_supp *supp); void optee_supp_uninit(struct optee_supp *supp); void optee_supp_release(struct optee_supp *supp); diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h index 409cadcc1cff..754e11dcb240 100644 --- a/drivers/tee/tee_private.h +++ b/drivers/tee/tee_private.h @@ -47,8 +47,6 @@ struct tee_device { struct tee_shm_pool *pool; }; -int tee_shm_init(void); - int tee_shm_get_fd(struct tee_shm *shm); bool tee_device_get(struct tee_device *teedev); -- cgit From 6469b2feade8fd82d224dd3734e146536f3e9f0e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Sep 2023 07:07:38 +0300 Subject: ARM: dts: ti: omap: Fix bandgap thermal cells addressing for omap3/4 Fix "thermal_sys: cpu_thermal: Failed to read thermal-sensors cells: -2" error on boot for omap3/4. This is caused by wrong addressing in the dts for bandgap sensor for single sensor instances. Note that omap4-cpu-thermal.dtsi is shared across omap4/5 and dra7, so we can't just change the addressing in omap4-cpu-thermal.dtsi. Cc: Ivaylo Dimitrov Cc: Carl Philipp Klemm Cc: Merlijn Wajer Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: a761d517bbb1 ("ARM: dts: omap3: Add cpu_thermal zone") Fixes: 0bbf6c54d100 ("arm: dts: add omap4 CPU thermal data") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/ti/omap/omap3-cpu-thermal.dtsi | 3 +-- arch/arm/boot/dts/ti/omap/omap4-cpu-thermal.dtsi | 5 ++++- arch/arm/boot/dts/ti/omap/omap443x.dtsi | 1 + arch/arm/boot/dts/ti/omap/omap4460.dtsi | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/ti/omap/omap3-cpu-thermal.dtsi b/arch/arm/boot/dts/ti/omap/omap3-cpu-thermal.dtsi index 0da759f8e2c2..7dd2340bc5e4 100644 --- a/arch/arm/boot/dts/ti/omap/omap3-cpu-thermal.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap3-cpu-thermal.dtsi @@ -12,8 +12,7 @@ cpu_thermal: cpu-thermal { polling-delay = <1000>; /* milliseconds */ coefficients = <0 20000>; - /* sensor ID */ - thermal-sensors = <&bandgap 0>; + thermal-sensors = <&bandgap>; cpu_trips: trips { cpu_alert0: cpu_alert { diff --git a/arch/arm/boot/dts/ti/omap/omap4-cpu-thermal.dtsi b/arch/arm/boot/dts/ti/omap/omap4-cpu-thermal.dtsi index 801b4f10350c..d484ec1e4fd8 100644 --- a/arch/arm/boot/dts/ti/omap/omap4-cpu-thermal.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4-cpu-thermal.dtsi @@ -12,7 +12,10 @@ cpu_thermal: cpu_thermal { polling-delay-passive = <250>; /* milliseconds */ polling-delay = <1000>; /* milliseconds */ - /* sensor ID */ + /* + * See 44xx files for single sensor addressing, omap5 and dra7 need + * also sensor ID for addressing. + */ thermal-sensors = <&bandgap 0>; cpu_trips: trips { diff --git a/arch/arm/boot/dts/ti/omap/omap443x.dtsi b/arch/arm/boot/dts/ti/omap/omap443x.dtsi index 238aceb799f8..2104170fe2cd 100644 --- a/arch/arm/boot/dts/ti/omap/omap443x.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap443x.dtsi @@ -69,6 +69,7 @@ }; &cpu_thermal { + thermal-sensors = <&bandgap>; coefficients = <0 20000>; }; diff --git a/arch/arm/boot/dts/ti/omap/omap4460.dtsi b/arch/arm/boot/dts/ti/omap/omap4460.dtsi index 1b27a862ae81..a6764750d447 100644 --- a/arch/arm/boot/dts/ti/omap/omap4460.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4460.dtsi @@ -79,6 +79,7 @@ }; &cpu_thermal { + thermal-sensors = <&bandgap>; coefficients = <348 (-9301)>; }; -- cgit From ac08bda1569b06b7a62c7b4dd00d4c3b28ceaaec Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Sep 2023 07:07:38 +0300 Subject: ARM: dts: ti: omap: motorola-mapphone: Fix abe_clkctrl warning on boot Commit 0840242e8875 ("ARM: dts: Configure clock parent for pwm vibra") attempted to fix the PWM settings but ended up causin an additional clock reparenting error: clk: failed to reparent abe-clkctrl:0060:24 to sys_clkin_ck: -22 Only timer9 is in the PER domain and can use the sys_clkin_ck clock source. For timer8, the there is no sys_clkin_ck available as it's in the ABE domain, instead it should use syc_clk_div_ck. However, for power management, we want to use the always on sys_32k_ck instead. Cc: Ivaylo Dimitrov Cc: Carl Philipp Klemm Cc: Merlijn Wajer Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: 0840242e8875 ("ARM: dts: Configure clock parent for pwm vibra") Depends-on: 61978617e905 ("ARM: dts: Add minimal support for Droid Bionic xt875") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi index 091ba310053e..d69f0f4b4990 100644 --- a/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi @@ -614,12 +614,12 @@ /* Configure pwm clock source for timers 8 & 9 */ &timer8 { assigned-clocks = <&abe_clkctrl OMAP4_TIMER8_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; &timer9 { assigned-clocks = <&l4_per_clkctrl OMAP4_TIMER9_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; /* -- cgit From 5ad37b5e30433afa7a5513e3eb61f69fa0976785 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Sep 2023 07:07:38 +0300 Subject: ARM: dts: ti: omap: Fix noisy serial with overrun-throttle-ms for mapphone On mapphone devices we may get lots of noise on the micro-USB port in debug uart mode until the phy-cpcap-usb driver probes. Let's limit the noise by using overrun-throttle-ms. Note that there is also a related separate issue where the charger cable connected may cause random sysrq requests until phy-cpcap-usb probes that still remains. Cc: Ivaylo Dimitrov Cc: Carl Philipp Klemm Cc: Merlijn Wajer Cc: Pavel Machek Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi index d69f0f4b4990..d2d516d113ba 100644 --- a/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/motorola-mapphone-common.dtsi @@ -640,6 +640,7 @@ &uart3 { interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core 0x17c>; + overrun-throttle-ms = <500>; }; &uart4 { -- cgit From 36bee3f6b300c50b267aaeb5096124c267f0bd15 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Sep 2023 07:07:38 +0300 Subject: ARM: omap2+: Downgrade u-boot version warnings to debug statements We should be able to see real issues with dmesg -l err,warn. The u-boot revision warning should be a debug statement rather than a warning. Cc: Ivaylo Dimitrov Cc: Carl Philipp Klemm Cc: Merlijn Wajer Cc: Pavel Machek Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pm44xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index f57802f3ee3a..37b168119fe4 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -99,7 +99,7 @@ static int omap4_pm_suspend(void) * possible causes. * http://www.spinics.net/lists/arm-kernel/msg218641.html */ - pr_warn("A possible cause could be an old bootloader - try u-boot >= v2012.07\n"); + pr_debug("A possible cause could be an old bootloader - try u-boot >= v2012.07\n"); } else { pr_info("Successfully put all powerdomains to target state\n"); } @@ -257,7 +257,7 @@ int __init omap4_pm_init(void) * http://www.spinics.net/lists/arm-kernel/msg218641.html */ if (cpu_is_omap44xx()) - pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); + pr_debug("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); ret = pwrdm_for_each(pwrdms_setup, NULL); if (ret) { -- cgit From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2023 18:13:59 +0000 Subject: xfrm: fix a data-race in xfrm_gen_index() xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock. This means we must use a per-netns idx_generator variable, instead of a static one. Alternative would be to use an atomic variable. syzbot reported: BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0: xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline] xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1: xfrm_sk_policy_insert+0x13e/0x640 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00006ad8 -> 0x00006b18 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: 1121994c803f ("netns xfrm: policy insertion in netns") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd7c3be4af5d..423b52eca908 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -50,6 +50,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + unsigned int idx_generator; struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 113fb7e9cdaf..99df2862bdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1375,8 +1375,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild); * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { - static u32 idx_generator; - for (;;) { struct hlist_head *list; struct xfrm_policy *p; @@ -1384,8 +1382,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) int found; if (!index) { - idx = (idx_generator | dir); - idx_generator += 8; + idx = (net->xfrm.idx_generator | dir); + net->xfrm.idx_generator += 8; } else { idx = index; index = 0; -- cgit From e5deb8f76e64d94ccef715e75ebafffd0c312d80 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 7 Sep 2023 08:53:28 +0300 Subject: bus: ti-sysc: Fix SYSC_QUIRK_SWSUP_SIDLE_ACT handling for uart wake-up The uarts should be tagged with SYSC_QUIRK_SWSUP_SIDLE instead of SYSC_QUIRK_SWSUP_SIDLE_ACT. The difference is that SYSC_QUIRK_SWSUP_SIDLE is used to force idle target modules rather than block idle during usage. The SYSC_QUIRK_SWSUP_SIDLE_ACT should disable autoidle and wake-up when a target module is active, and configure autoidle and wake-up when a target module is inactive. We are missing configuring the target module on sysc_disable_module(), and missing toggling of the wake-up bit. Let's fix the issue to allow uart wake-up to work. Fixes: fb685f1c190e ("bus: ti-sysc: Handle swsup idle mode quirks") Tested-by: Dhruva Gole Tested-by: Kevin Hilman Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 33e8d780b04b..d57bc066dce6 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1098,6 +1098,11 @@ static int sysc_enable_module(struct device *dev) if (ddata->cfg.quirks & (SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_SIDLE_ACT)) { best_mode = SYSC_IDLE_NO; + + /* Clear WAKEUP */ + if (regbits->enwkup_shift >= 0 && + ddata->cfg.sysc_val & BIT(regbits->enwkup_shift)) + reg &= ~BIT(regbits->enwkup_shift); } else { best_mode = fls(ddata->cfg.sidlemodes) - 1; if (best_mode > SYSC_IDLE_MASK) { @@ -1225,6 +1230,13 @@ set_sidle: } } + if (ddata->cfg.quirks & SYSC_QUIRK_SWSUP_SIDLE_ACT) { + /* Set WAKEUP */ + if (regbits->enwkup_shift >= 0 && + ddata->cfg.sysc_val & BIT(regbits->enwkup_shift)) + reg |= BIT(regbits->enwkup_shift); + } + reg &= ~(SYSC_IDLE_MASK << regbits->sidle_shift); reg |= best_mode << regbits->sidle_shift; if (regbits->autoidle_shift >= 0 && @@ -1519,16 +1531,16 @@ struct sysc_revision_quirk { static const struct sysc_revision_quirk sysc_revision_quirks[] = { /* These drivers need to be fixed to not use pm_runtime_irq_safe() */ SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x00000046, 0xffffffff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE), + SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE), SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x00000052, 0xffffffff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE), + SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE), /* Uarts on omap4 and later */ SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x50411e03, 0xffff00ff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE), + SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE), SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x47422e03, 0xffffffff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE), + SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE), SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x47424e03, 0xffffffff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE), + SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE), /* Quirks that need to be set based on the module address */ SYSC_QUIRK("mcpdm", 0x40132000, 0, 0x10, -ENODEV, 0x50000800, 0xffffffff, -- cgit From d1383077c225ceb87ac7a3b56b2c505193f77ed7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Sep 2023 09:36:57 +0200 Subject: wifi: cfg80211: add missing kernel-doc for cqm_rssi_work As reported by Stephen, I neglected to add the kernel-doc for the new struct member. Fix that. Reported-by: Stephen Rothwell Fixes: 37c20b2effe9 ("wifi: cfg80211: fix cqm_config access race") Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aebfa54d547a..7192346e4a22 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5941,6 +5941,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away + * @cqm_rssi_work: (private) CQM RSSI reporting work * @cqm_config: (private) nl80211 RSSI monitor state * @pmsr_list: (private) peer measurement requests * @pmsr_lock: (private) peer measurements requests/results lock -- cgit From 0342518b0c15481dd4359b499301711b2f9a796c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 14:27:39 +0100 Subject: ALSA: hda: cs35l56: Disable low-power hibernation mode Do not allow the CS35L56 to be put into its lowest power "hibernation" mode. This only affects I2C because "hibernation" is already disabled on SPI. Recent firmwares need a different wake-up sequence. Until that sequence has been specified, the chip "hibernation" mode must be disabled otherwise it can intermittently fail to wake. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230912132739.3478441-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda_i2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c index 83e4acdd89ac..757a4d193e0f 100644 --- a/sound/pci/hda/cs35l56_hda_i2c.c +++ b/sound/pci/hda/cs35l56_hda_i2c.c @@ -21,7 +21,6 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt) return -ENOMEM; cs35l56->base.dev = &clt->dev; - cs35l56->base.can_hibernate = true; cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c); if (IS_ERR(cs35l56->base.regmap)) { ret = PTR_ERR(cs35l56->base.regmap); -- cgit From 485ddd519fbd89a9d9ac4b02be489e03cbbeebba Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 12 Sep 2023 19:26:17 +0300 Subject: ALSA: hda: intel-sdw-acpi: Use u8 type for link index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use consistently u8 for sdw link index. The id is limited to 4, u8 is adequate in size to store it. This change will also fixes the following compiler warning/error (W=1): sound/hda/intel-sdw-acpi.c: In function ‘sdw_intel_acpi_scan’: sound/hda/intel-sdw-acpi.c:34:35: error: ‘-subproperties’ directive output may be truncated writing 14 bytes into a region of size between 7 and 17 [-Werror=format-truncation=] 34 | "mipi-sdw-link-%d-subproperties", i); | ^~~~~~~~~~~~~~ In function ‘is_link_enabled’, inlined from ‘sdw_intel_scan_controller’ at sound/hda/intel-sdw-acpi.c:106:8, inlined from ‘sdw_intel_acpi_scan’ at sound/hda/intel-sdw-acpi.c:180:9: sound/hda/intel-sdw-acpi.c:33:9: note: ‘snprintf’ output between 30 and 40 bytes into a destination of size 32 33 | snprintf(name, sizeof(name), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | "mipi-sdw-link-%d-subproperties", i); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912162617.29178-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 5cb92f7ccbca..b57d72ea4503 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -23,7 +23,7 @@ static int ctrl_link_mask; module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444); MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)"); -static bool is_link_enabled(struct fwnode_handle *fw_node, int i) +static bool is_link_enabled(struct fwnode_handle *fw_node, u8 idx) { struct fwnode_handle *link; char name[32]; @@ -31,7 +31,7 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i) /* Find master handle */ snprintf(name, sizeof(name), - "mipi-sdw-link-%d-subproperties", i); + "mipi-sdw-link-%hhu-subproperties", idx); link = fwnode_get_named_child_node(fw_node, name); if (!link) @@ -51,8 +51,8 @@ static int sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) { struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle); - int ret, i; - u8 count; + u8 count, i; + int ret; if (!adev) return -EINVAL; -- cgit From 07058dceb038a4b0dd49af07118b6b2a685bb4a6 Mon Sep 17 00:00:00 2001 From: Knyazev Arseniy Date: Wed, 13 Sep 2023 10:33:43 +0500 Subject: ALSA: hda/realtek: Splitting the UX3402 into two separate models UX3402VA and UX3402ZA models require different hex values, so comibining them into one model is incorrect. Fixes: 491a4ccd8a02 ("ALSA: hda/realtek: Add quirk for ASUS Zenbook using CS35L41") Signed-off-by: Knyazev Arseniy Link: https://lore.kernel.org/r/20230913053343.119798-1-poseaydone@ya.ru Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 887c1b163865..883a7e865bc5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9814,7 +9814,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), - SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), -- cgit From 6e48ebffc2db5419b3a51cfc509bde442252b356 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Sep 2023 07:01:34 +0200 Subject: wifi: mac80211: fix mesh id corruption on 32 bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the changed field size was increased to u64, mesh_bss_info_changed pulls invalid bits from the first 3 bytes of the mesh id, clears them, and passes them on to ieee80211_link_info_change_notify, because ifmsh->mbss_changed was not updated to match its size. Fix this by turning into ifmsh->mbss_changed into an unsigned long array with 64 bit size. Fixes: 15ddba5f4311 ("wifi: mac80211: consistently use u64 for BSS changes") Reported-by: Thomas Hühn Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230913050134.53536-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mesh.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 06bd406846d2..b3d00259e1d6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -676,7 +676,7 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_root_timer; unsigned long wrkq_flags; - unsigned long mbss_changed; + unsigned long mbss_changed[64 / BITS_PER_LONG]; bool userspace_handles_dfs; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index af8c5fc2db14..e31c312c124a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1175,7 +1175,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, /* if we race with running work, worst case this work becomes a noop */ for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) - set_bit(bit, &ifmsh->mbss_changed); + set_bit(bit, ifmsh->mbss_changed); set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } @@ -1257,7 +1257,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) /* clear any mesh work (for next join) we may have accrued */ ifmsh->wrkq_flags = 0; - ifmsh->mbss_changed = 0; + memset(ifmsh->mbss_changed, 0, sizeof(ifmsh->mbss_changed)); local->fif_other_bss--; atomic_dec(&local->iff_allmultis); @@ -1724,9 +1724,9 @@ static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) u32 bit; u64 changed = 0; - for_each_set_bit(bit, &ifmsh->mbss_changed, + for_each_set_bit(bit, ifmsh->mbss_changed, sizeof(changed) * BITS_PER_BYTE) { - clear_bit(bit, &ifmsh->mbss_changed); + clear_bit(bit, ifmsh->mbss_changed); changed |= BIT(bit); } -- cgit From 26f7111abd8e15726c93bafe16a349f1db2f14e0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 13 Sep 2023 12:39:33 +0300 Subject: ALSA: usb-audio: mixer: Remove temporary string use in parse_clock_source_unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kctl->id.name can be directly passed to snd_usb_copy_string_desc() and if the string has been fetched the suffix can be appended with the append_ctl_name() call. The temporary name string becomes redundant and can be removed. This change will also fixes the following compiler warning/error (W=1): sound/usb/mixer.c: In function ‘parse_audio_unit’: sound/usb/mixer.c:1972:29: error: ‘ Validity’ directive output may be truncated writing 9 bytes into a region of size between 1 and 44 [-Werror=format-truncation=] 1972 | "%s Validity", name); | ^~~~~~~~~ In function ‘parse_clock_source_unit’, inlined from ‘parse_audio_unit’ at sound/usb/mixer.c:2892:10: sound/usb/mixer.c:1971:17: note: ‘snprintf’ output between 10 and 53 bytes into a destination of size 44 1971 | snprintf(kctl->id.name, sizeof(kctl->id.name), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1972 | "%s Validity", name); | ~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The warnings got brought to light by a recent patch upstream: commit 6d4ab2e97dcf ("extrawarn: enable format and stringop overflow warnings in W=1") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230913093933.24564-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9105ec623120..985b1aea9cdc 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1929,7 +1929,6 @@ static int parse_clock_source_unit(struct mixer_build *state, int unitid, struct uac_clock_source_descriptor *hdr = _ftr; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; int ret; if (state->mixer->protocol != UAC_VERSION_2) @@ -1966,10 +1965,9 @@ static int parse_clock_source_unit(struct mixer_build *state, int unitid, kctl->private_free = snd_usb_mixer_elem_free; ret = snd_usb_copy_string_desc(state->chip, hdr->iClockSource, - name, sizeof(name)); + kctl->id.name, sizeof(kctl->id.name)); if (ret > 0) - snprintf(kctl->id.name, sizeof(kctl->id.name), - "%s Validity", name); + append_ctl_name(kctl, " Validity"); else snprintf(kctl->id.name, sizeof(kctl->id.name), "Clock Source %d Validity", hdr->bClockID); -- cgit From f7875966dc0c68cb7c02992ff9ac9e3f88fb5fca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Sep 2023 10:34:00 -0300 Subject: tools headers UAPI: Sync files changed by new fchmodat2 and map_shadow_stack syscalls with the kernel sources To pick the changes in these csets: c35559f94ebc3e3b ("x86/shstk: Introduce map_shadow_stack syscall") 78252deb023cf087 ("arch: Register fchmodat2, usually as syscall 452") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible: # perf trace -v -e fchmodat*,map_shadow_stack --max-events=4 Using CPUID AuthenticAMD-25-21-0 Reusing "openat" BPF sys_enter augmenter for "fchmodat" event qualifier tracepoint filter: (common_pid != 3499340 && common_pid != 11259) && (id == 268 || id == 452 || id == 453) ^C# And it'll work as with other syscalls, for instance openat: # perf trace -e openat* --max-events=4 0.000 ( 0.015 ms): systemd-oomd/1150 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 11 0.068 ( 0.019 ms): systemd-oomd/1150 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/memory.pressure", flags: RDONLY|CLOEXEC) = 11 0.119 ( 0.008 ms): systemd-oomd/1150 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/memory.current", flags: RDONLY|CLOEXEC) = 11 0.138 ( 0.006 ms): systemd-oomd/1150 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/memory.min", flags: RDONLY|CLOEXEC) = 11 # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ find tools/perf/arch/ -name "syscall*tbl" | xargs grep -E fchmodat\|sys_map_shadow_stack tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl:258 n64 fchmodat sys_fchmodat tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl:452 n64 fchmodat2 sys_fchmodat2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl:297 common fchmodat sys_fchmodat tools/perf/arch/powerpc/entry/syscalls/syscall.tbl:452 common fchmodat2 sys_fchmodat2 tools/perf/arch/s390/entry/syscalls/syscall.tbl:299 common fchmodat sys_fchmodat sys_fchmodat tools/perf/arch/s390/entry/syscalls/syscall.tbl:452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl:268 common fchmodat sys_fchmodat tools/perf/arch/x86/entry/syscalls/syscall_64.tbl:452 common fchmodat2 sys_fchmodat2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl:453 64 map_shadow_stack sys_map_shadow_stack $ $ grep -Ew map_shadow_stack\|fchmodat2 /tmp/build/perf-tools/arch/x86/include/generated/asm/syscalls_64.c [452] = "fchmodat2", [453] = "map_shadow_stack", $ This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl Cc: Adrian Hunter Cc: Christian Brauner Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Rick Edgecombe Link: https://lore.kernel.org/lkml/ZP8bE7aXDBu%2Fdrak@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 5 ++++- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 1 + tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fd6c1cb585db..abe087c53b4b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_fchmodat2 452 +__SYSCALL(__NR_fchmodat2, sys_fchmodat2) + #undef __NR_syscalls -#define __NR_syscalls 452 +#define __NR_syscalls 453 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index cfda2511badf..cb5e757f6621 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -366,3 +366,4 @@ 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat +452 n64 fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 8c0b08b7a80e..20e50586e8a2 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -538,3 +538,4 @@ 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a6935af2235c..0122cc156952 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 227538b0ce80..1d6eee30eceb 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -373,6 +373,8 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 64 map_shadow_stack sys_map_shadow_stack # # Due to a historical design error, certain syscalls are numbered differently -- cgit From 417ecb614fd870a9c898f5ce6fd3d338606aaf5d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Sep 2023 08:35:09 -0300 Subject: tools headers UAPI: Copy seccomp.h to be able to build 'perf bench' in older systems The new 'perf bench' for sched-seccomp-notify uses defines and types not available in older systems where we want to have perf available, so grab a copy of this UAPI from the kernel sources to allow that. This will be checked in the future for drift from the original when we build the perf tool, that will warn when that happens like: make: Entering directory '/var/home/acme/git/perf-tools/tools/perf' BUILD: Doing 'make -j32' parallel build Warning: Kernel ABI header differences: Cc: Adrian Hunter Cc: Andrei Vagin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kees Kook Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/ZQGhMXtwX7RvV3ya@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/seccomp.h | 157 +++++++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 158 insertions(+) create mode 100644 tools/include/uapi/linux/seccomp.h diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h new file mode 100644 index 000000000000..dbfc9b37fcae --- /dev/null +++ b/tools/include/uapi/linux/seccomp.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_SECCOMP_H +#define _UAPI_LINUX_SECCOMP_H + +#include +#include + + +/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, ) */ +#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */ +#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ +#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ + +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 + +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +/* Received notifications wait in killable state (only respond to fatal signals) */ +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) + +/* + * All BPF programs must return a 32-bit value. + * The bottom 16-bits are for optional return data. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). + * + * The ordering ensures that a min_t() over composed return values always + * selects the least permissive choice. + */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +/** + * struct seccomp_data - the format the BPF program executes over. + * @nr: the system call number + * @arch: indicates system call convention as an AUDIT_ARCH_* value + * as defined in . + * @instruction_pointer: at the time of the system call. + * @args: up to 6 system call arguments always stored as 64-bit values + * regardless of the architecture. + */ +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; + +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +/* + * Valid flags for struct seccomp_notif_resp + * + * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution! + * If set by the process supervising the syscalls of another process the + * syscall will continue. This is problematic because of an inherent TOCTOU. + * An attacker can exploit the time while the supervised process is waiting on + * a response from the supervising process to rewrite syscall arguments which + * are passed as pointers of the intercepted syscall. + * It should be absolutely clear that this means that the seccomp notifier + * _cannot_ be used to implement a security policy! It should only ever be used + * in scenarios where a more privileged process supervises the syscalls of a + * lesser privileged process to get around kernel-enforced security + * restrictions when the privileged process deems this safe. In other words, + * in order to continue a syscall the supervising process should be sure that + * another security mechanism or the kernel itself will sufficiently block + * syscalls if arguments are rewritten to something unsafe. + * + * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF + * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the + * same syscall, the most recently added filter takes precedence. This means + * that the new SECCOMP_RET_USER_NOTIF filter can override any + * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all + * such filtered syscalls to be executed by sending the response + * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally + * be overriden by SECCOMP_USER_NOTIF_FLAG_CONTINUE. + */ +#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0) + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ + +/** + * struct seccomp_notif_addfd + * @id: The ID of the seccomp notification + * @flags: SECCOMP_ADDFD_FLAG_* + * @srcfd: The local fd number + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. + * @newfd_flags: The O_* flags the remote FD should have applied + */ +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) + +#endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 4314c9197850..e21caadda7c1 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ FILES=( "include/uapi/linux/perf_event.h" "include/uapi/linux/prctl.h" "include/uapi/linux/sched.h" + "include/uapi/linux/seccomp.h" "include/uapi/linux/stat.h" "include/uapi/linux/usbdevice_fs.h" "include/uapi/linux/vhost.h" -- cgit From 678ddf730a1b0b347ad6e5deb7fdea52654e5bdf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Sep 2023 08:38:56 -0300 Subject: perf bench sched-seccomp-notify: Use the tools copy of seccomp.h UAPI To keep perf building in systems where types and defines used in this new benchmark are not available, such as: 12 13.46 centos:stream : FAIL gcc version 8.5.0 20210514 (Red Hat 8.5.0-20) (GCC) bench/sched-seccomp-notify.c: In function 'user_notif_syscall': bench/sched-seccomp-notify.c:55:27: error: 'SECCOMP_RET_USER_NOTIF' undeclared (first use in this function); did you mean 'SECCOMP_RET_ERRNO'? BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), ^~~~~~~~~~~~~~~~~~~~~~ /git/perf-6.6.0-rc1/tools/include/uapi/linux/filter.h:49:59: note: in definition of macro 'BPF_STMT' #define BPF_STMT(code, k) { (unsigned short)(code), 0, 0, k } ^ bench/sched-seccomp-notify.c:55:27: note: each undeclared identifier is reported only once for each function it appears in BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), ^~~~~~~~~~~~~~~~~~~~~~ /git/perf-6.6.0-rc1/tools/include/uapi/linux/filter.h:49:59: note: in definition of macro 'BPF_STMT' #define BPF_STMT(code, k) { (unsigned short)(code), 0, 0, k } ^ bench/sched-seccomp-notify.c:55:3: error: missing initializer for field 'k' of 'struct sock_filter' [-Werror=missing-field-initializers] BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), ^~~~~~~~ In file included from bench/sched-seccomp-notify.c:5: /git/perf-6.6.0-rc1/tools/include/uapi/linux/filter.h:28:8: note: 'k' declared here __u32 k; /* Generic multiuse field */ ^ bench/sched-seccomp-notify.c: In function 'user_notification_sync_loop': bench/sched-seccomp-notify.c:70:28: error: storage size of 'resp' isn't known struct seccomp_notif_resp resp; ^~~~ bench/sched-seccomp-notify.c:71:23: error: storage size of 'req' isn't known struct seccomp_notif req; ^~~ bench/sched-seccomp-notify.c:76:23: error: 'SECCOMP_IOCTL_NOTIF_RECV' undeclared (first use in this function); did you mean 'SECCOMP_MODE_STRICT'? if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req)) ^~~~~~~~~~~~~~~~~~~~~~~~ SECCOMP_MODE_STRICT bench/sched-seccomp-notify.c:86:23: error: 'SECCOMP_IOCTL_NOTIF_SEND' undeclared (first use in this function); did you mean 'SECCOMP_RET_ACTION'? if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp)) ^~~~~~~~~~~~~~~~~~~~~~~~ SECCOMP_RET_ACTION bench/sched-seccomp-notify.c:71:23: error: unused variable 'req' [-Werror=unused-variable] struct seccomp_notif req; ^~~ bench/sched-seccomp-notify.c:70:28: error: unused variable 'resp' [-Werror=unused-variable] struct seccomp_notif_resp resp; ^~~~ 14 11.31 debian:10 : FAIL gcc version 8.3.0 (Debian 8.3.0-6) Cc: Adrian Hunter Cc: Andrei Vagin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kees Kook Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/ZQGhjaojgOGtSNk6@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-seccomp-notify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c index b04ebcde4036..a01c40131493 100644 --- a/tools/perf/bench/sched-seccomp-notify.c +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include -- cgit From 15ca35494ec58f50aa58f51437a22573431e8448 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Sep 2023 08:50:10 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes from these csets: 1b5277c0ea0b2473 ("x86/srso: Add SRSO_NO support") 8974eb588283b7d4 ("x86/speculation: Add Gather Data Sampling mitigation") That cause no changes to tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ Just silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Adrian Hunter Cc: Borislav Petkov (AMD) Cc: Daniel Sneddon Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZQGismCqcDddjEIQ@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index a00a53e15ab7..1d111350197f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -57,6 +57,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -155,6 +156,15 @@ * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR @@ -178,6 +188,8 @@ #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 -- cgit From c2122b687c212a28d237fb672cc979247bd94449 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Sep 2023 08:55:20 -0300 Subject: tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: ad9ee11fdf113f96 ("drm/doc: document that PRIME import/export is always supported") 2ff4f6d410afa762 ("drm/doc: document drm_event and its types") 9a2eabf48ade4fba ("drm/doc: use proper cross-references for sections") c7a4722971691562 ("drm/syncobj: add IOCTL to register an eventfd") Addressing these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl code into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2023-09-13 08:54:45.170134002 -0300 +++ after 2023-09-13 08:55:06.612712776 -0300 @@ -108,6 +108,7 @@ [0xCC] = "SYNCOBJ_TRANSFER", [0xCD] = "SYNCOBJ_TIMELINE_SIGNAL", [0xCE] = "MODE_GETFB2", + [0xCF] = "SYNCOBJ_EVENTFD", [DRM_COMMAND_BASE + 0x00] = "I915_INIT", [DRM_COMMAND_BASE + 0x01] = "I915_FLUSH", [DRM_COMMAND_BASE + 0x02] = "I915_FLIP", $ Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Simon Ser Link: https://lore.kernel.org/lkml/ZQGkh9qlhpKA%2FSMY@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 84 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index a87bbbbca2d4..794c1d857677 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -673,8 +673,11 @@ struct drm_gem_open { * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT * and &DRM_PRIME_CAP_EXPORT. * - * PRIME buffers are exposed as dma-buf file descriptors. See - * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. */ #define DRM_CAP_PRIME 0x5 /** @@ -682,6 +685,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_IMPORT 0x1 /** @@ -689,6 +694,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_EXPORT 0x2 /** @@ -756,15 +763,14 @@ struct drm_gem_open { /** * DRM_CAP_SYNCOBJ * - * If set to 1, the driver supports sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ 0x13 /** * DRM_CAP_SYNCOBJ_TIMELINE * * If set to 1, the driver supports timeline operations on sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 @@ -909,6 +915,27 @@ struct drm_syncobj_timeline_wait { __u32 pad; }; +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + struct drm_syncobj_array { __u64 handles; @@ -1169,6 +1196,8 @@ extern "C" { */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. @@ -1180,25 +1209,50 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/* - * Header for events written back to userspace on the drm fd. The - * type defines the type of event, the length specifies the total - * length of the event (including the header), and user_data is - * typically a 64 bit value passed with the ioctl that triggered the - * event. A read on the drm fd will always only return complete - * events, that is, if for example the read buffer is 100 bytes, and - * there are two 64 byte events pending, only one will be returned. +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). * - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and - * up are chipset specific. + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. */ struct drm_event { __u32 type; __u32 length; }; +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ #define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { -- cgit From 2d48c30176fa0fd61202b859d7454249f2b22bdc Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 12 Sep 2023 14:28:50 +0200 Subject: MAINTAINERS: gpio-regmap: make myself a maintainer of it When I've upstreamed the gpio-regmap driver, I didn't have that much experience with kernel maintenance, so I've just added myself as a reviewer. I've gained quite some experience, so I'd like to step up as a maintainer for it. Signed-off-by: Michael Walle Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..9ecfacc2a7c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8875,7 +8875,7 @@ F: drivers/gpio/gpio-mockup.c F: tools/testing/selftests/gpio/ GPIO REGMAP -R: Michael Walle +M: Michael Walle S: Maintained F: drivers/gpio/gpio-regmap.c F: include/linux/gpio/regmap.h -- cgit From a6a241764f69c62d23fc6960920cc662ae4069e9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 11 Sep 2023 11:32:51 +0100 Subject: swiotlb: use the calculated number of areas Commit 8ac04063354a ("swiotlb: reduce the number of areas to match actual memory pool size") calculated the reduced number of areas in swiotlb_init_remap() but didn't actually use the value. Replace usage of default_nareas accordingly. Fixes: 8ac04063354a ("swiotlb: reduce the number of areas to match actual memory pool size") Signed-off-by: Ross Lagerwall Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 394494a6b1f3..85dd94323b98 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -399,14 +399,13 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, } mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), - default_nareas), SMP_CACHE_BYTES); + nareas), SMP_CACHE_BYTES); if (!mem->areas) { pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; } - swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, - default_nareas); + swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) -- cgit From cf0ba445f5e4dd74c1e9d7a83ca721ba69204a11 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Sep 2023 11:18:22 +0300 Subject: ASoC: codecs: aw88395: Fix some error codes These error paths should return -EINVAL instead of success. Fixes: 7f4ec77802aa ("ASoC: codecs: Add code for bin parsing compatible with aw88261") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/81476e78-05c2-4656-b754-f314c7ccdb81@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/codecs/aw88395/aw88395_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/aw88395/aw88395_lib.c b/sound/soc/codecs/aw88395/aw88395_lib.c index 8ee1baa03269..87dd0ccade4c 100644 --- a/sound/soc/codecs/aw88395/aw88395_lib.c +++ b/sound/soc/codecs/aw88395/aw88395_lib.c @@ -452,11 +452,13 @@ static int aw_dev_parse_reg_bin_with_hdr(struct aw_device *aw_dev, if ((aw_bin->all_bin_parse_num != 1) || (aw_bin->header_info[0].bin_data_type != DATA_TYPE_REGISTER)) { dev_err(aw_dev->dev, "bin num or type error"); + ret = -EINVAL; goto parse_bin_failed; } if (aw_bin->header_info[0].valid_data_len % 4) { dev_err(aw_dev->dev, "bin data len get error!"); + ret = -EINVAL; goto parse_bin_failed; } -- cgit From 2f9f488e7b1448f8e9732b12df9ffbf7d42ef304 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Tue, 29 Aug 2023 10:05:10 +0800 Subject: riscv: dts: starfive: visionfive 2: Enable usb0 usb0 was disabled by mistake when merging, so enable it. Fixes: e7c304c0346d ("riscv: dts: starfive: jh7110: add the node and pins configuration for tdm") Signed-off-by: Hal Feng Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi index d4ceda901f33..687dccd88f59 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -513,6 +513,7 @@ &usb0 { dr_mode = "peripheral"; + status = "okay"; }; &U74_1 { -- cgit From 1558209533f140624a00408bdab796ab3f309450 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Tue, 29 Aug 2023 10:05:11 +0800 Subject: riscv: dts: starfive: visionfive 2: Fix uart0 pins sort order Node uart0_pins should be sorted alphabetically. Signed-off-by: Hal Feng Signed-off-by: Conor Dooley --- .../dts/starfive/jh7110-starfive-visionfive-2.dtsi | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi index 687dccd88f59..12ebe9792356 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -440,30 +440,6 @@ }; }; - uart0_pins: uart0-0 { - tx-pins { - pinmux = ; - bias-disable; - drive-strength = <12>; - input-disable; - input-schmitt-disable; - slew-rate = <0>; - }; - - rx-pins { - pinmux = ; - bias-disable; /* external pull-up */ - drive-strength = <2>; - input-enable; - input-schmitt-enable; - slew-rate = <0>; - }; - }; - tdm_pins: tdm-0 { tx-pins { pinmux = ; + bias-disable; + drive-strength = <12>; + input-disable; + input-schmitt-disable; + slew-rate = <0>; + }; + + rx-pins { + pinmux = ; + bias-disable; /* external pull-up */ + drive-strength = <2>; + input-enable; + input-schmitt-enable; + slew-rate = <0>; + }; + }; }; &tdm { -- cgit From 41dac81b56c82c51a6d00fda5f3af7691ffee2d7 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:10 +0100 Subject: ASoC: cs42l42: Ensure a reset pulse meets minimum pulse width. The CS42L42 can accept very short reset pulses of a few microseconds but there's no reason to force a very short pulse. Allow a wide range for the usleep_range() so it can be relaxed about the choice of timing source. Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index a0de0329406a..56d2857a4f01 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2320,6 +2320,10 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, if (cs42l42->reset_gpio) { dev_dbg(cs42l42->dev, "Found reset GPIO\n"); + + /* Ensure minimum reset pulse width */ + usleep_range(10, 500); + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); } usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); -- cgit From a479b44ac0a0ac25cd48e5356200078924d78022 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:11 +0100 Subject: ASoC: cs42l42: Don't rely on GPIOD_OUT_LOW to set RESET initially low The ACPI setting for a GPIO default state has higher priority than the flag passed to devm_gpiod_get_optional() so ACPI can override the GPIOD_OUT_LOW. Explicitly set the GPIO low when hard resetting. Although GPIOD_OUT_LOW can't be relied on this doesn't seem like a reason to stop passing it to devm_gpiod_get_optional(). So we still pass it to state our intent, but can deal with it having no effect. Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 56d2857a4f01..dc93861ddfb0 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2321,6 +2321,12 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, if (cs42l42->reset_gpio) { dev_dbg(cs42l42->dev, "Found reset GPIO\n"); + /* + * ACPI can override the default GPIO state we requested + * so ensure that we start with RESET low. + */ + gpiod_set_value_cansleep(cs42l42->reset_gpio, 0); + /* Ensure minimum reset pulse width */ usleep_range(10, 500); -- cgit From 2d066c6a78654c179f95c9beda1985d4c6befa4e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 16:00:12 +0100 Subject: ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset In SoundWire mode leave hard RESET asserted when exiting probe, and wait for an UNATTACHED notification before deasserting RESET. If the boot state of the reset GPIO was deasserted it is possible that the SoundWire core had already enumerated the CS42L42 before cs42l42_sdw_probe() is called. When cs42l42_common_probe() hard resets the CS42L42 it triggers a race condition: 1) After cs42l42_sdw_probe() returns the thread that called it will call cs42l42_sdw_update_status() to report the last status recorded by the SoundWire core. 2) The SoundWire bus master will see a PING with the CS42L42 now reporting as unenumerated and will trigger the core SoundWire code to start enumerating CS42L42. These two threads are racing against each other. If (1) happens before (2) a stale ATTACHED notification will be reported to the cs42l42 driver when in fact the status of cs42l42 is now unattached. To avoid this race condition: - Leave RESET asserted on exit from cs42l42_sdw_probe(). This ensures that an UNATTACHED notification must be sent to the cs42l42 driver. If cs42l42 was already enumerated it will be seen to drop off the bus, causing an UNATTACH notification. If it was never enumerated the status is already UNATTACHED and this will be reported by thread (1). - When the UNATTACH notification is received, release RESET. This will cause CS42L42 to be enumerated and eventually report an ATTACHED notification. - The ATTACHED notification is now valid. Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20230913150012.604775-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42-sdw.c | 20 ++++++++++++++++++++ sound/soc/codecs/cs42l42.c | 11 ++++++++++- sound/soc/codecs/cs42l42.h | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42-sdw.c b/sound/soc/codecs/cs42l42-sdw.c index eeab07c850f9..974bae4abfad 100644 --- a/sound/soc/codecs/cs42l42-sdw.c +++ b/sound/soc/codecs/cs42l42-sdw.c @@ -344,6 +344,16 @@ static int cs42l42_sdw_update_status(struct sdw_slave *peripheral, switch (status) { case SDW_SLAVE_ATTACHED: dev_dbg(cs42l42->dev, "ATTACHED\n"); + + /* + * The SoundWire core can report stale ATTACH notifications + * if we hard-reset CS42L42 in probe() but it had already been + * enumerated. Reject the ATTACH if we haven't yet seen an + * UNATTACH report for the device being in reset. + */ + if (cs42l42->sdw_waiting_first_unattach) + break; + /* * Initialise codec, this only needs to be done once. * When resuming from suspend, resume callback will handle re-init of codec, @@ -354,6 +364,16 @@ static int cs42l42_sdw_update_status(struct sdw_slave *peripheral, break; case SDW_SLAVE_UNATTACHED: dev_dbg(cs42l42->dev, "UNATTACHED\n"); + + if (cs42l42->sdw_waiting_first_unattach) { + /* + * SoundWire core has seen that CS42L42 is not on + * the bus so release RESET and wait for ATTACH. + */ + cs42l42->sdw_waiting_first_unattach = false; + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + } + break; default: break; diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index dc93861ddfb0..2961340f15e2 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -2330,7 +2330,16 @@ int cs42l42_common_probe(struct cs42l42_private *cs42l42, /* Ensure minimum reset pulse width */ usleep_range(10, 500); - gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + /* + * On SoundWire keep the chip in reset until we get an UNATTACH + * notification from the SoundWire core. This acts as a + * synchronization point to reject stale ATTACH notifications + * if the chip was already enumerated before we reset it. + */ + if (cs42l42->sdw_peripheral) + cs42l42->sdw_waiting_first_unattach = true; + else + gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); } usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 4bd7b85a5747..7785125b73ab 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -53,6 +53,7 @@ struct cs42l42_private { u8 stream_use; bool hp_adc_up_pending; bool suspended; + bool sdw_waiting_first_unattach; bool init_done; }; -- cgit From 7c6339322ce0c6128acbe36aacc1eeb986dd7bf1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:38 -0400 Subject: NFS: Fix O_DIRECT locking issues The dreq fields are protected by the dreq->lock. Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ee88f0a6e7b8..e8a1645857dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -553,7 +553,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); if (dreq->error < 0) { desc.pg_error = dreq->error; } else if (desc.pg_error != -EAGAIN) { @@ -563,7 +563,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->error = desc.pg_error; } else dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); break; } nfs_release_request(req); @@ -871,9 +871,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, /* If the error is soft, defer remaining requests */ nfs_init_cinfo_from_dreq(&cinfo, dreq); - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); nfs_unlock_request(req); nfs_mark_request_commit(req, NULL, &cinfo, 0); desc.pg_error = 0; -- cgit From 8982f7aff39fb526aba4441fff2525fcedd5e1a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:39 -0400 Subject: NFS: More O_DIRECT accounting fixes for error paths If we hit a fatal error when retransmitting, we do need to record the removal of the request from the count of written bytes. Fixes: 031d73ed768a ("NFS: Fix O_DIRECT accounting of number of bytes read/written") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e8a1645857dd..a53e50123499 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -93,12 +93,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq, dreq->max_count = dreq_len; if (dreq->count > dreq_len) dreq->count = dreq_len; - - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - else /* Clear outstanding error if this is EOF */ - dreq->error = 0; } + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) + dreq->error = hdr->error; } static void @@ -120,6 +118,18 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq, dreq->count = dreq_len; } +static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, + struct nfs_page *req) +{ + loff_t offs = req_offset(req); + size_t req_start = (size_t)(offs - dreq->io_start); + + if (req_start < dreq->max_count) + dreq->max_count = req_start; + if (req_start < dreq->count) + dreq->count = req_start; +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -537,10 +547,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_direct_join_group(&reqs, dreq->inode); - dreq->count = 0; - dreq->max_count = 0; - list_for_each_entry(req, &reqs, wb_list) - dreq->max_count += req->wb_bytes; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); @@ -574,10 +580,14 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); - if (desc.pg_error == -EAGAIN) + if (desc.pg_error == -EAGAIN) { nfs_mark_request_commit(req, NULL, &cinfo, 0); - else + } else { + spin_lock(&dreq->lock); + nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); nfs_release_request(req); + } } if (put_dreq(dreq)) @@ -597,8 +607,6 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) if (status < 0) { /* Errors in commit are fatal */ dreq->error = status; - dreq->max_count = 0; - dreq->count = 0; dreq->flags = NFS_ODIRECT_DONE; } else { status = dreq->error; @@ -609,7 +617,12 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - if (status >= 0 && !nfs_write_match_verf(verf, req)) { + if (status < 0) { + spin_lock(&dreq->lock); + nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); + nfs_release_request(req); + } else if (!nfs_write_match_verf(verf, req)) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* * Despite the reboot, the write was successful, @@ -617,7 +630,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) */ req->wb_nio = 0; nfs_mark_request_commit(req, NULL, &cinfo, 0); - } else /* Error or match */ + } else nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -670,6 +683,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) while (!list_empty(&reqs)) { req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); + nfs_direct_truncate_request(dreq, req); nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -719,7 +733,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && + !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; -- cgit From b193a78ddb5ee7dba074d3f28dc050069ba083c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:40 -0400 Subject: NFS: Use the correct commit info in nfs_join_page_group() Ensure that nfs_clear_request_commit() updates the correct counters when it removes them from the commit list. Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 8 +++++--- fs/nfs/write.c | 23 ++++++++++++----------- include/linux/nfs_page.h | 4 +++- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a53e50123499..3391c8b97da5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -498,7 +498,9 @@ static void nfs_direct_add_page_head(struct list_head *list, kref_get(&head->wb_kref); } -static void nfs_direct_join_group(struct list_head *list, struct inode *inode) +static void nfs_direct_join_group(struct list_head *list, + struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *req, *subreq; @@ -520,7 +522,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode) nfs_release_request(subreq); } } while ((subreq = subreq->wb_this_page) != req); - nfs_join_page_group(req, inode); + nfs_join_page_group(req, cinfo, inode); } } @@ -545,7 +547,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); - nfs_direct_join_group(&reqs, dreq->inode); + nfs_direct_join_group(&reqs, &cinfo, dreq->inode); nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f4cca8f00c0c..8c1ee1a1a28f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,7 +59,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_inode_remove_request(struct nfs_page *req); -static void nfs_clear_request_commit(struct nfs_page *req); +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); static struct nfs_page * @@ -502,8 +503,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. */ -void -nfs_join_page_group(struct nfs_page *head, struct inode *inode) +void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *subreq; struct nfs_page *destroy_list = NULL; @@ -533,7 +534,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) * Commit list removal accounting is done after locks are dropped */ subreq = head; do { - nfs_clear_request_commit(subreq); + nfs_clear_request_commit(cinfo, subreq); subreq = subreq->wb_this_page; } while (subreq != head); @@ -566,8 +567,10 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; struct nfs_page *head; + struct nfs_commit_info cinfo; int ret; + nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed @@ -584,7 +587,7 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) return ERR_PTR(ret); } - nfs_join_page_group(head, inode); + nfs_join_page_group(head, &cinfo, inode); return head; } @@ -955,18 +958,16 @@ static void nfs_folio_clear_commit(struct folio *folio) } /* Called holding the request lock on @req */ -static void -nfs_clear_request_commit(struct nfs_page *req) +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { struct nfs_open_context *ctx = nfs_req_openctx(req); struct inode *inode = d_inode(ctx->dentry); - struct nfs_commit_info cinfo; - nfs_init_cinfo_from_inode(&cinfo, inode); mutex_lock(&NFS_I(inode)->commit_mutex); - if (!pnfs_clear_request_commit(req, &cinfo)) { - nfs_request_remove_commit_list(req, &cinfo); + if (!pnfs_clear_request_commit(req, cinfo)) { + nfs_request_remove_commit_list(req, cinfo); } mutex_unlock(&NFS_I(inode)->commit_mutex); nfs_folio_clear_commit(nfs_page_to_folio(req)); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit From b11243f720ee5f9376861099019c8542969b6318 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:41 -0400 Subject: NFS: More fixes for nfs_direct_write_reschedule_io() Ensure that all requests are put back onto the commit list so that they can be rescheduled. Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3391c8b97da5..f6c74f424691 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -780,18 +780,23 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error) static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_page *req; + struct nfs_commit_info cinfo; trace_nfs_direct_write_reschedule_io(dreq); + nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); - if (dreq->error == 0) { + if (dreq->error == 0) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - /* fake unstable write to let common nfs resend pages */ - hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.offset + hdr->args.count - - hdr->io_start; - } + set_bit(NFS_IOHDR_REDO, &hdr->flags); spin_unlock(&dreq->lock); + while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); + nfs_list_remove_request(req); + nfs_unlock_request(req); + nfs_mark_request_commit(req, NULL, &cinfo, 0); + } } static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { -- cgit From dd7d7ee3ba2a70d12d02defb478790cf57d5b87b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:43:58 -0400 Subject: NFS/pNFS: Report EINVAL errors from connect() to the server With IPv6, connect() can occasionally return EINVAL if a route is unavailable. If this happens during I/O to a data server, we want to report it using LAYOUTERROR as an inability to connect. Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7deb3cd76abe..a1dc33864906 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1235,6 +1235,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -EPFNOSUPPORT: case -EPROTONOSUPPORT: case -EOPNOTSUPP: + case -EINVAL: case -ECONNREFUSED: case -ECONNRESET: case -EHOSTDOWN: -- cgit From 611fa42dfa9d2f3918ac5f4dd5705dfad81b323d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:50:09 -0400 Subject: SUNRPC: Mark the cred for revalidation if the server rejects it If the server rejects the credential as being stale, or bad, then we should mark it for revalidation before retransmitting. Fixes: 7f5667a5f8c4 ("SUNRPC: Clean up rpc_verify_header()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d75290f1a31..5c37621aa09a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2751,6 +2751,7 @@ out_msg_denied: case rpc_autherr_rejectedverf: case rpcsec_gsserr_credproblem: case rpcsec_gsserr_ctxproblem: + rpcauth_invalcred(task); if (!task->tk_cred_retry) break; task->tk_cred_retry--; -- cgit From e86fcf0820d914389b46658a5a7e8969c3af2d53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2023 21:03:28 -0400 Subject: Revert "SUNRPC: Fail faster on bad verifier" This reverts commit 0701214cd6e66585a999b132eb72ae0489beb724. The premise of this commit was incorrect. There are exactly 2 cases where rpcauth_checkverf() will return an error: 1) If there was an XDR decode problem (i.e. garbage data). 2) If gss_validate() had a problem verifying the RPCSEC_GSS MIC. In the second case, there are again 2 subcases: a) The GSS context expires, in which case gss_validate() will force a new context negotiation on retry by invalidating the cred. b) The sequence number check failed because an RPC call timed out, and the client retransmitted the request using a new sequence number, as required by RFC2203. In neither subcase is this a fatal error. Reported-by: Russell Cattelan Fixes: 0701214cd6e6 ("SUNRPC: Fail faster on bad verifier") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5c37621aa09a..edbcfdd84e1f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2725,7 +2725,7 @@ out_unparsable: out_verifier: trace_rpc_bad_verifier(task); - goto out_err; + goto out_garbage; out_msg_denied: error = -EACCES; -- cgit From 806a3bc421a115fbb287c1efce63a48c54ee804b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 30 Aug 2023 15:29:34 -0400 Subject: NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposed to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which then used to pass max_connect value to use into the rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt() and using rpc client's value, the caller passes in max_connect value which is previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 6 +++++- include/linux/nfs_fs_sb.h | 1 + net/sunrpc/clnt.c | 11 +++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 27fb25567ce7..11e3a285594c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -417,6 +417,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) .net = old->cl_net, .servername = old->cl_hostname, }; + int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ? + clp->cl_max_connect : old->cl_max_connect; if (clp->cl_proto != old->cl_proto) return; @@ -430,7 +432,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) xprt_args.addrlen = clp_salen; rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, - rpc_clnt_test_and_add_xprt, NULL); + rpc_clnt_test_and_add_xprt, &max_connect); } /** @@ -1010,6 +1012,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); + __set_bit(NFS_CS_PNFS, &cl_init.init_flags); + cl_init.max_connect = NFS_MAX_TRANSPORTS; /* * Set an authflavor equual to the MDS value. Use the MDS nfs_client * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..cd628c4b011e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index edbcfdd84e1f..37b0b212b934 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2908,19 +2908,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { * @clnt: pointer to struct rpc_clnt * @xps: pointer to struct rpc_xprt_switch, * @xprt: pointer struct rpc_xprt - * @dummy: unused + * @in_max_connect: pointer to the max_connect value for the passed in xprt transport */ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, - void *dummy) + void *in_max_connect) { struct rpc_cb_add_xprt_calldata *data; struct rpc_task *task; + int max_connect = clnt->cl_max_connect; - if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { + if (in_max_connect) + max_connect = *(int *)in_max_connect; + if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) { rcu_read_lock(); pr_warn("SUNRPC: reached max allowed number (%d) did not add " - "transport to server: %s\n", clnt->cl_max_connect, + "transport to server: %s\n", max_connect, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); rcu_read_unlock(); return -EINVAL; -- cgit From 06ed09351b67eb1114ae106a87a0ee3ea9adb3db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Aug 2023 18:52:08 +0100 Subject: btrfs: convert btrfs_read_merkle_tree_page() to use a folio Remove a number of hidden calls to compound_head() by using a folio throughout. Also follow core kernel coding style by adding the folio to the page cache immediately after allocation instead of doing the read first, then adding it to the page cache. This ordering makes subsequent readers block waiting for the first reader instead of duplicating the work only to throw it away when they find out they lost the race. Reviewed-by: Boris Burkov Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Sterba --- fs/btrfs/verity.c | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index c5ff16f9e9fa..744f4f4d4c68 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -715,7 +715,7 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct page *page; + struct folio *folio; u64 off = (u64)index << PAGE_SHIFT; loff_t merkle_pos = merkle_file_pos(inode); int ret; @@ -726,29 +726,36 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, return ERR_PTR(-EFBIG); index += merkle_pos >> PAGE_SHIFT; again: - page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); - if (page) { - if (PageUptodate(page)) - return page; + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (!IS_ERR(folio)) { + if (folio_test_uptodate(folio)) + goto out; - lock_page(page); - /* - * We only insert uptodate pages, so !Uptodate has to be - * an error - */ - if (!PageUptodate(page)) { - unlock_page(page); - put_page(page); + folio_lock(folio); + /* If it's not uptodate after we have the lock, we got a read error. */ + if (!folio_test_uptodate(folio)) { + folio_unlock(folio); + folio_put(folio); return ERR_PTR(-EIO); } - unlock_page(page); - return page; + folio_unlock(folio); + goto out; } - page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); - if (!page) + folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS), + 0); + if (!folio) return ERR_PTR(-ENOMEM); + ret = filemap_add_folio(inode->i_mapping, folio, index, GFP_NOFS); + if (ret) { + folio_put(folio); + /* Did someone else insert a folio here? */ + if (ret == -EEXIST) + goto again; + return ERR_PTR(ret); + } + /* * Merkle item keys are indexed from byte 0 in the merkle tree. * They have the form: @@ -756,28 +763,19 @@ again: * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ] */ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off, - page_address(page), PAGE_SIZE, page); + folio_address(folio), PAGE_SIZE, &folio->page); if (ret < 0) { - put_page(page); + folio_put(folio); return ERR_PTR(ret); } if (ret < PAGE_SIZE) - memzero_page(page, ret, PAGE_SIZE - ret); + folio_zero_segment(folio, ret, PAGE_SIZE); - SetPageUptodate(page); - ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS); + folio_mark_uptodate(folio); + folio_unlock(folio); - if (!ret) { - /* Inserted and ready for fsverity */ - unlock_page(page); - } else { - put_page(page); - /* Did someone race us into inserting this page? */ - if (ret == -EEXIST) - goto again; - page = ERR_PTR(ret); - } - return page; +out: + return folio_file_page(folio, index); } /* -- cgit From 9af86694fd5d387992699ec99007ed374966ce9a Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Wed, 6 Sep 2023 17:59:03 +0200 Subject: btrfs: file_remove_privs needs an exclusive lock in direct io write This was noticed by Miklos that file_remove_privs might call into notify_change(), which requires to hold an exclusive lock. The problem exists in FUSE and btrfs. We can fix it without any additional helpers from VFS, in case the privileges would need to be dropped, change the lock type to be exclusive and redo the loop. Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF") CC: Miklos Szeredi CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Christoph Hellwig Signed-off-by: Bernd Schubert Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6edad7b9a5d3..e8726a83b649 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1466,8 +1466,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1475,6 +1480,13 @@ relock: if (err < 0) return err; + /* Shared lock cannot be used with security bits set. */ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); -- cgit From b595d25996329427b2c09d4b90395a165fb3ef8e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Sep 2023 15:31:39 -0400 Subject: btrfs: don't clear uptodate on write errors We have been consistently seeing hangs with generic/648 in our subpage GitHub CI setup. This is a classic deadlock, we are calling btrfs_read_folio() on a folio, which requires holding the folio lock on the folio, and then finding a ordered extent that overlaps that range and calling btrfs_start_ordered_extent(), which then tries to write out the dirty page, which requires taking the folio lock and then we deadlock. The hang happens because we're writing to range [1271750656, 1271767040), page index [77621, 77622], and page 77621 is !Uptodate. It is also Dirty, so we call btrfs_read_folio() for 77621 and which does btrfs_lock_and_flush_ordered_range() for that range, and we find an ordered extent which is [1271644160, 1271746560), page index [77615, 77621]. The page indexes overlap, but the actual bytes don't overlap. We're holding the page lock for 77621, then call btrfs_lock_and_flush_ordered_range() which tries to flush the dirty page, and tries to lock 77621 again and then we deadlock. The byte ranges do not overlap, but with subpage support if we clear uptodate on any portion of the page we mark the entire thing as not uptodate. We have been clearing page uptodate on write errors, but no other file system does this, and is in fact incorrect. This doesn't hurt us in the !subpage case because we can't end up with overlapped ranges that don't also overlap on the page. Fix this by not clearing uptodate when we have a write error. The only thing we should be doing in this case is setting the mapping error and carrying on. This makes it so we would no longer call btrfs_read_folio() on the page as it's uptodate and eliminates the deadlock. With this patch we're now able to make it through a full fstests run on our subpage blocksize VMs. Note for stable backports: this probably goes beyond 6.1 but the code has been cleaned up and clearing the uptodate bit must be verified on each version independently. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 +-------- fs/btrfs/inode.c | 4 ---- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ac3fca5a5e41..6954ae763b86 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) bvec->bv_offset, bvec->bv_len); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); - if (error) { - btrfs_page_clear_uptodate(fs_info, page, start, len); + if (error) mapping_set_error(page->mapping, error); - } btrfs_page_clear_writeback(fs_info, page, start, len); } @@ -1456,8 +1454,6 @@ done: if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page, - page_start, PAGE_SIZE); mapping_set_error(page->mapping, ret); } unlock_page(page); @@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) struct page *page = bvec->bv_page; u32 len = bvec->bv_len; - if (!uptodate) - btrfs_page_clear_uptodate(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len); bio_offset += len; } @@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, cur, cur_len, !ret); - btrfs_page_clear_uptodate(fs_info, page, cur, cur_len); mapping_set_error(page->mapping, ret); } btrfs_page_unlock_writer(fs_info, page, cur, cur_len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e211e88c6545..616fdcf40467 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, btrfs_mark_ordered_io_finished(inode, locked_page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(inode->root->fs_info, - locked_page, page_start, - PAGE_SIZE); mapping_set_error(locked_page->mapping, ret); unlock_page(locked_page); } @@ -2791,7 +2788,6 @@ out_page: mapping_set_error(page->mapping, ret); btrfs_mark_ordered_io_finished(inode, page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE); clear_page_dirty_for_io(page); } btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); -- cgit From 69343ce91435f222052015c5af86b550391bac85 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 17:05:23 +0100 Subject: firmware: cirrus: cs_dsp: Only log list of algorithms in debug build Change the logging of each algorithm from info level to debug level. On the original devices supported by this code there were typically only one or two algorithms in a firmware and one or two DSPs so this logging only used a small number of log lines. However, for the latest devices there could be 30-40 algorithms in a firmware and 8 DSPs being loaded in parallel, so using 300+ lines of log for information that isn't particularly important to have logged. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230913160523.3701189-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/cs_dsp.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 49b70c70dc69..79d4254d1f9b 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1863,15 +1863,15 @@ static int cs_dsp_adsp2_setup_algs(struct cs_dsp *dsp) return PTR_ERR(adsp2_alg); for (i = 0; i < n_algs; i++) { - cs_dsp_info(dsp, - "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n", - i, be32_to_cpu(adsp2_alg[i].alg.id), - (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16, - (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8, - be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff, - be32_to_cpu(adsp2_alg[i].xm), - be32_to_cpu(adsp2_alg[i].ym), - be32_to_cpu(adsp2_alg[i].zm)); + cs_dsp_dbg(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n", + i, be32_to_cpu(adsp2_alg[i].alg.id), + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff, + be32_to_cpu(adsp2_alg[i].xm), + be32_to_cpu(adsp2_alg[i].ym), + be32_to_cpu(adsp2_alg[i].zm)); alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM, adsp2_alg[i].alg.id, @@ -1996,14 +1996,14 @@ static int cs_dsp_halo_setup_algs(struct cs_dsp *dsp) return PTR_ERR(halo_alg); for (i = 0; i < n_algs; i++) { - cs_dsp_info(dsp, - "%d: ID %x v%d.%d.%d XM@%x YM@%x\n", - i, be32_to_cpu(halo_alg[i].alg.id), - (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16, - (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8, - be32_to_cpu(halo_alg[i].alg.ver) & 0xff, - be32_to_cpu(halo_alg[i].xm_base), - be32_to_cpu(halo_alg[i].ym_base)); + cs_dsp_dbg(dsp, + "%d: ID %x v%d.%d.%d XM@%x YM@%x\n", + i, be32_to_cpu(halo_alg[i].alg.id), + (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16, + (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8, + be32_to_cpu(halo_alg[i].alg.ver) & 0xff, + be32_to_cpu(halo_alg[i].xm_base), + be32_to_cpu(halo_alg[i].ym_base)); ret = cs_dsp_halo_create_regions(dsp, halo_alg[i].alg.id, halo_alg[i].alg.ver, -- cgit From 781118bc2fc1026c8285f83ea7ecab07071a09c4 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 Sep 2023 17:02:50 +0100 Subject: ASoC: wm_adsp: Fix missing locking in wm_adsp_[read|write]_ctl() wm_adsp_read_ctl() and wm_adsp_write_ctl() must hold the cs_dsp pwr_lock mutex when calling cs_dsp_coeff_read_ctrl() and cs_dsp_coeff_write_ctrl(). Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230913160250.3700346-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 6fc34f41b175..d1b9238d391e 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -687,7 +687,10 @@ int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, struct wm_coeff_ctl *ctl; int ret; + mutex_lock(&dsp->cs_dsp.pwr_lock); ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + if (ret < 0) return ret; @@ -703,8 +706,14 @@ EXPORT_SYMBOL_GPL(wm_adsp_write_ctl); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len) { - return cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg), - 0, buf, len); + int ret; + + mutex_lock(&dsp->cs_dsp.pwr_lock); + ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg), + 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + + return ret; } EXPORT_SYMBOL_GPL(wm_adsp_read_ctl); -- cgit From 8a19edd4fa6f5b22d5a35bb7c8bb3e7c571a74d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Sep 2023 13:47:11 +0200 Subject: selftests/bpf: Fix kprobe_multi_test/attach_override test We need to deny the attach_override test for arm64, denying the whole kprobe_multi_test suite. Also making attach_override static. Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230913114711.499829-1-jolsa@kernel.org --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 10 ++-------- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 7f768d335698..3babaf3eee5c 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,14 +1,8 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 fexit_sleep # The test never returns. The remaining tests cannot start. -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3 +kprobe_multi_bench_attach # needs CONFIG_FPROBE +kprobe_multi_test # needs CONFIG_FPROBE module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e05477b210a5..4041cfa670eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -454,7 +454,7 @@ cleanup: } } -void test_attach_override(void) +static void test_attach_override(void) { struct kprobe_multi_override *skel = NULL; struct bpf_link *link = NULL; -- cgit From 4908d5af16676b9d2901830551c2af911e452524 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Sep 2023 10:56:07 +0200 Subject: netfilter: conntrack: fix extension size table The size table is incorrect due to copypaste error, this reserves more size than needed. TSTAMP reserved 32 instead of 16 bytes. TIMEOUT reserved 16 instead of 8 bytes. Fixes: 5f31edc0676b ("netfilter: conntrack: move extension sizes into core") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 0b513f7bf9f3..dd62cc12e775 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache), #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP - [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct), + [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp), #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp), + [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout), #endif #ifdef CONFIG_NF_CONNTRACK_LABELS [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels), -- cgit From 7fb818f248cff996180b7cdcdcb86b6b4f6e44e2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 13 Sep 2023 15:51:36 +0200 Subject: netfilter: nf_tables: Fix entries val in rule reset audit log The value in idx and the number of rules handled in that particular __nf_tables_dump_rules() call is not identical. The former is a cursor to pick up from if multiple netlink messages are needed, so its value is ever increasing. Fixing this is not just a matter of subtracting s_idx from it, though: When resetting rules in multiple chains, __nf_tables_dump_rules() is called for each and cb->args[0] is not adjusted in between. Introduce a dedicated counter to record the number of rules reset in this call in a less confusing way. While being at it, prevent the direct return upon buffer exhaustion: Any rules previously dumped into that skb would evade audit logging otherwise. Fixes: 9b5ba5c9c5109 ("netfilter: nf_tables: Unbreak audit log reset") Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1e485aee763..d819b4d42962 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3451,6 +3451,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; + unsigned int entries = 0; + int ret = 0; u64 handle; prule = NULL; @@ -3473,9 +3475,11 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle, reset) < 0) - return 1; - + table, chain, rule, handle, reset) < 0) { + ret = 1; + break; + } + entries++; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: prule = rule; @@ -3483,10 +3487,10 @@ cont_skip: (*idx)++; } - if (reset && *idx) - audit_log_rule_reset(table, cb->seq, *idx); + if (reset && entries) + audit_log_rule_reset(table, cb->seq, entries); - return 0; + return ret; } static int nf_tables_dump_rules(struct sk_buff *skb, -- cgit From e8dbde59ca3fe925d0105bfb380e8429928b16dd Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 13 Sep 2023 15:51:37 +0200 Subject: selftests: netfilter: Test nf_tables audit logging Compare NETFILTER_CFG type audit logs emitted from kernel upon ruleset modifications against expected output. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/.gitignore | 1 + tools/testing/selftests/netfilter/Makefile | 4 +- tools/testing/selftests/netfilter/audit_logread.c | 165 ++++++++++++++++++++++ tools/testing/selftests/netfilter/config | 1 + tools/testing/selftests/netfilter/nft_audit.sh | 108 ++++++++++++++ 5 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/netfilter/audit_logread.c create mode 100755 tools/testing/selftests/netfilter/nft_audit.sh diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4cb887b57413..4b2928e1c19d 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue connect_close +audit_logread diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3686bfa6c58d..321db8850da0 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,13 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close +TEST_GEN_FILES = nf-queue connect_close audit_logread include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 4faf2ce021d9..7c42b1b2c69b 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 000000000000..83c271b1c735 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +logfile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? -eq 0 ] && { echo "OK"; return; } + echo "FAIL" + echo "$res" + ((RC++)) +} + +nft flush ruleset + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule' + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done | do_test 'nft -f -' \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +exit $RC -- cgit From 531108ec5b5cd45ec6272a6115e73275baef7d22 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 12 Sep 2023 19:23:21 +0300 Subject: uapi: stddef.h: Fix header guard location The #endif for the header guard wasn't at the end of the header. This was harmless since the define that escaped was already testing for its own redefinition. Regardless, move the #endif to the correct place. Signed-off-by: Alexey Dobriyan Fixes: c8248faf3ca2 ("Compiler Attributes: counted_by: Adjust name and identifier expansion") Link: https://lore.kernel.org/r/b1f5081e-339d-421d-81b2-cbb94e1f6f5f@p183 Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 7c3fc3980881..c027b2070d79 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -44,8 +44,9 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } -#endif #ifndef __counted_by #define __counted_by(m) #endif + +#endif /* _UAPI_LINUX_STDDEF_H */ -- cgit From 32a4ec211d4164e667d9d0b807fadf02053cd2e9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 12 Sep 2023 19:22:24 +0300 Subject: uapi: stddef.h: Fix __DECLARE_FLEX_ARRAY for C++ __DECLARE_FLEX_ARRAY(T, member) macro expands to struct { struct {} __empty_member; T member[]; }; which is subtly wrong in C++ because sizeof(struct{}) is 1 not 0, changing UAPI structures layouts. This can be fixed by expanding to T member[]; Now g++ doesn't like "T member[]" either, throwing errors on the following code: struct S { union { T1 member1[]; T2 member2[]; }; }; or struct S { T member[]; }; Use "T member[0];" which seems to work and does the right thing wrt structure layout. Signed-off-by: Alexey Dobriyan Fixes: 3080ea5553cc ("stddef: Introduce DECLARE_FLEX_ARRAY() helper") Link: https://lore.kernel.org/r/97242381-f1ec-4a4a-9472-1a464f575657@p183 Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index c027b2070d79..5c6c4269f7ef 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -29,6 +29,11 @@ struct TAG { MEMBERS } ATTRS NAME; \ } +#ifdef __cplusplus +/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ +#define __DECLARE_FLEX_ARRAY(T, member) \ + T member[0] +#else /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union * @@ -44,6 +49,7 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif #ifndef __counted_by #define __counted_by(m) -- cgit From 4b2d631236931550f2ab0abc9a666958853ae846 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Mon, 11 Sep 2023 22:32:56 +0800 Subject: memblock tests: Fix compilation errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fix the follow errors. commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") pass nid parameter to reserve_bootmem_region(), $ make -C tools/testing/memblock/ ... memblock.c: In function ‘memmap_init_reserved_pages’: memblock.c:2111:25: error: too many arguments to function ‘reserve_bootmem_region’ 2111 | reserve_bootmem_region(start, end, nid); | ^~~~~~~~~~~~~~~~~~~~~~ ../../include/linux/mm.h:32:6: note: declared here 32 | void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); | ^~~~~~~~~~~~~~~~~~~~~~ memblock.c:2122:17: error: too many arguments to function ‘reserve_bootmem_region’ 2122 | reserve_bootmem_region(start, end, nid); | ^~~~~~~~~~~~~~~~~~~~~~ commit dcdfdd40fa82 ("mm: Add support for unaccepted memory") call accept_memory() in memblock.c $ make -C tools/testing/memblock/ ... cc -fsanitize=address -fsanitize=undefined main.o memblock.o \ lib/slab.o mmzone.o slab.o tests/alloc_nid_api.o \ tests/alloc_helpers_api.o tests/alloc_api.o tests/basic_api.o \ tests/common.o tests/alloc_exact_nid_api.o -o main /usr/bin/ld: memblock.o: in function `memblock_alloc_range_nid': memblock.c:(.text+0x7ae4): undefined reference to `accept_memory' Signed-off-by: Rong Tao Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Link: https://lore.kernel.org/r/tencent_6F19BC082167F15DF2A8D8BEFE8EF220F60A@qq.com Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/mm.h | 2 +- tools/testing/memblock/internal.h | 4 ++++ tools/testing/memblock/mmzone.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index a03d9bba5151..2bc94079d616 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -29,7 +29,7 @@ static inline void *phys_to_virt(unsigned long address) return __va(address); } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); static inline void totalram_pages_inc(void) { diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..f6c6e5474c3a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -20,4 +20,8 @@ void memblock_free_pages(struct page *page, unsigned long pfn, { } +static inline void accept_memory(phys_addr_t start, phys_addr_t end) +{ +} + #endif diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c index 7b0909e8b759..d3d58851864e 100644 --- a/tools/testing/memblock/mmzone.c +++ b/tools/testing/memblock/mmzone.c @@ -11,7 +11,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) return NULL; } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end) +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid) { } -- cgit From 5e1bffbdb63baf89f3bf0b6bafb50903432a7434 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Thu, 14 Sep 2023 09:24:51 +0300 Subject: memblock tests: fix warning: "__ALIGN_KERNEL" redefined Building memblock tests produces the following warning: cc -I. -I../../include -Wall -O2 -fsanitize=address -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT -c -o main.o main.c In file included from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/linux/mm.h:14: warning: "__ALIGN_KERNEL" redefined 14 | #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) | In file included from ../../include/linux/mm.h:6, from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/uapi/linux/const.h:31: note: this is the location of the previous definition 31 | #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) | Remove definitions of __ALIGN_KERNEL and __ALIGN_KERNEL_MASK from tools/include/linux/mm.h to fix it. Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index 2bc94079d616..f3c82ab5b14c 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -11,8 +11,6 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -- cgit From 55122e0130e51eb71f5ec62d10525db0468f28e8 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Thu, 14 Sep 2023 10:45:40 +0300 Subject: memblock tests: fix warning ‘struct seq_file’ declared inside parameter list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building memblock tests produces the following warning: cc -I. -I../../include -Wall -O2 -fsanitize=address -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT -c -o main.o main.c In file included from tests/common.h:9, from tests/basic_api.h:5, from main.c:2: ./linux/memblock.h:601:50: warning: ‘struct seq_file’ declared inside parameter list will not be visible outside of this definition or declaration 601 | static inline void memtest_report_meminfo(struct seq_file *m) { } | ^~~~~~~~ Add declaration of 'struct seq_file' to tools/include/linux/seq_file.h to fix it. Signed-off-by: Mike Rapoport (IBM) --- tools/include/linux/seq_file.h | 2 ++ tools/testing/memblock/tests/basic_api.c | 2 +- tools/testing/memblock/tests/common.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h index 102fd9217f1f..f6bc226af0c1 100644 --- a/tools/include/linux/seq_file.h +++ b/tools/include/linux/seq_file.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H #define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H +struct seq_file; + #endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */ diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 411647094cc3..57bf2688edfd 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include "basic_api.h" #include #include -#include "basic_api.h" #define EXPECTED_MEMBLOCK_REGIONS 128 #define FUNC_ADD "memblock_add" diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 4f23302ee677..b5ec59aa62d7 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include -- cgit From f6c8a312ef0175ea67a1ace29e1d1e5d470ea45a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 18 Aug 2023 13:04:33 +0300 Subject: media: pci: ivsc: Select build dependencies Select MEDIA_CONTROLLER, VIDEO_V4L2_SUBDEV_API and V4L2_ASYNC as the IVSC driver depends on all these. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308170227.ymiFlMbT-lkp@intel.com/ Fixes: 29006e196a56 ("media: pci: intel: ivsc: Add CSI submodule") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ivsc/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 1ef1c4e3750d..92c975e98cb1 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -4,6 +4,9 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" depends on INTEL_MEI && ACPI + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API + select V4L2_ASYNC help This adds support for Intel Visual Sensing Controller (IVSC). -- cgit From 86e16b87afac20779da1228d690a95c54d7e2ad0 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 18 Aug 2023 12:51:49 +0300 Subject: media: v4l: Use correct dependency for camera sensor drivers The Kconfig option that enables compiling camera sensor drivers is VIDEO_CAMERA_SENSOR rather than MEDIA_CAMERA_SUPPORT as it was previously. Fix this. Also select VIDEO_OV7670 for marvell platform drivers only if MEDIA_SUBDRV_AUTOSELECT and VIDEO_CAMERA_SENSOR are enabled. Reported-by: Randy Dunlap Fixes: 7d3c7d2a2914 ("media: i2c: Add a camera sensor top level menu") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/platform/marvell/Kconfig | 4 ++-- drivers/media/usb/em28xx/Kconfig | 4 ++-- drivers/media/usb/go7007/Kconfig | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/marvell/Kconfig b/drivers/media/platform/marvell/Kconfig index ec1a16734a28..d6499ffe30e8 100644 --- a/drivers/media/platform/marvell/Kconfig +++ b/drivers/media/platform/marvell/Kconfig @@ -7,7 +7,7 @@ config VIDEO_CAFE_CCIC depends on V4L_PLATFORM_DRIVERS depends on PCI && I2C && VIDEO_DEV depends on COMMON_CLK - select VIDEO_OV7670 + select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR select VIDEOBUF2_VMALLOC select VIDEOBUF2_DMA_CONTIG select VIDEOBUF2_DMA_SG @@ -22,7 +22,7 @@ config VIDEO_MMP_CAMERA depends on I2C && VIDEO_DEV depends on ARCH_MMP || COMPILE_TEST depends on COMMON_CLK - select VIDEO_OV7670 + select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR select I2C_GPIO select VIDEOBUF2_VMALLOC select VIDEOBUF2_DMA_CONTIG diff --git a/drivers/media/usb/em28xx/Kconfig b/drivers/media/usb/em28xx/Kconfig index b3c472b8c5a9..cb61fd6cc6c6 100644 --- a/drivers/media/usb/em28xx/Kconfig +++ b/drivers/media/usb/em28xx/Kconfig @@ -12,8 +12,8 @@ config VIDEO_EM28XX_V4L2 select VIDEO_SAA711X if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TVP5150 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_MSP3400 if MEDIA_SUBDRV_AUTOSELECT - select VIDEO_MT9V011 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT - select VIDEO_OV2640 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT + select VIDEO_MT9V011 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR + select VIDEO_OV2640 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR help This is a video4linux driver for Empia 28xx based TV cards. diff --git a/drivers/media/usb/go7007/Kconfig b/drivers/media/usb/go7007/Kconfig index 4ff79940ad8d..b2a15d9fb1f3 100644 --- a/drivers/media/usb/go7007/Kconfig +++ b/drivers/media/usb/go7007/Kconfig @@ -12,8 +12,8 @@ config VIDEO_GO7007 select VIDEO_TW2804 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TW9903 if MEDIA_SUBDRV_AUTOSELECT select VIDEO_TW9906 if MEDIA_SUBDRV_AUTOSELECT - select VIDEO_OV7640 if MEDIA_SUBDRV_AUTOSELECT && MEDIA_CAMERA_SUPPORT select VIDEO_UDA1342 if MEDIA_SUBDRV_AUTOSELECT + select VIDEO_OV7640 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR help This is a video4linux driver for the WIS GO7007 MPEG encoder chip. -- cgit From 41425941dfcf47cc6df8e500af6ff16a7be6539f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 22 Aug 2023 11:10:34 +0300 Subject: media: via: Use correct dependency for camera sensor drivers The via camera controller driver selected ov7670 driver, however now that driver has dependencies and may no longer be selected unconditionally. Reported-by: Randy Dunlap Fixes: 7d3c7d2a2914 ("media: i2c: Add a camera sensor top level menu") Signed-off-by: Sakari Ailus Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Hans Verkuil --- drivers/media/platform/via/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/via/Kconfig b/drivers/media/platform/via/Kconfig index 8926eb0803b2..6e603c038248 100644 --- a/drivers/media/platform/via/Kconfig +++ b/drivers/media/platform/via/Kconfig @@ -7,7 +7,7 @@ config VIDEO_VIA_CAMERA depends on V4L_PLATFORM_DRIVERS depends on FB_VIA && VIDEO_DEV select VIDEOBUF2_DMA_SG - select VIDEO_OV7670 + select VIDEO_OV7670 if VIDEO_CAMERA_SENSOR help Driver support for the integrated camera controller in VIA Chrome9 chipsets. Currently only tested on OLPC xo-1.5 systems -- cgit From fac58baf8fcfcd7481e8f6d60206ce2a47c1476c Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 13 Sep 2023 18:26:56 +0800 Subject: ASoC: imx-rpmsg: Set ignore_pmdown_time for dai_link i.MX rpmsg sound cards work on codec slave mode. MCLK will be disabled by CPU DAI driver in hw_free(). Some codec requires MCLK present at power up/down sequence. So need to set ignore_pmdown_time to power down codec immediately before MCLK is turned off. Take WM8962 as an example, if MCLK is disabled before DAPM power down playback stream, FIFO error will arise in WM8962 which will have bad impact on playback next. Signed-off-by: Chancel Liu Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20230913102656.2966757-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-rpmsg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/fsl/imx-rpmsg.c b/sound/soc/fsl/imx-rpmsg.c index 3c7b95db2eac..b578f9a32d7f 100644 --- a/sound/soc/fsl/imx-rpmsg.c +++ b/sound/soc/fsl/imx-rpmsg.c @@ -89,6 +89,14 @@ static int imx_rpmsg_probe(struct platform_device *pdev) SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBC_CFC; + /* + * i.MX rpmsg sound cards work on codec slave mode. MCLK will be + * disabled by CPU DAI driver in hw_free(). Some codec requires MCLK + * present at power up/down sequence. So need to set ignore_pmdown_time + * to power down codec immediately before MCLK is turned off. + */ + data->dai.ignore_pmdown_time = 1; + /* Optional codec node */ ret = of_parse_phandle_with_fixed_args(np, "audio-codec", 0, 0, &args); if (ret) { -- cgit From 719606154c7033c068a5d4c1dc5f9163b814b3c8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:27 +0300 Subject: phy: mapphone-mdm6600: Fix runtime disable on probe Commit d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") caused a regression where we now unconditionally disable runtime PM at the end of the probe while it is only needed on errors. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Miaoqian Lin Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20230913060433.48373-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 1d567604b650..0147112f77b1 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -627,10 +627,12 @@ idle: pm_runtime_put_autosuspend(ddata->dev); cleanup: - if (error < 0) + if (error < 0) { phy_mdm6600_device_power_off(ddata); - pm_runtime_disable(ddata->dev); - pm_runtime_dont_use_autosuspend(ddata->dev); + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); + } + return error; } -- cgit From b99e0ba9633af51638e5ee1668da2e33620c134f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:28 +0300 Subject: phy: mapphone-mdm6600: Fix runtime PM for remove Otherwise we will get an underflow on remove. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: f7f50b2a7b05 ("phy: mapphone-mdm6600: Add runtime PM support for n_gsm on USB suspend") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-2-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 0147112f77b1..df48b1becebe 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -641,6 +641,7 @@ static void phy_mdm6600_remove(struct platform_device *pdev) struct phy_mdm6600 *ddata = platform_get_drvdata(pdev); struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + pm_runtime_get_noresume(ddata->dev); pm_runtime_dont_use_autosuspend(ddata->dev); pm_runtime_put_sync(ddata->dev); pm_runtime_disable(ddata->dev); -- cgit From 3b384cc74b00b5ac21d18e4c1efc3c1da5300971 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:29 +0300 Subject: phy: mapphone-mdm6600: Fix pinctrl_pm handling for sleep pins Looks like the driver sleep pins configuration is unusable. Adding the sleep pins causes the usb phy to not respond. We need to use the default pins in probe, and only set sleep pins at phy_mdm6600_device_power_off(). As the modem can also be booted to a serial port mode for firmware flashing, let's make the pin changes limited to probe and remove. For probe, we get the default pins automatically. We only need to set the sleep pins in phy_mdm6600_device_power_off() to prevent the modem from waking up because the gpio line glitches. If it turns out that we need a separate state for phy_mdm6600_power_on() and phy_mdm6600_power_off(), we can use the pinctrl idle state. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: 2ad2af081622 ("phy: mapphone-mdm6600: Improve phy related runtime PM calls") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-3-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index df48b1becebe..376d023a0aa9 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -122,16 +122,10 @@ static int phy_mdm6600_power_on(struct phy *x) { struct phy_mdm6600 *ddata = phy_get_drvdata(x); struct gpio_desc *enable_gpio = ddata->ctrl_gpios[PHY_MDM6600_ENABLE]; - int error; if (!ddata->enabled) return -ENODEV; - error = pinctrl_pm_select_default_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with default_state: %i\n", - __func__, error); - gpiod_set_value_cansleep(enable_gpio, 1); /* Allow aggressive PM for USB, it's only needed for n_gsm port */ @@ -160,11 +154,6 @@ static int phy_mdm6600_power_off(struct phy *x) gpiod_set_value_cansleep(enable_gpio, 0); - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - return 0; } @@ -456,6 +445,7 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) { struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + int error; ddata->enabled = false; phy_mdm6600_cmd(ddata, PHY_MDM6600_CMD_BP_SHUTDOWN_REQ); @@ -471,6 +461,17 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) } else { dev_err(ddata->dev, "Timed out powering down\n"); } + + /* + * Keep reset gpio high with padconf internal pull-up resistor to + * prevent modem from waking up during deeper SoC idle states. The + * gpio bank lines can have glitches if not in the always-on wkup + * domain. + */ + error = pinctrl_pm_select_sleep_state(ddata->dev); + if (error) + dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", + __func__, error); } static void phy_mdm6600_deferred_power_on(struct work_struct *work) @@ -571,12 +572,6 @@ static int phy_mdm6600_probe(struct platform_device *pdev) ddata->dev = &pdev->dev; platform_set_drvdata(pdev, ddata); - /* Active state selected in phy_mdm6600_power_on() */ - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - error = phy_mdm6600_init_lines(ddata); if (error) return error; -- cgit From 72ca56664e483de991ae4afa623e54570f81ebde Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 14 Sep 2023 15:08:52 +0100 Subject: ALSA: hda: cs35l56: Don't 'return ret' if ret is always zero The final return in cs35l56_hda_posture_get() was returning the value of 'ret', but ret is always zero at this point. So this can be a simple 'return 0'. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230914140852.7112-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 9e4976bdb5e0..bc75865b5de8 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -218,7 +218,7 @@ static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol, ucontrol->value.integer.value[0] = pos; - return ret; + return 0; } static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol, -- cgit From 6ba59c008f08e84b3c87be10f3391c9735e4f833 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 14 Sep 2023 16:25:04 +0300 Subject: ASoC: SOF: ipc4-topology: fix wrong sizeof argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit available_fmt is a pointer. Fixes: 4fdef47a44d6 ("ASoC: SOF: ipc4-topology: Add new tokens for input/output pin format count") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230914132504.18463-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index f2a30cd31378..7cb63e6b24dc 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -231,7 +231,7 @@ static int sof_ipc4_get_audio_fmt(struct snd_soc_component *scomp, ret = sof_update_ipc_object(scomp, available_fmt, SOF_AUDIO_FMT_NUM_TOKENS, swidget->tuples, - swidget->num_tuples, sizeof(available_fmt), 1); + swidget->num_tuples, sizeof(*available_fmt), 1); if (ret) { dev_err(scomp->dev, "Failed to parse audio format token count\n"); return ret; -- cgit From bb0216d4db9ecaa51af45d8504757becbe5c050d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 14 Sep 2023 15:47:25 +0300 Subject: ASoC: SOF: sof-audio: Fix DSP core put imbalance on widget setup failure In case the widget setup fails we should only decrement the core usage count if the sof_widget_free_unlocked() has not been called as part of the error handling. sof_widget_free_unlocked() calls snd_sof_dsp_core_put() and the additional core_put will cause imbalance in core usage count. Use the existing use_count_decremented to handle this issue. Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230914124725.17397-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-audio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index e7ef77012c35..e5405f854a91 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -212,7 +212,8 @@ widget_free: sof_widget_free_unlocked(sdev, swidget); use_count_decremented = true; core_put: - snd_sof_dsp_core_put(sdev, swidget->core); + if (!use_count_decremented) + snd_sof_dsp_core_put(sdev, swidget->core); pipe_widget_free: if (swidget->id != snd_soc_dapm_scheduler) sof_widget_free_unlocked(sdev, swidget->spipe->pipe_widget); -- cgit From e35059949daa83f8dadf710d0f829ab3c3a72fe2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Jun 2023 12:44:17 +0300 Subject: power: supply: ucs1002: fix error code in ucs1002_get_property() This function is supposed to return 0 for success instead of returning the val->intval. This makes it the same as the other case statements in this function. Fixes: 81196e2e57fc ("power: supply: ucs1002: fix some health status issues") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/687f64a4-4c6e-4536-8204-98ad1df934e5@moroto.mountain Signed-off-by: Sebastian Reichel --- drivers/power/supply/ucs1002_power.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/ucs1002_power.c b/drivers/power/supply/ucs1002_power.c index 954feba6600b..7970843a4f48 100644 --- a/drivers/power/supply/ucs1002_power.c +++ b/drivers/power/supply/ucs1002_power.c @@ -384,7 +384,8 @@ static int ucs1002_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_USB_TYPE: return ucs1002_get_usb_type(info, val); case POWER_SUPPLY_PROP_HEALTH: - return val->intval = info->health; + val->intval = info->health; + return 0; case POWER_SUPPLY_PROP_PRESENT: val->intval = info->present; return 0; -- cgit From cbcdfbf5a6cd66e47e5ee5d49c4c5a27a07ba082 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 12 Jun 2023 16:36:52 +0200 Subject: power: supply: rk817: Add missing module alias Similar to the rk817 codec alias that was missing, the rk817 charger driver is missing a module alias as well. This absence prevents the driver from autoprobing on OF systems when it is built as a module. Add the right MODULE_ALIAS to fix this. Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817") Cc: stable@vger.kernel.org Signed-off-by: Nicolas Frattaroli Reviewed-by: Chris Morgan Link: https://lore.kernel.org/r/20230612143651.959646-2-frattaroli.nicolas@gmail.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 8328bcea1a29..c2510078eb2d 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -1211,3 +1211,4 @@ MODULE_DESCRIPTION("Battery power supply driver for RK817 PMIC"); MODULE_AUTHOR("Maya Matuszczyk "); MODULE_AUTHOR("Chris Morgan "); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:rk817-charger"); -- cgit From cba320408d631422fef0ad8407954fb9d6f8f650 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 15 Feb 2023 13:43:04 +0100 Subject: power: supply: rt9467: Fix rt9467_run_aicl() It is spurious to bail-out on a wait_for_completion_timeout() call that does NOT timeout. Reverse the logic to return -ETIMEDOUT instead, in case of tiemout. Fixes: 6f7f70e3a8dd ("power: supply: rt9467: Add Richtek RT9467 charger driver") Signed-off-by: Christophe JAILLET Reviewed-by: ChiYuan Huang Link: https://lore.kernel.org/r/2ed01020fa8a135c36dbaa871095ded47d926507.1676464968.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sebastian Reichel --- drivers/power/supply/rt9467-charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/rt9467-charger.c b/drivers/power/supply/rt9467-charger.c index 683adb18253d..fdfdc83ab045 100644 --- a/drivers/power/supply/rt9467-charger.c +++ b/drivers/power/supply/rt9467-charger.c @@ -598,8 +598,8 @@ static int rt9467_run_aicl(struct rt9467_chg_data *data) reinit_completion(&data->aicl_done); ret = wait_for_completion_timeout(&data->aicl_done, msecs_to_jiffies(3500)); - if (ret) - return ret; + if (ret == 0) + return -ETIMEDOUT; ret = rt9467_get_value_from_ranges(data, F_IAICR, RT9467_RANGE_IAICR, &aicr_get); if (ret) { -- cgit From d2f706058826b803f5b9dc3f6d4c213ae0c54eb9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 3 Sep 2023 14:42:58 -0700 Subject: cxl/mbox: Fix CEL logic for poison and security commands The following debug output was observed while testing CXL cxl_core:cxl_walk_cel:721: cxl_mock_mem cxl_mem.0: Opcode 0x4300 unsupported by driver opcode 0x4300 (Get Poison) is supported by the driver and the mock device supports it. The logic should be checking that the opcode is both not poison and not security. Fix the logic to allow poison and security commands. Fixes: ad64f5952ce3 ("cxl/memdev: Only show sanitize sysfs files when supported") Cc: Signed-off-by: Ira Weiny Reviewed-by: Davidlohr Bueso Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230903-cxl-cel-fix-v1-1-e260c9467be3@intel.com [cleanup cxl_walk_cel() to centralized "enabled" checks] Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index ca60bb8114f2..4df4f614f490 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -715,24 +715,25 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) for (i = 0; i < cel_entries; i++) { u16 opcode = le16_to_cpu(cel_entry[i].opcode); struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); + int enabled = 0; - if (!cmd && (!cxl_is_poison_command(opcode) || - !cxl_is_security_command(opcode))) { - dev_dbg(dev, - "Opcode 0x%04x unsupported by driver\n", opcode); - continue; - } - - if (cmd) + if (cmd) { set_bit(cmd->info.id, mds->enabled_cmds); + enabled++; + } - if (cxl_is_poison_command(opcode)) + if (cxl_is_poison_command(opcode)) { cxl_set_poison_cmd_enabled(&mds->poison, opcode); + enabled++; + } - if (cxl_is_security_command(opcode)) + if (cxl_is_security_command(opcode)) { cxl_set_security_cmd_enabled(&mds->security, opcode); + enabled++; + } - dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode); + dev_dbg(dev, "Opcode 0x%04x %s\n", opcode, + enabled ? "enabled" : "unsupported by driver"); } } -- cgit From 357950361cbc6d54fb68ed878265c647384684ae Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 9 Sep 2023 13:08:31 +0100 Subject: btrfs: set last dir index to the current last index when opening dir When opening a directory for reading it, we set the last index where we stop iteration to the value in struct btrfs_inode::index_cnt. That value does not match the index of the most recently added directory entry but it's instead the index number that will be assigned the next directory entry. This means that if after the call to opendir(3) new directory entries are added, a readdir(3) call will return the first new directory entry. This is fine because POSIX says the following [1]: "If a file is removed from or added to the directory after the most recent call to opendir() or rewinddir(), whether a subsequent call to readdir() returns an entry for that file is unspecified." For example for the test script from commit 9b378f6ad48c ("btrfs: fix infinite directory reads"), where we have 2000 files in a directory, ext4 doesn't return any new directory entry after opendir(3), while xfs returns the first 13 new directory entries added after the opendir(3) call. If we move to a shorter example with an empty directory when opendir(3) is called, and 2 files added to the directory after the opendir(3) call, then readdir(3) on btrfs will return the first file, ext4 and xfs return the 2 files (but in a different order). A test program for this, reported by Ian Johnson, is the following: #include #include int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } To make this less odd, change the behaviour to never return new entries that were added after the opendir(3) call. This is done by setting the last_index field of the struct btrfs_file_private attached to the directory's file handle with a value matching btrfs_inode::index_cnt minus 1, since that value always matches the index of the next new directory entry and not the index of the most recently added entry. [1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 616fdcf40467..dee4fce6ab72 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5780,7 +5780,8 @@ static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) } } - *index = dir->index_cnt; + /* index_cnt is the index number of next new entry, so decrement it. */ + *index = dir->index_cnt - 1; return 0; } -- cgit From e60aa5da14d01fed8411202dbe4adf6c44bd2a57 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 9 Sep 2023 13:08:32 +0100 Subject: btrfs: refresh dir last index during a rewinddir(3) call When opening a directory we find what's the index of its last entry and then store it in the directory's file handle private data (struct btrfs_file_private::last_index), so that in the case new directory entries are added to a directory after an opendir(3) call we don't end up in an infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory reads")) when calling readdir(3). However once rewinddir(3) is called, POSIX states [1] that any new directory entries added after the previous opendir(3) call, must be returned by subsequent calls to readdir(3): "The rewinddir() function shall reset the position of the directory stream to which dirp refers to the beginning of the directory. It shall also cause the directory stream to refer to the current state of the corresponding directory, as a call to opendir() would have done." We currently don't refresh the last_index field of the struct btrfs_file_private associated to the directory, so after a rewinddir(3) we are not returning any new entries added after the opendir(3) call. Fix this by finding the current last index of the directory when llseek is called against the directory. This can be reproduced by the following C program provided by Ian Johnson: #include #include int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); rewinddir(dir); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } Reported-by: Ian Johnson Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dee4fce6ab72..2961f1e1e778 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5818,6 +5818,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) return 0; } +static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct btrfs_file_private *private = file->private_data; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), + &private->last_index); + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + struct dir_entry { u64 ino; u64 offset; @@ -10891,7 +10904,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { }; static const struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_dir_llseek, .read = generic_read_dir, .iterate_shared = btrfs_real_readdir, .open = btrfs_opendir, -- cgit From 8e7f82deb0c0386a03b62e30082574347f8b57d5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 12 Sep 2023 11:45:39 +0100 Subject: btrfs: fix race between reading a directory and adding entries to it When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we are not holding the directory's inode locked, and this can result in later attempting to add two entries to the directory with the same index number, resulting in a transaction abort, with -EEXIST (-17), when inserting the second delayed dir index. This results in a trace like the following: Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17) Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------ Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504! Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1 Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018 Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...) Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282 Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000 Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540 Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938 Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014 Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000 Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0 Sep 11 22:34:59 myhostname kernel: Call Trace: Sep 11 22:34:59 myhostname kernel: Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90 Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280 Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0 Sep 11 22:34:59 myhostname kernel: ? ktime_get_real_ts64+0x47/0xe0 Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80 Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0 Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0 Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0 Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0 Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140 Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50 Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90 Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40 Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90 Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc The race leading to the problem happens like this: 1) Directory inode X is loaded into memory, its ->index_cnt field is initialized to (u64)-1 (at btrfs_alloc_inode()); 2) Task A is adding a new file to directory X, holding its vfs inode lock, and calls btrfs_set_inode_index() to get an index number for the entry. Because the inode's index_cnt field is set to (u64)-1 it calls btrfs_inode_delayed_dir_index_count() which fails because no dir index entries were added yet to the delayed inode and then it calls btrfs_set_inode_index_count(). This functions finds the last dir index key and then sets index_cnt to that index value + 1. It found that the last index key has an offset of 100. However before it assigns a value of 101 to index_cnt... 3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS lock for inode X is not taken, so it calls btrfs_get_dir_last_index() and sees index_cnt still with a value of (u64)-1. Because of that it calls btrfs_inode_delayed_dir_index_count() which fails since no dir index entries were added to the delayed inode yet, and then it also calls btrfs_set_inode_index_count(). This also finds that the last index key has an offset of 100, and before it assigns the value 101 to the index_cnt field of inode X... 4) Task A assigns a value of 101 to index_cnt. And then the code flow goes to btrfs_set_inode_index() where it increments index_cnt from 101 to 102. Task A then creates a delayed dir index entry with a sequence number of 101 and adds it to the delayed inode; 5) Task B assigns 101 to the index_cnt field of inode X; 6) At some later point when someone tries to add a new entry to the directory, btrfs_set_inode_index() will return 101 again and shortly after an attempt to add another delayed dir index key with index number 101 will fail with -EEXIST resulting in a transaction abort. Fix this by locking the inode at btrfs_get_dir_last_index(), which is only only used when opening a directory or attempting to lseek on it. Reported-by: ken Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/ Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2961f1e1e778..5e1b15e69ed6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5769,21 +5769,24 @@ out: static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) { - if (dir->index_cnt == (u64)-1) { - int ret; + int ret = 0; + btrfs_inode_lock(dir, 0); + if (dir->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { ret = btrfs_set_inode_index_count(dir); if (ret) - return ret; + goto out; } } /* index_cnt is the index number of next new entry, so decrement it. */ *index = dir->index_cnt - 1; +out: + btrfs_inode_unlock(dir, 0); - return 0; + return ret; } /* -- cgit From c42d116ccb72b6a33728e2b4b76ab175197ffb07 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 28 Aug 2023 10:57:18 +0200 Subject: media: ivsc: Depend on VIDEO_DEV CONFIG_VIDEO_DEV is required by other selected symbols. Depend on it. Link: https://lore.kernel.org/linux-media/20230828085718.3912335-1-sakari.ailus@linux.intel.com Reported-by: Randy Dunlap Fixes: 29006e196a56 ("media: pci: intel: ivsc: Add CSI submodule") Signed-off-by: Sakari Ailus Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ivsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 92c975e98cb1..212753450576 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -3,7 +3,7 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" - depends on INTEL_MEI && ACPI + depends on INTEL_MEI && ACPI && VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_ASYNC -- cgit From e784e78efba87571bcfaab09e8bd81a77c8feaa1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 22:57:36 +0200 Subject: media: i2c: max9286: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_handle_put() call in the error handling path of max9286_v4l2_register(). Remove the same call from max9286_v4l2_unregister(). Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/max9286.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c index 20e7c7cf5eeb..be84ff1e2b17 100644 --- a/drivers/media/i2c/max9286.c +++ b/drivers/media/i2c/max9286.c @@ -1110,7 +1110,6 @@ err_async: static void max9286_v4l2_unregister(struct max9286_priv *priv) { - fwnode_handle_put(priv->sd.fwnode); v4l2_ctrl_handler_free(&priv->ctrls); v4l2_async_unregister_subdev(&priv->sd); max9286_v4l2_notifier_unregister(priv); -- cgit From 5cb218ffc54f1865edbe0c2a5ac4e906753817fb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:45 +0300 Subject: media: i2c: imx219: Fix a typo referring to a wrong variable The imx219_init_cfg() function has stopped operating on the try format since commit 7e700847b1fe ("media: i2c: imx219: Switch from open to init_cfg"), but a comment in the function wasn't updated. Fix it. While at it, improve spelling in a second comment in the function. Fixes: 7e700847b1fe ("media: i2c: imx219: Switch from open to init_cfg") Signed-off-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index a1136fdfbed2..6f88e002c8d8 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -691,12 +691,12 @@ static int imx219_init_cfg(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *format; struct v4l2_rect *crop; - /* Initialize try_fmt */ + /* Initialize the format. */ format = v4l2_subdev_get_pad_format(sd, state, 0); imx219_update_pad_format(imx219, &supported_modes[0], format, MEDIA_BUS_FMT_SRGGB10_1X10); - /* Initialize crop rectangle. */ + /* Initialize the crop rectangle. */ crop = v4l2_subdev_get_pad_crop(sd, state, 0); crop->top = IMX219_PIXEL_ARRAY_TOP; crop->left = IMX219_PIXEL_ARRAY_LEFT; -- cgit From bb2d01127f5d8e5034daa60a08e68f719ad71ec2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:46 +0300 Subject: media: i2c: imx219: Fix crop rectangle setting when changing format When moving the imx219 driver to the subdev active state, commit e8a5b1df000e ("media: i2c: imx219: Use subdev active state") used the pad crop rectangle stored in the subdev state to report the crop rectangle of the active mode. That crop rectangle was however not set in the state when setting the format, which resulted in reporting an incorrect crop rectangle to userspace. Fix it. Fixes: e8a5b1df000e ("media: i2c: imx219: Use subdev active state") Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 6f88e002c8d8..f19c828b6943 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -750,6 +750,7 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, const struct imx219_mode *mode; int exposure_max, exposure_def, hblank; struct v4l2_mbus_framefmt *format; + struct v4l2_rect *crop; mode = v4l2_find_nearest_size(supported_modes, ARRAY_SIZE(supported_modes), @@ -757,11 +758,16 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, fmt->format.width, fmt->format.height); imx219_update_pad_format(imx219, mode, &fmt->format, fmt->format.code); + format = v4l2_subdev_get_pad_format(sd, sd_state, 0); + crop = v4l2_subdev_get_pad_crop(sd, sd_state, 0); if (imx219->mode == mode && format->code == fmt->format.code) return 0; + *format = fmt->format; + *crop = mode->crop; + if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE) { imx219->mode = mode; /* Update limits and set FPS to default */ @@ -788,8 +794,6 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, hblank); } - *format = fmt->format; - return 0; } -- cgit From faece4ad72b06308101d7f9cacaf8dd6df4fdc1f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Aug 2023 16:57:47 +0300 Subject: media: i2c: imx219: Perform a full mode set unconditionally The .set_fmt() handler tries to avoid updating the sensor configuration when the mode hasn't changed. It does so by comparing both the mode and the media bus code. While the latter correctly uses the media bus code stored in the subdev state, the former compares the mode being set with the active mode, regardless of whether .set_fmt() is called for the ACTIVE or TRY format. This can lead to .set_fmt() returning early when operating on TRY formats. This could be fixed by replacing the mode comparison with width and height comparisons, using the frame size stored in the subdev state. However, the optimization that avoids updates to the sensor configuration is not very useful, and is not commonly found in sensor drivers. To improve consistency across sensor drivers, it is better, in addition to being easier, to simply drop it. Do so. Fixes: e8a5b1df000e ("media: i2c: imx219: Use subdev active state") Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/imx219.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index f19c828b6943..ec53abe2e84e 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -762,9 +762,6 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd, format = v4l2_subdev_get_pad_format(sd, sd_state, 0); crop = v4l2_subdev_get_pad_crop(sd, sd_state, 0); - if (imx219->mode == mode && format->code == fmt->format.code) - return 0; - *format = fmt->format; *crop = mode->crop; -- cgit From 12d21fc2ba88e3bb41167afb5c6c0e961f2ab0c9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 22:34:51 +0200 Subject: media: i2c: rdacm21: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_handle_put() call in the error handling path of the probe. Remove the same call from the remove function. Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/rdacm21.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c index a36a709243fd..3e22df36354f 100644 --- a/drivers/media/i2c/rdacm21.c +++ b/drivers/media/i2c/rdacm21.c @@ -608,7 +608,6 @@ static void rdacm21_remove(struct i2c_client *client) v4l2_async_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->ctrls); i2c_unregister_device(dev->isp); - fwnode_handle_put(dev->sd.fwnode); } static const struct of_device_id rdacm21_of_ids[] = { -- cgit From 861ab817b5ebe5e34bfbf01943b86ded6bba97b3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 11 Sep 2023 11:55:55 +0200 Subject: media: bt8xx: bttv_risc_packed(): remove field checks Do not turn on the vcr_hack based on the btv->field value. This was a change in the bttv vb2 conversion that caused green lines at the bottom of the picture in tvtime. It was originally added to the vb2 conversion based on faulty information that without this there would be glitches in the video. However, later tests suggest that this is a problem in the utilities used to test this since tvtime behaves fine. This patch reverts the bttv driver to the original pre-vb2 behavior w.r.t. vcr_hack. Fixes: b7ec3212a73a ("media: bttv: convert to vb2") Signed-off-by: Hans Verkuil --- drivers/media/pci/bt8xx/bttv-risc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/pci/bt8xx/bttv-risc.c b/drivers/media/pci/bt8xx/bttv-risc.c index 436baf6c8b08..241a696e374a 100644 --- a/drivers/media/pci/bt8xx/bttv-risc.c +++ b/drivers/media/pci/bt8xx/bttv-risc.c @@ -68,9 +68,7 @@ bttv_risc_packed(struct bttv *btv, struct btcx_riscmem *risc, sg = sglist; for (line = 0; line < store_lines; line++) { if ((line >= (store_lines - VCR_HACK_LINES)) && - (btv->opt_vcr_hack || - (V4L2_FIELD_HAS_BOTH(btv->field) || - btv->field == V4L2_FIELD_ALTERNATE))) + btv->opt_vcr_hack) continue; while (offset && offset >= sg_dma_len(sg)) { offset -= sg_dma_len(sg); -- cgit From 41ebaa5e0eebea4c3bac96b72f9f8ae0d77c0bdb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 20 Jul 2023 17:46:54 +0000 Subject: media: uvcvideo: Fix OOB read If the index provided by the user is bigger than the mask size, we might do an out of bound read. CC: stable@kernel.org Fixes: 40140eda661e ("media: uvcvideo: Implement mask for V4L2_CTRL_TYPE_MENU") Reported-by: Zubin Mithra Signed-off-by: Ricardo Ribalda Reviewed-by: Sergey Senozhatsky Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/usb/uvc/uvc_ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 5e9d3da862dd..e59a463c2761 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1402,6 +1402,9 @@ int uvc_query_v4l2_menu(struct uvc_video_chain *chain, query_menu->id = id; query_menu->index = index; + if (index >= BITS_PER_TYPE(mapping->menu_mask)) + return -EINVAL; + ret = mutex_lock_interruptible(&chain->ctrl_mutex); if (ret < 0) return -ERESTARTSYS; -- cgit From 735de5caf79e06cc9fb96b1b4f4974674ae3e917 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 17 Aug 2023 12:41:32 +0200 Subject: media: vb2: frame_vector.c: replace WARN_ONCE with a comment The WARN_ONCE was issued also in cases that had nothing to do with VM_IO (e.g. if the start address was just a random value and uaccess fails with -EFAULT). There are no reports of WARN_ONCE being issued for actual VM_IO cases, so just drop it and instead add a note to the comment before the function. Signed-off-by: Hans Verkuil Reviewed-by: David Hildenbrand Reported-by: Yikebaer Aizezi --- drivers/media/common/videobuf2/frame_vector.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 0f430ddc1f67..fd87747be9b1 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -31,6 +31,10 @@ * different type underlying the specified range of virtual addresses. * When the function isn't able to map a single page, it returns error. * + * Note that get_vaddr_frames() cannot follow VM_IO mappings. It used + * to be able to do that, but that could (racily) return non-refcounted + * pfns. + * * This function takes care of grabbing mmap_lock as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write, @@ -59,8 +63,6 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write, if (likely(ret > 0)) return ret; - /* This used to (racily) return non-refcounted pfns. Let people know */ - WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); vec->nr_frames = 0; return ret ? ret : -EFAULT; } -- cgit From 9e4edf1a2196fa4bea6e8201f166785bd066446a Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 5 Sep 2023 14:10:07 -0700 Subject: cxl/region: Match auto-discovered region decoders by HPA range Currently, when the region driver attaches a region to a port, it selects the ports next available decoder to program. With the addition of auto-discovered regions, a port decoder has already been programmed so grabbing the next available decoder can be a mismatch when there is more than one region using the port. The failure appears like this with CXL DEBUG enabled: [] cxl_core:alloc_region_ref:754: cxl region0: endpoint9: HPA order violation region0:[mem 0x14780000000-0x1478fffffff flags 0x200] vs [mem 0x880000000-0x185fffffff flags 0x200] [] cxl_core:cxl_port_attach_region:972: cxl region0: endpoint9: failed to allocate region reference When CXL DEBUG is not enabled, there is no failure message. The region just never materializes. Users can suspect this issue if they know their firmware has programmed decoders so that more than one region is using a port. Note that the problem may appear intermittently, ie not on every reboot. Add a matching method for auto-discovered regions that finds a decoder based on an HPA range. The decoder range must exactly match the region resource parameter. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230905211007.256385-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e115ba382e04..b4c6a749406f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -717,13 +717,35 @@ static int match_free_decoder(struct device *dev, void *data) return 0; } +static int match_auto_decoder(struct device *dev, void *data) +{ + struct cxl_region_params *p = data; + struct cxl_decoder *cxld; + struct range *r; + + if (!is_switch_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + r = &cxld->hpa_range; + + if (p->res && p->res->start == r->start && p->res->end == r->end) + return 1; + + return 0; +} + static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, struct cxl_region *cxlr) { struct device *dev; int id = 0; - dev = device_find_child(&port->dev, &id, match_free_decoder); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + dev = device_find_child(&port->dev, &cxlr->params, + match_auto_decoder); + else + dev = device_find_child(&port->dev, &id, match_free_decoder); if (!dev) return NULL; /* -- cgit From 18f35dc9314db89e2d215951e5afa3e636b72baf Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 22 Aug 2023 11:09:28 -0700 Subject: cxl/region: Refactor granularity select in cxl_port_setup_targets() In cxl_port_setup_targets() the region driver validates the configuration of auto-discovered region decoders, as well as decoders the driver is preparing to program. The existing calculations use the encoded interleave granularity value to create an interleave granularity that properly fans out when routing an x1 interleave to a greater than x1 interleave. That all worked well, until this config came along: Host Bridge: 2 way at 256 granularity Switch Decoder_A: 1 way at 512 Endpoint_X: 2 way at 256 Switch Decoder_B: 1 way at 512 Endpoint_Y: 2 way at 256 When the Host Bridge interleave is greater than 1 and the root decoder interleave is exactly 1, the region driver needs to consider the number of targets in the region when calculating the expected granularity. While examining the existing logic, and trying to cover the case above, a couple of simplifications appeared, hence this proposed refactoring. The first simplification is to apply the logic to the nominal values and use the existing helper function granularity_to_eig() to translate the desired granularity to the encoded form. This means the comment and code regarding setting address bits is discarded. Although that logic is not wrong, it adds a level of complexity that is not required in the granularity selection. The eig and eiw are indeed part of the routing instructions programmed into the decoders. Up-level the discussion to nominal ways and granularity for clearer analysis. The second simplification reduces the logic to a single granularity calculation that works for all cases. The new calculation doesn't care if parent_iw => 1 because parent_iw is used as a multiplier. The refactor cleans up a useless assignment of eiw made after the iw is already calculated. Regression testing included an examination of all of the ways and granularity selections made during a run of the cxl_test unit tests. There were no differences in selections before and after this patch. Fixes: ("27b3f8d13830 cxl/region: Program target lists") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230822180928.117596-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b4c6a749406f..6d63b8798c29 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1176,16 +1176,15 @@ static int cxl_port_setup_targets(struct cxl_port *port, } /* - * If @parent_port is masking address bits, pick the next unused address - * bit to route @port's targets. + * Interleave granularity is a multiple of @parent_port granularity. + * Multiplier is the parent port interleave ways. */ - if (parent_iw > 1 && cxl_rr->nr_targets > 1) { - u32 address_bit = max(peig + peiw, eiw + peig); - - eig = address_bit - eiw + 1; - } else { - eiw = peiw; - eig = peig; + rc = granularity_to_eig(parent_ig * parent_iw, &eig); + if (rc) { + dev_dbg(&cxlr->dev, + "%s: invalid granularity calculation (%d * %d)\n", + dev_name(&parent_port->dev), parent_ig, parent_iw); + return rc; } rc = eig_to_granularity(eig, &ig); -- cgit From f037fc9905ffa6fa19b89bfbc86946798cede071 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:06 +0800 Subject: net: microchip: sparx5: Fix memory leak for vcap_api_rule_add_keyvalue_test() Inject fault while probing kunit-example-test.ko, the field which is allocated by kzalloc in vcap_rule_add_key() of vcap_rule_add_key_bit/u32/u128() is not freed, and it cause the memory leaks below. unreferenced object 0xffff0276c14b7240 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894220 (age 920.072s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000ff8002d3>] vcap_api_rule_add_keyvalue_test+0x100/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7280 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000f5ac9dc7>] vcap_api_rule_add_keyvalue_test+0x168/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b72c0 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<00000000c918ae7f>] vcap_api_rule_add_keyvalue_test+0x1d0/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7300 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<0000000003352814>] vcap_api_rule_add_keyvalue_test+0x240/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c14b7340 (size 64): comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s) hex dump (first 32 bytes): 28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff (] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180 [<000000001516f109>] vcap_api_rule_add_keyvalue_test+0x2cc/0xba8 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index c07f25e791c7..2fb0b8cf2b0c 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -995,6 +995,16 @@ static void vcap_api_encode_rule_actionset_test(struct kunit *test) KUNIT_EXPECT_EQ(test, (u32)0x00000000, actwords[11]); } +static void vcap_free_ckf(struct vcap_rule *rule) +{ + struct vcap_client_keyfield *ckf, *next_ckf; + + list_for_each_entry_safe(ckf, next_ckf, &rule->keyfields, ctrl.list) { + list_del(&ckf->ctrl.list); + kfree(ckf); + } +} + static void vcap_api_rule_add_keyvalue_test(struct kunit *test) { struct vcap_admin admin = { @@ -1027,6 +1037,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, VCAP_BIT_1); @@ -1039,6 +1050,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, @@ -1052,6 +1064,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value); KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_u32(rule, VCAP_KF_TYPE, 0x98765432, 0xff00ffab); @@ -1064,6 +1077,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, kf->ctrl.type); KUNIT_EXPECT_EQ(test, 0x98765432, kf->data.u32.value); KUNIT_EXPECT_EQ(test, 0xff00ffab, kf->data.u32.mask); + vcap_free_ckf(rule); INIT_LIST_HEAD(&rule->keyfields); ret = vcap_rule_add_key_u128(rule, VCAP_KF_L3_IP6_SIP, &dip); @@ -1078,6 +1092,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, dip.value[idx], kf->data.u128.value[idx]); for (idx = 0; idx < ARRAY_SIZE(dip.mask); ++idx) KUNIT_EXPECT_EQ(test, dip.mask[idx], kf->data.u128.mask[idx]); + vcap_free_ckf(rule); } static void vcap_api_rule_add_actionvalue_test(struct kunit *test) -- cgit From 39d0ccc185315408e7cecfcaf06d167927b51052 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:07 +0800 Subject: net: microchip: sparx5: Fix memory leak for vcap_api_rule_add_actionvalue_test() Inject fault while probing kunit-example-test.ko, the field which is allocated by kzalloc in vcap_rule_add_action() of vcap_rule_add_action_bit/u32() is not freed, and it cause the memory leaks below. unreferenced object 0xffff0276c496b300 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000ae66c16c>] vcap_api_rule_add_actionvalue_test+0xa4/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b2c0 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000607782aa>] vcap_api_rule_add_actionvalue_test+0x100/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b280 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<000000004e640602>] vcap_api_rule_add_actionvalue_test+0x15c/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b240 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<0000000011141bf8>] vcap_api_rule_add_actionvalue_test+0x1bc/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0276c496b200 (size 64): comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s) hex dump (first 32 bytes): 68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff h] slab_post_alloc_hook+0xb8/0x368 [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290 [<000000004620684a>] kmalloc_trace+0x40/0x164 [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178 [<00000000d5ed3088>] vcap_api_rule_add_actionvalue_test+0x22c/0x990 [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000026284079>] kthread+0x124/0x130 [<0000000024d4a996>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 2fb0b8cf2b0c..f268383a7570 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1095,6 +1095,17 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test) vcap_free_ckf(rule); } +static void vcap_free_caf(struct vcap_rule *rule) +{ + struct vcap_client_actionfield *caf, *next_caf; + + list_for_each_entry_safe(caf, next_caf, + &rule->actionfields, ctrl.list) { + list_del(&caf->ctrl.list); + kfree(caf); + } +} + static void vcap_api_rule_add_actionvalue_test(struct kunit *test) { struct vcap_admin admin = { @@ -1120,6 +1131,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_1); @@ -1131,6 +1143,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x1, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_ANY); @@ -1142,6 +1155,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_u32(rule, VCAP_AF_TYPE, 0x98765432); @@ -1153,6 +1167,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_TYPE, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0x98765432, af->data.u32.value); + vcap_free_caf(rule); INIT_LIST_HEAD(&rule->actionfields); ret = vcap_rule_add_action_u32(rule, VCAP_AF_MASK_MODE, 0xaabbccdd); @@ -1164,6 +1179,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test) KUNIT_EXPECT_EQ(test, VCAP_AF_MASK_MODE, af->ctrl.action); KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type); KUNIT_EXPECT_EQ(test, 0xaabbccdd, af->data.u32.value); + vcap_free_caf(rule); } static void vcap_api_rule_find_keyset_basic_test(struct kunit *test) -- cgit From 89e3af0277388f32d56915a6715c735e4afae5d6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:08 +0800 Subject: net: microchip: sparx5: Fix possible memory leak in vcap_api_encode_rule_test() Inject fault while probing kunit-example-test.ko, the duprule which is allocated in vcap_dup_rule() and the vcap enabled port which is allocated in vcap_enable() of vcap_enable_lookups in vcap_api_encode_rule_test() is not freed, and it cause the memory leaks below. Use vcap_enable_lookups() with false arg to free the vcap enabled port as other drivers do it. And use vcap_del_rule() to free the duprule. unreferenced object 0xffff677a0278bb00 (size 64): comm "kunit_try_catch", pid 388, jiffies 4294895987 (age 1101.840s) hex dump (first 32 bytes): 18 bd a5 82 00 80 ff ff 18 bd a5 82 00 80 ff ff ................ 40 fe c8 0e be c6 ff ff 00 00 00 00 00 00 00 00 @............... backtrace: [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368 [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290 [<0000000034d76721>] kmalloc_trace+0x40/0x164 [<00000000013380a5>] vcap_enable_lookups+0x1c8/0x70c [<00000000bbec496b>] vcap_api_encode_rule_test+0x2f8/0xb18 [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<000000004af845ca>] kthread+0x124/0x130 [<0000000038a000ca>] ret_from_fork+0x10/0x20 unreferenced object 0xffff677a027803c0 (size 192): comm "kunit_try_catch", pid 388, jiffies 4294895988 (age 1101.836s) hex dump (first 32 bytes): 00 12 7a 00 05 00 00 00 0a 00 00 00 64 00 00 00 ..z.........d... 00 00 00 00 00 00 00 00 d8 03 78 02 7a 67 ff ff ..........x.zg.. backtrace: [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368 [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290 [<0000000034d76721>] kmalloc_trace+0x40/0x164 [<00000000c1010131>] vcap_dup_rule+0x34/0x14c [<00000000d43c54a4>] vcap_add_rule+0x29c/0x32c [<0000000073f1c26d>] vcap_api_encode_rule_test+0x304/0xb18 [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<000000004af845ca>] kthread+0x124/0x130 [<0000000038a000ca>] ret_from_fork+0x10/0x20 Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index f268383a7570..8c61a5dbce55 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1439,6 +1439,10 @@ static void vcap_api_encode_rule_test(struct kunit *test) ret = list_empty(&is2_admin.rules); KUNIT_EXPECT_EQ(test, false, ret); KUNIT_EXPECT_EQ(test, 0, ret); + + vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0, + rule->cookie, false); + vcap_free_rule(rule); /* Check that the rule has been freed: tricky to access since this @@ -1449,6 +1453,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) KUNIT_EXPECT_EQ(test, true, ret); ret = list_empty(&rule->actionfields); KUNIT_EXPECT_EQ(test, true, ret); + + vcap_del_rule(&test_vctrl, &test_netdev, id); } static void vcap_api_set_rule_counter_test(struct kunit *test) -- cgit From 20146fa73ab8db2ab9f4916bbaf4610646787a09 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:09 +0800 Subject: net: microchip: sparx5: Fix possible memory leaks in test_vcap_xn_rule_creator() Inject fault while probing kunit-example-test.ko, the rule which is allocated by kzalloc in vcap_alloc_rule(), the field which is allocated by kzalloc in vcap_rule_add_action() and vcap_rule_add_key() is not freed, and it cause the memory leaks below. Use vcap_free_rule() to free them as other drivers do it. And since the return rule of test_vcap_xn_rule_creator() is not used, remove it and switch to void. unreferenced object 0xffff058383334240 (size 192): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 81 93 84 83 05 ff ff ................ backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583849380c0 (size 64): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 40 81 93 84 83 05 ff ff 68 42 33 83 83 05 ff ff @.......hB3..... 22 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384938100 (size 64): comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s) hex dump (first 32 bytes): 80 81 93 84 83 05 ff ff 58 42 33 83 83 05 ff ff ........XB3..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583833b6240 (size 192): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 40 91 8f 84 83 05 ff ff ........@....... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848f9100 (size 64): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): 80 91 8f 84 83 05 ff ff 68 62 3b 83 83 05 ff ff ........hb;..... 22 00 00 00 01 00 00 00 00 00 00 00 a5 b4 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848f9140 (size 64): comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s) hex dump (first 32 bytes): c0 91 8f 84 83 05 ff ff 58 62 3b 83 83 05 ff ff ........Xb;..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838264e0c0 (size 192): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00 .'.............. 00 00 00 00 00 00 00 00 40 3a 97 84 83 05 ff ff ........@:...... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384973a80 (size 64): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s) hex dump (first 32 bytes): e8 e0 64 82 83 05 ff ff e8 e0 64 82 83 05 ff ff ..d.......d..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff058384973a40 (size 64): comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.880s) hex dump (first 32 bytes): 80 39 97 84 83 05 ff ff d8 e0 64 82 83 05 ff ff .9........d..... 7d 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<0000000094335477>] vcap_add_type_keyfield+0xbc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583832fa240 (size 192): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 a1 8b 84 83 05 ff ff ................ backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848ba0c0 (size 64): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 40 a1 8b 84 83 05 ff ff 68 a2 2f 83 83 05 ff ff @.......h./..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583848ba100 (size 64): comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s) hex dump (first 32 bytes): 80 a1 8b 84 83 05 ff ff 58 a2 2f 83 83 05 ff ff ........X./..... 7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00 }............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128 [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff0583827d2180 (size 192): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 00 e1 06 83 83 05 ff ff ................ backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838306e0c0 (size 64): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s) hex dump (first 32 bytes): 40 e1 06 83 83 05 ff ff a8 21 7d 82 83 05 ff ff @........!}..... 22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff "............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178 [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 unreferenced object 0xffff05838306e180 (size 64): comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.968s) hex dump (first 32 bytes): 98 21 7d 82 83 05 ff ff 00 e1 06 83 83 05 ff ff .!}............. 67 00 00 00 00 00 00 00 01 01 00 00 ff 00 00 00 g............... backtrace: [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368 [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290 [<0000000061886991>] kmalloc_trace+0x40/0x164 [<0000000043c78991>] vcap_rule_add_key+0x104/0x180 [<000000006ce4945d>] test_add_def_fields+0x84/0x8c [<00000000507e0ab6>] vcap_val_rule+0x294/0x3e8 [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328 [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0 [<00000000548b559e>] kunit_try_run_case+0x50/0xac [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<00000000e646f120>] kthread+0x124/0x130 [<000000005257599e>] ret_from_fork+0x10/0x20 Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules") Signed-off-by: Jinjie Ruan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309090950.uOTEKQq3-lkp@intel.com/ Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 8c61a5dbce55..99f04a53a442 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -243,10 +243,9 @@ static void vcap_test_api_init(struct vcap_admin *admin) } /* Helper function to create a rule of a specific size */ -static struct vcap_rule * -test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user, - u16 priority, - int id, int size, int expected_addr) +static void test_vcap_xn_rule_creator(struct kunit *test, int cid, + enum vcap_user user, u16 priority, + int id, int size, int expected_addr) { struct vcap_rule *rule; struct vcap_rule_internal *ri; @@ -311,7 +310,7 @@ test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user, ret = vcap_add_rule(rule); KUNIT_EXPECT_EQ(test, 0, ret); KUNIT_EXPECT_EQ(test, expected_addr, ri->addr); - return rule; + vcap_free_rule(rule); } /* Prepare testing rule deletion */ -- cgit From 2a2dffd911d4139258b828b9c5056cb64b826758 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 12 Sep 2023 19:03:10 +0800 Subject: net: microchip: sparx5: Fix possible memory leaks in vcap_api_kunit Inject fault while probing kunit-example-test.ko, the duprule which is allocated by kzalloc in vcap_dup_rule() of test_vcap_xn_rule_creator() is not freed, and it cause the memory leaks below. Use vcap_del_rule() to free them as other functions do it. unreferenced object 0xffff6eb4846f6180 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895522 (age 880.004s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00 .'.............. 00 00 00 00 00 00 00 00 98 61 6f 84 b4 6e ff ff .........ao..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000d2ac4ccb>] vcap_api_rule_insert_in_order_test+0xa4/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f6240 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00 .'.............. 00 00 00 00 00 00 00 00 58 62 6f 84 b4 6e ff ff ........Xbo..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<0000000052e6ad35>] vcap_api_rule_insert_in_order_test+0xbc/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f6300 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 18 63 6f 84 b4 6e ff ff .........co..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<000000001b0895d4>] vcap_api_rule_insert_in_order_test+0xd4/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4846f63c0 (size 192): comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 880.012s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00 .'......(....... 00 00 00 00 00 00 00 00 d8 63 6f 84 b4 6e ff ff .........co..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000134c151f>] vcap_api_rule_insert_in_order_test+0xec/0x114 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc180 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 14 00 00 00 c8 00 00 00 .'.............. 00 00 00 00 00 00 00 00 98 c1 5f 84 b4 6e ff ff .........._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000fa5f64d3>] vcap_api_rule_insert_reverse_order_test+0xc8/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc240 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 58 c2 5f 84 b4 6e ff ff ........X._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000a7db42de>] vcap_api_rule_insert_reverse_order_test+0x108/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc300 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 90 01 00 00 .'......(....... 00 00 00 00 00 00 00 00 18 c3 5f 84 b4 6e ff ff .........._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000ea416c94>] vcap_api_rule_insert_reverse_order_test+0x150/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb4845fc3c0 (size 192): comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.020s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 32 00 00 00 f4 01 00 00 .'......2....... 00 00 00 00 00 00 00 00 d8 c3 5f 84 b4 6e ff ff .........._..n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000453dcd80>] vcap_add_rule+0x134/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<00000000764a39b4>] vcap_api_rule_insert_reverse_order_test+0x198/0x600 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb484cd4240 (size 192): comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00 .'..........,... 00 00 00 00 00 00 00 00 58 42 cd 84 b4 6e ff ff ........XB...n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<0000000023976dd4>] vcap_api_rule_remove_in_front_test+0x158/0x658 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 unreferenced object 0xffff6eb484cd4300 (size 192): comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s) hex dump (first 32 bytes): 10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00 .'......(....... 00 00 00 00 00 00 00 00 18 43 cd 84 b4 6e ff ff .........C...n.. backtrace: [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368 [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290 [<0000000046ef1b64>] kmalloc_trace+0x40/0x164 [<000000008565145b>] vcap_dup_rule+0x38/0x210 [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330 [<000000000b4760ff>] vcap_api_rule_remove_in_front_test+0x170/0x658 [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c [<0000000058a88b6b>] kthread+0x124/0x130 [<00000000891cf28a>] ret_from_fork+0x10/0x20 Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 99f04a53a442..fe4e166de8a0 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1597,6 +1597,11 @@ static void vcap_api_rule_insert_in_order_test(struct kunit *test) test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 20, 400, 6, 774); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 30, 300, 3, 771); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 40, 200, 2, 768); + + vcap_del_rule(&test_vctrl, &test_netdev, 200); + vcap_del_rule(&test_vctrl, &test_netdev, 300); + vcap_del_rule(&test_vctrl, &test_netdev, 400); + vcap_del_rule(&test_vctrl, &test_netdev, 500); } static void vcap_api_rule_insert_reverse_order_test(struct kunit *test) @@ -1655,6 +1660,11 @@ static void vcap_api_rule_insert_reverse_order_test(struct kunit *test) ++idx; } KUNIT_EXPECT_EQ(test, 768, admin.last_used_addr); + + vcap_del_rule(&test_vctrl, &test_netdev, 500); + vcap_del_rule(&test_vctrl, &test_netdev, 400); + vcap_del_rule(&test_vctrl, &test_netdev, 300); + vcap_del_rule(&test_vctrl, &test_netdev, 200); } static void vcap_api_rule_remove_at_end_test(struct kunit *test) @@ -1855,6 +1865,9 @@ static void vcap_api_rule_remove_in_front_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 786, test_init_start); KUNIT_EXPECT_EQ(test, 8, test_init_count); KUNIT_EXPECT_EQ(test, 794, admin.last_used_addr); + + vcap_del_rule(&test_vctrl, &test_netdev, 200); + vcap_del_rule(&test_vctrl, &test_netdev, 300); } static struct kunit_case vcap_api_rule_remove_test_cases[] = { -- cgit From c326ca98446e0ae4fee43a40acf79412b74cfedb Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 12 Sep 2023 16:16:25 +0200 Subject: selftests: tls: swap the TX and RX sockets in some tests tls.sendmsg_large and tls.sendmsg_multiple are trying to send through the self->cfd socket (only configured with TLS_RX) and to receive through the self->fd socket (only configured with TLS_TX), so they're not using kTLS at all. Swap the sockets. Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 297d972558fb..464853a7f982 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -613,11 +613,11 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } while (recvs++ < sends) { - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); } free(mem); @@ -646,9 +646,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), -- cgit From 057a28ef93bdbe84326d34cdb5543afdaab49fe1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 7 Sep 2023 15:24:34 +0800 Subject: ALSA: hda: Disable power save for solving pop issue on Lenovo ThinkCentre M70q Lenovo ThinkCentre M70q had boot up pop noise. Disable power save will solve pop issue. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/315900e2efef42fd9855eacfeb443abd@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 765d95e79861..ca765ac4765f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2211,6 +2211,7 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), + SND_PCI_QUIRK(0x17aa, 0x316e, "Lenovo ThinkCentre M70q", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1689623 */ SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ -- cgit From 21484e43b936c4f323d232c6a71c1f47a6af3278 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 14 Sep 2023 16:25:25 +0100 Subject: ALSA: hda: cs35l56: Fix missing RESET GPIO if _SUB is missing In cs35l56_hda_read_acpi() do not return if ACPI _SUB is missing. A missing _SUB means that the driver cannot load a system-specific firmware, because the firmware is identified by the _SUB. But it can fallback to a generic firmware. Unfortunately this was being handled by immediately returning 0, which would skip the remaining ACPI configuration in cs35l56_hda_read_acpi() and so it would not get the RESET GPIO. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://lore.kernel.org/r/20230914152525.20829-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index bc75865b5de8..87ffe8fbff99 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -865,15 +865,13 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int id) sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); if (IS_ERR(sub)) { - /* If no ACPI SUB, return 0 and fallback to legacy firmware path, otherwise fail */ - if (PTR_ERR(sub) == -ENODATA) - return 0; - else - return PTR_ERR(sub); + dev_info(cs35l56->base.dev, + "Read ACPI _SUB failed(%ld): fallback to generic firmware\n", + PTR_ERR(sub)); + } else { + cs35l56->system_name = sub; } - cs35l56->system_name = sub; - cs35l56->base.reset_gpio = devm_gpiod_get_index_optional(cs35l56->base.dev, "reset", cs35l56->index, -- cgit From a8f367f7e131e76713b2949b168ac97f671fce7a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2023 20:54:51 +0200 Subject: net: ti: icssg-prueth: add PTP dependency The driver can now use PTP if enabled but fails to link built-in if PTP is a loadable module: aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_get_ptp_clock_idx': icss_iep.c:(.text+0x200): undefined reference to `ptp_clock_index' Add the usual dependency to avoid this. Fixes: 186734c158865 ("net: ti: icssg-prueth: add packet timestamping and ptp support") Signed-off-by: Arnd Bergmann Reviewed-by: MD Danish Anwar Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 88b5b1b47779..0a3346650e03 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -199,6 +199,7 @@ config TI_ICSSG_PRUETH config TI_ICSS_IEP tristate "TI PRU ICSS IEP driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on TI_PRUSS default TI_PRUSS help -- cgit From 75ad80ed88a182ab2ad5513e448cf07b403af5c3 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 13 Sep 2023 09:39:05 +0300 Subject: net/core: Fix ETH_P_1588 flow dissector When a PTP ethernet raw frame with a size of more than 256 bytes followed by a 0xff pattern is sent to __skb_flow_dissect, nhoff value calculation is wrong. For example: hdr->message_length takes the wrong value (0xffff) and it does not replicate real header length. In this case, 'nhoff' value was overridden and the PTP header was badly dissected. This leads to a kernel crash. net/core: flow_dissector net/core flow dissector nhoff = 0x0000000e net/core flow dissector hdr->message_length = 0x0000ffff net/core flow dissector nhoff = 0x0001000d (u16 overflow) ... skb linear: 00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88 skb frag: 00000000: f7 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Using the size of the ptp_header struct will allow the corrected calculation of the nhoff value. net/core flow dissector nhoff = 0x0000000e net/core flow dissector nhoff = 0x00000030 (sizeof ptp_header) ... skb linear: 00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88 f7 ff ff skb linear: 00000010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff skb linear: 00000020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff skb frag: 00000000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Kernel trace: [ 74.984279] ------------[ cut here ]------------ [ 74.989471] kernel BUG at include/linux/skbuff.h:2440! [ 74.995237] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 75.001098] CPU: 4 PID: 0 Comm: swapper/4 Tainted: G U 5.15.85-intel-ese-standard-lts #1 [ 75.011629] Hardware name: Intel Corporation A-Island (CPU:AlderLake)/A-Island (ID:06), BIOS SB_ADLP.01.01.00.01.03.008.D-6A9D9E73-dirty Mar 30 2023 [ 75.026507] RIP: 0010:eth_type_trans+0xd0/0x130 [ 75.031594] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9 [ 75.052612] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297 [ 75.058473] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003 [ 75.066462] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300 [ 75.074458] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800 [ 75.082466] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010 [ 75.090461] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800 [ 75.098464] FS: 0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000 [ 75.107530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.113982] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0 [ 75.121980] PKRU: 55555554 [ 75.125035] Call Trace: [ 75.127792] [ 75.130063] ? eth_get_headlen+0xa4/0xc0 [ 75.134472] igc_process_skb_fields+0xcd/0x150 [ 75.139461] igc_poll+0xc80/0x17b0 [ 75.143272] __napi_poll+0x27/0x170 [ 75.147192] net_rx_action+0x234/0x280 [ 75.151409] __do_softirq+0xef/0x2f4 [ 75.155424] irq_exit_rcu+0xc7/0x110 [ 75.159432] common_interrupt+0xb8/0xd0 [ 75.163748] [ 75.166112] [ 75.168473] asm_common_interrupt+0x22/0x40 [ 75.173175] RIP: 0010:cpuidle_enter_state+0xe2/0x350 [ 75.178749] Code: 85 c0 0f 8f 04 02 00 00 31 ff e8 39 6c 67 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 50 02 00 00 31 ff e8 52 b0 6d ff fb 45 85 f6 <0f> 88 b1 00 00 00 49 63 ce 4c 2b 2c 24 48 89 c8 48 6b d1 68 48 c1 [ 75.199757] RSP: 0018:ffff9948c013bea8 EFLAGS: 00000202 [ 75.205614] RAX: ffff8e4e8fb00000 RBX: ffffb948bfd23900 RCX: 000000000000001f [ 75.213619] RDX: 0000000000000004 RSI: ffffffff94206161 RDI: ffffffff94212e20 [ 75.221620] RBP: 0000000000000004 R08: 000000117568973a R09: 0000000000000001 [ 75.229622] R10: 000000000000afc8 R11: ffff8e4e8fb29ce4 R12: ffffffff945ae980 [ 75.237628] R13: 000000117568973a R14: 0000000000000004 R15: 0000000000000000 [ 75.245635] ? cpuidle_enter_state+0xc7/0x350 [ 75.250518] cpuidle_enter+0x29/0x40 [ 75.254539] do_idle+0x1d9/0x260 [ 75.258166] cpu_startup_entry+0x19/0x20 [ 75.262582] secondary_startup_64_no_verify+0xc2/0xcb [ 75.268259] [ 75.270721] Modules linked in: 8021q snd_sof_pci_intel_tgl snd_sof_intel_hda_common tpm_crb snd_soc_hdac_hda snd_sof_intel_hda snd_hda_ext_core snd_sof_pci snd_sof snd_sof_xtensa_dsp snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core snd_compress iTCO_wdt ac97_bus intel_pmc_bxt mei_hdcp iTCO_vendor_support snd_hda_codec_hdmi pmt_telemetry intel_pmc_core pmt_class snd_hda_intel x86_pkg_temp_thermal snd_intel_dspcfg snd_hda_codec snd_hda_core kvm_intel snd_pcm snd_timer kvm snd mei_me soundcore tpm_tis irqbypass i2c_i801 mei tpm_tis_core pcspkr intel_rapl_msr tpm i2c_smbus intel_pmt thermal sch_fq_codel uio uhid i915 drm_buddy video drm_display_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm fuse configfs [ 75.342736] ---[ end trace 3785f9f360400e3a ]--- [ 75.347913] RIP: 0010:eth_type_trans+0xd0/0x130 [ 75.352984] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9 [ 75.373994] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297 [ 75.379860] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003 [ 75.387856] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300 [ 75.395864] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800 [ 75.403857] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010 [ 75.411863] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800 [ 75.419875] FS: 0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000 [ 75.428946] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 75.435403] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0 [ 75.443410] PKRU: 55555554 [ 75.446477] Kernel panic - not syncing: Fatal exception in interrupt [ 75.453738] Kernel Offset: 0x11c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 75.465794] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: 4f1cc51f3488 ("net: flow_dissector: Parse PTP L2 packet header") Signed-off-by: Sasha Neftin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b3b3af0e7844..272f09251343 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1446,7 +1446,7 @@ proto_again: break; } - nhoff += ntohs(hdr->message_length); + nhoff += sizeof(struct ptp_header); fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } -- cgit From 582620d9f6b352552bc9a3316fe2b1c3acd8742d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 18 Aug 2023 15:27:46 +0300 Subject: thunderbolt: Workaround an IOMMU fault on certain systems with Intel Maple Ridge On some systems the IOMMU blocks the first couple of driver ready messages to the connection manager firmware as can be seen in below excerpts: thunderbolt 0000:06:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0010 address=0xbb0e3400 flags=0x0020] or DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [04:00.0] PASID ffffffff fault addr 69974000 [fault reason 05] PTE Write access is not set The reason is unknown and hard to debug because we were not able to reproduce this locally. This only happens on certain systems with Intel Maple Ridge Thunderbolt controller. If there is a device connected when the driver is loaded the issue does not happen either. Only when there is nothing connected (so typically when the system is booted up). We can work this around by sending the driver ready several times. After a couple of retries the message goes through and the controller works just fine. For this reason make the number of retries a parameter for icm_request() and then for Maple Ridge (and Titan Ridge as they us the same function but this should not matter) increase number of retries while shortening the timeout accordingly. Reported-by: Werner Sembach Reported-by: Konrad J Hambrick Reported-by: Calvin Walton Closes: https://bugzilla.kernel.org/show_bug.cgi?id=214259 Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/icm.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index dbdcad8d73bf..d8b9c734abd3 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -41,6 +41,7 @@ #define PHY_PORT_CS1_LINK_STATE_SHIFT 26 #define ICM_TIMEOUT 5000 /* ms */ +#define ICM_RETRIES 3 #define ICM_APPROVE_TIMEOUT 10000 /* ms */ #define ICM_MAX_LINK 4 @@ -296,10 +297,9 @@ static bool icm_copy(struct tb_cfg_request *req, const struct ctl_pkg *pkg) static int icm_request(struct tb *tb, const void *request, size_t request_size, void *response, size_t response_size, size_t npackets, - unsigned int timeout_msec) + int retries, unsigned int timeout_msec) { struct icm *icm = tb_priv(tb); - int retries = 3; do { struct tb_cfg_request *req; @@ -410,7 +410,7 @@ static int icm_fr_get_route(struct tb *tb, u8 link, u8 depth, u64 *route) return -ENOMEM; ret = icm_request(tb, &request, sizeof(request), switches, - sizeof(*switches), npackets, ICM_TIMEOUT); + sizeof(*switches), npackets, ICM_RETRIES, ICM_TIMEOUT); if (ret) goto err_free; @@ -463,7 +463,7 @@ icm_fr_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -488,7 +488,7 @@ static int icm_fr_approve_switch(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); /* Use larger timeout as establishing tunnels can take some time */ ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_APPROVE_TIMEOUT); + 1, ICM_RETRIES, ICM_APPROVE_TIMEOUT); if (ret) return ret; @@ -515,7 +515,7 @@ static int icm_fr_add_switch_key(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -543,7 +543,7 @@ static int icm_fr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -577,7 +577,7 @@ static int icm_fr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1020,7 +1020,7 @@ icm_tr_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, 20000); + 1, 10, 2000); if (ret) return ret; @@ -1053,7 +1053,7 @@ static int icm_tr_approve_switch(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_APPROVE_TIMEOUT); + 1, ICM_RETRIES, ICM_APPROVE_TIMEOUT); if (ret) return ret; @@ -1081,7 +1081,7 @@ static int icm_tr_add_switch_key(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1110,7 +1110,7 @@ static int icm_tr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1144,7 +1144,7 @@ static int icm_tr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1170,7 +1170,7 @@ static int icm_tr_xdomain_tear_down(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1496,7 +1496,7 @@ icm_ar_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1522,7 +1522,7 @@ static int icm_ar_get_route(struct tb *tb, u8 link, u8 depth, u64 *route) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1543,7 +1543,7 @@ static int icm_ar_get_boot_acl(struct tb *tb, uuid_t *uuids, size_t nuuids) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1604,7 +1604,7 @@ static int icm_ar_set_boot_acl(struct tb *tb, const uuid_t *uuids, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1626,7 +1626,7 @@ icm_icl_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, 20000); + 1, ICM_RETRIES, 20000); if (ret) return ret; @@ -2298,7 +2298,7 @@ static int icm_usb4_switch_op(struct tb_switch *sw, u16 opcode, u32 *metadata, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; -- cgit From a9fdf5f933a6f2b358fad0194b1287b67f6704b1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 22 Aug 2023 16:36:18 +0300 Subject: thunderbolt: Check that lane 1 is in CL0 before enabling lane bonding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marek reported that when BlackMagic UltraStudio device is connected the kernel repeatedly tries to enable lane bonding without success making the device non-functional. It looks like the device does not have lane 1 connected at all so even though it is enabled we should not try to bond the lanes. For this reason check that lane 1 is in fact CL0 (connected, active) before attempting to bond the lanes. Reported-by: Marek Šanta Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217737 Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 43171cc1cc2d..bd5815f8f23b 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2725,6 +2725,13 @@ int tb_switch_lane_bonding_enable(struct tb_switch *sw) !tb_port_is_width_supported(down, TB_LINK_WIDTH_DUAL)) return 0; + /* + * Both lanes need to be in CL0. Here we assume lane 0 already be in + * CL0 and check just for lane 1. + */ + if (tb_wait_for_port(down->dual_link_port, false) <= 0) + return -ENOTCONN; + ret = tb_port_lane_bonding_enable(up); if (ret) { tb_port_warn(up, "failed to enable lane bonding\n"); -- cgit From e19f714ea63f861d95d3d92d45d5fd5ca2e05c8c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 31 Aug 2023 14:10:46 +0300 Subject: thunderbolt: Correct TMU mode initialization from hardware David reported that cppcheck found following possible copy & paste error from tmu_mode_init(): tmu.c:385:50: style: Expression is always false because 'else if' condition matches previous condition at line 383. [multiCondition] And indeed this is a bug. Fix it to use correct index (TB_SWITCH_TMU_MODE_HIFI_UNI). Reported-by: David Binderman Fixes: d49b4f043d63 ("thunderbolt: Add support for enhanced uni-directional TMU mode") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c index 747f88703d5c..11f2aec2a5d3 100644 --- a/drivers/thunderbolt/tmu.c +++ b/drivers/thunderbolt/tmu.c @@ -382,7 +382,7 @@ static int tmu_mode_init(struct tb_switch *sw) } else if (ucap && tb_port_tmu_is_unidirectional(up)) { if (tmu_rates[TB_SWITCH_TMU_MODE_LOWRES] == rate) sw->tmu.mode = TB_SWITCH_TMU_MODE_LOWRES; - else if (tmu_rates[TB_SWITCH_TMU_MODE_LOWRES] == rate) + else if (tmu_rates[TB_SWITCH_TMU_MODE_HIFI_UNI] == rate) sw->tmu.mode = TB_SWITCH_TMU_MODE_HIFI_UNI; } else if (rate) { sw->tmu.mode = TB_SWITCH_TMU_MODE_HIFI_BI; -- cgit From 308092d080852f8997126e5b3507536162416f4a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 7 Sep 2023 16:02:30 +0300 Subject: thunderbolt: Restart XDomain discovery handshake after failure Alex reported that after rebooting the other host the peer-to-peer link does not come up anymore. The reason for this is that the host that was not rebooted tries to send the UUID request only 10 times according to the USB4 Inter-Domain spec and gives up if it does not get reply. Then when the other side is actually ready it cannot get the link established anymore. The USB4 Inter-Domain spec requires that the discovery protocol is restarted in that case so implement this now. Reported-by: Alex Balcanquall Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/xdomain.c | 58 ++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 5b5566862318..9803f0bbf20d 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -703,6 +703,27 @@ out_unlock: mutex_unlock(&xdomain_lock); } +static void start_handshake(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_INIT; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); +} + +/* Can be called from state_work */ +static void __stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->properties_changed_work); + xd->properties_changed_retries = 0; + xd->state_retries = 0; +} + +static void stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->state_work); + __stop_handshake(xd); +} + static void tb_xdp_handle_request(struct work_struct *work) { struct xdomain_request_work *xw = container_of(work, typeof(*xw), work); @@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct work_struct *work) case UUID_REQUEST: tb_dbg(tb, "%llx: received XDomain UUID request\n", route); ret = tb_xdp_uuid_response(ctl, route, sequence, uuid); + /* + * If we've stopped the discovery with an error such as + * timing out, we will restart the handshake now that we + * received UUID request from the remote host. + */ + if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) { + dev_dbg(&xd->dev, "restarting handshake\n"); + start_handshake(xd); + } break; case LINK_STATE_STATUS_REQUEST: @@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd) msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); } +static void tb_xdomain_failed(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_ERROR; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT)); +} + static void tb_xdomain_state_work(struct work_struct *work) { struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work); @@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct work_struct *work) if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { tb_xdomain_queue_properties_changed(xd); if (xd->bonding_possible) @@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct work_struct *work) if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { xd->state = XDOMAIN_STATE_ENUMERATED; } @@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct work_struct *work) break; case XDOMAIN_STATE_ERROR: + dev_dbg(&xd->dev, "discovery failed, stopping handshake\n"); + __stop_handshake(xd); break; default: @@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct device *dev) kfree(xd); } -static void start_handshake(struct tb_xdomain *xd) -{ - xd->state = XDOMAIN_STATE_INIT; - queue_delayed_work(xd->tb->wq, &xd->state_work, - msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); -} - -static void stop_handshake(struct tb_xdomain *xd) -{ - cancel_delayed_work_sync(&xd->properties_changed_work); - cancel_delayed_work_sync(&xd->state_work); - xd->properties_changed_retries = 0; - xd->state_retries = 0; -} - static int __maybe_unused tb_xdomain_suspend(struct device *dev) { stop_handshake(tb_to_xdomain(dev)); -- cgit From 0d42260867f9ff3e3a5bcfa8750fa06a658e0b1c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:50 +0200 Subject: ALSA: seq: ump: Fix -Wformat-truncation warning The filling of a port name string got a warning with W=1 due to the potentially too long group name. Add the string precision to limit the size. Fixes: 81fd444aa371 ("ALSA: seq: Bind UMP device") Link: https://lore.kernel.org/r/20230915082802.28684-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index a60e3f069a80..2db371d79930 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -207,7 +207,7 @@ static void fill_port_info(struct snd_seq_port_info *port, SNDRV_SEQ_PORT_TYPE_PORT; port->midi_channels = 16; if (*group->name) - snprintf(port->name, sizeof(port->name), "Group %d (%s)", + snprintf(port->name, sizeof(port->name), "Group %d (%.53s)", group->group + 1, group->name); else sprintf(port->name, "Group %d", group->group + 1); -- cgit From 9830c3851fd6f7ca977691632b786ba11a44be77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:51 +0200 Subject: ALSA: seq: midi: Fix -Wformat-truncation warning The compile warnings with -Wformat-truncation appearing at snd_seq_midisynth_probe() in seq_midi.c are false-positive; those must fit within the given string size. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_midi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c index 44302d98950e..18320a248aa7 100644 --- a/sound/core/seq/seq_midi.c +++ b/sound/core/seq/seq_midi.c @@ -349,9 +349,9 @@ snd_seq_midisynth_probe(struct device *_dev) if (! port->name[0]) { if (info->name[0]) { if (ports > 1) - snprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); + scnprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); else - snprintf(port->name, sizeof(port->name), "%s", info->name); + scnprintf(port->name, sizeof(port->name), "%s", info->name); } else { /* last resort */ if (ports > 1) -- cgit From 78bd8f5126f854872cc109cbac21c675d8539f3f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:52 +0200 Subject: ALSA: usb-audio: scarlett_gen2: Fix -Wformat-truncation warning The recent enablement of -Wformat-truncation leads to a false-positive warning for mixer_scarlett_gen2.c. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 9d11bb08667e..5c6f50f38840 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -3218,8 +3218,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) int from = i * info->inputs_per_phantom + 1; int to = (i + 1) * info->inputs_per_phantom; - snprintf(s, sizeof(s), fmt2, from, to, - "Phantom Power", "Switch"); + scnprintf(s, sizeof(s), fmt2, from, to, + "Phantom Power", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_phantom_ctl, i, 1, s, &private->phantom_ctls[i]); -- cgit From e9dde5a98288d05313bea24466f76484b8d324bb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:53 +0200 Subject: ALSA: caiaq: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 49f63f878e6f..b5cbf1f195c4 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -485,7 +485,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) } usb_make_path(usb_dev, usbpath, sizeof(usbpath)); - snprintf(card->longname, sizeof(card->longname), "%s %s (%s)", + scnprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); setup_card(cdev); -- cgit From 2a471452599a2e94fcc53ee2b7eac87fccd4ba04 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:54 +0200 Subject: ALSA: sscape: Fix -Wformat-truncation warning The warning with -Wformat-truncation at sscape_upload_microcode() is false-positive; the version number can be only a single digit, hence fitting with the given string size. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sscape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index 0bc0025f7c19..cc56fafd27b1 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -557,7 +557,7 @@ static int sscape_upload_microcode(struct snd_card *card, int version) char name[14]; int err; - snprintf(name, sizeof(name), "sndscape.co%d", version); + scnprintf(name, sizeof(name), "sndscape.co%d", version); err = request_firmware(&init_fw, name, card->dev); if (err < 0) { -- cgit From 399245d3046d7182a9ae97cb0671fc9ed8a579a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:55 +0200 Subject: ALSA: cs4236: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-7-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/cs423x/cs4236.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 10112e1bb25d..7226cbf2d7de 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -367,14 +367,14 @@ static int snd_cs423x_probe(struct snd_card *card, int dev) strscpy(card->driver, chip->pcm->name, sizeof(card->driver)); strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (dma2[dev] < 0) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i", - chip->pcm->name, chip->port, irq[dev], dma1[dev]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i", + chip->pcm->name, chip->port, irq[dev], dma1[dev]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i&%d", - chip->pcm->name, chip->port, irq[dev], dma1[dev], - dma2[dev]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i&%d", + chip->pcm->name, chip->port, irq[dev], dma1[dev], + dma2[dev]); err = snd_wss_timer(chip, 0); if (err < 0) -- cgit From 1e97acf3a6609c1329e23c2bf1e703c012bec848 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:56 +0200 Subject: ALSA: es1688: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-8-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/es1688/es1688.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index f935b56eeec7..97728bf45474 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -130,9 +130,9 @@ static int snd_es1688_probe(struct snd_card *card, unsigned int n) strscpy(card->driver, "ES1688", sizeof(card->driver)); strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i, dma %i", chip->pcm->name, chip->port, - chip->irq, chip->dma8); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i, dma %i", chip->pcm->name, chip->port, + chip->irq, chip->dma8); if (fm_port[n] == SNDRV_AUTO_PORT) fm_port[n] = port[n]; /* share the same port */ -- cgit From bc44e10abb90ac65a7b5337cb3323ec1ce79fb9a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:57 +0200 Subject: ALSA: opti9x: Fix -Wformat-truncation warning The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-9-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/opti9xx/miro.c | 8 ++++---- sound/isa/opti9xx/opti92x-ad1848.c | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 59242baed576..59792f2fada1 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -1344,10 +1344,10 @@ static int snd_miro_probe(struct snd_card *card) } strcpy(card->driver, "miro"); - snprintf(card->longname, sizeof(card->longname), - "%s: OPTi%s, %s at 0x%lx, irq %d, dma %d&%d", - card->shortname, miro->name, codec->pcm->name, - miro->wss_base + 4, miro->irq, miro->dma1, miro->dma2); + scnprintf(card->longname, sizeof(card->longname), + "%s: OPTi%s, %s at 0x%lx, irq %d, dma %d&%d", + card->shortname, miro->name, codec->pcm->name, + miro->wss_base + 4, miro->irq, miro->dma1, miro->dma2); if (mpu_port <= 0 || mpu_port == SNDRV_AUTO_PORT) rmidi = NULL; diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 4beeb32fe2a7..c33f67dd5133 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -859,15 +859,15 @@ static int snd_opti9xx_probe(struct snd_card *card) strcpy(card->driver, chip->name); sprintf(card->shortname, "OPTi %s", card->driver); #if defined(CS4231) || defined(OPTi93X) - snprintf(card->longname, sizeof(card->longname), - "%s, %s at 0x%lx, irq %d, dma %d&%d", - card->shortname, codec->pcm->name, - chip->wss_base + 4, irq, dma1, xdma2); + scnprintf(card->longname, sizeof(card->longname), + "%s, %s at 0x%lx, irq %d, dma %d&%d", + card->shortname, codec->pcm->name, + chip->wss_base + 4, irq, dma1, xdma2); #else - snprintf(card->longname, sizeof(card->longname), - "%s, %s at 0x%lx, irq %d, dma %d", - card->shortname, codec->pcm->name, chip->wss_base + 4, irq, - dma1); + scnprintf(card->longname, sizeof(card->longname), + "%s, %s at 0x%lx, irq %d, dma %d", + card->shortname, codec->pcm->name, chip->wss_base + 4, irq, + dma1); #endif /* CS4231 || OPTi93X */ if (mpu_port <= 0 || mpu_port == SNDRV_AUTO_PORT) -- cgit From 7272b8bfba35b2333b33d77f73ce75ee161880c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:58 +0200 Subject: ALSA: xen: Fix -Wformat-truncation warning The compile warning with -Wformat-truncation at xen_snd_front_cfg_card() is false-positive; the loop can be only for SNDRV_PCM_DEVICES which is at most 32. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-10-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/xen/xen_snd_front_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/xen/xen_snd_front_cfg.c b/sound/xen/xen_snd_front_cfg.c index 63b0398c3276..55ecf766ca67 100644 --- a/sound/xen/xen_snd_front_cfg.c +++ b/sound/xen/xen_snd_front_cfg.c @@ -483,7 +483,7 @@ int xen_snd_front_cfg_card(struct xen_snd_front_info *front_info, *stream_cnt = 0; num_devices = 0; do { - snprintf(node, sizeof(node), "%d", num_devices); + scnprintf(node, sizeof(node), "%d", num_devices); if (!xenbus_exists(XBT_NIL, xb_dev->nodename, node)) break; -- cgit From 641e969114c781ff269e1bf1b1f8d3cc33bc4a1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:27:59 +0200 Subject: ALSA: firewire: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915082802.28684-11-tiwai@suse.de Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks.c | 10 +++++----- sound/firewire/oxfw/oxfw.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index dd4298876ac0..e3ed4e094ccd 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -93,11 +93,11 @@ get_hardware_info(struct snd_efw *efw) strcpy(efw->card->driver, "Fireworks"); strcpy(efw->card->shortname, hwinfo->model_name); strcpy(efw->card->mixername, hwinfo->model_name); - snprintf(efw->card->longname, sizeof(efw->card->longname), - "%s %s v%s, GUID %08x%08x at %s, S%d", - hwinfo->vendor_name, hwinfo->model_name, version, - hwinfo->guid_hi, hwinfo->guid_lo, - dev_name(&efw->unit->device), 100 << fw_dev->max_speed); + scnprintf(efw->card->longname, sizeof(efw->card->longname), + "%s %s v%s, GUID %08x%08x at %s, S%d", + hwinfo->vendor_name, hwinfo->model_name, version, + hwinfo->guid_hi, hwinfo->guid_lo, + dev_name(&efw->unit->device), 100 << fw_dev->max_speed); if (hwinfo->flags & BIT(FLAG_RESP_ADDR_CHANGABLE)) efw->resp_addr_changable = true; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 63d40f1a914f..241a697ce26b 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -108,11 +108,11 @@ static int name_card(struct snd_oxfw *oxfw, const struct ieee1394_device_id *ent strcpy(oxfw->card->mixername, m); strcpy(oxfw->card->shortname, m); - snprintf(oxfw->card->longname, sizeof(oxfw->card->longname), - "%s %s (OXFW%x %04x), GUID %08x%08x at %s, S%d", - v, m, firmware >> 20, firmware & 0xffff, - fw_dev->config_rom[3], fw_dev->config_rom[4], - dev_name(&oxfw->unit->device), 100 << fw_dev->max_speed); + scnprintf(oxfw->card->longname, sizeof(oxfw->card->longname), + "%s %s (OXFW%x %04x), GUID %08x%08x at %s, S%d", + v, m, firmware >> 20, firmware & 0xffff, + fw_dev->config_rom[3], fw_dev->config_rom[4], + dev_name(&oxfw->unit->device), 100 << fw_dev->max_speed); end: return err; } -- cgit From ea77850e98410987525eb392c229949c87779835 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:00 +0200 Subject: ALSA: firewire: Fix -Wformat-truncation warning for MIDI stream names The compile warnings at filling MIDI stream name strings are all false-positive; the number of streams can't go so high. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Link: https://lore.kernel.org/r/20230915082802.28684-12-tiwai@suse.de Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/bebob/bebob_midi.c | 6 +++--- sound/firewire/dice/dice-midi.c | 4 ++-- sound/firewire/digi00x/digi00x-midi.c | 14 +++++++------- sound/firewire/fireface/ff-midi.c | 4 ++-- sound/firewire/fireworks/fireworks_midi.c | 4 ++-- sound/firewire/motu/motu-midi.c | 4 ++-- sound/firewire/oxfw/oxfw-midi.c | 6 +++--- sound/firewire/tascam/tascam-midi.c | 12 ++++++------ 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sound/firewire/bebob/bebob_midi.c b/sound/firewire/bebob/bebob_midi.c index 6f597d03e7c1..b1425bf98c3b 100644 --- a/sound/firewire/bebob/bebob_midi.c +++ b/sound/firewire/bebob/bebob_midi.c @@ -84,9 +84,9 @@ static void set_midi_substream_names(struct snd_bebob *bebob, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - bebob->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + bebob->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/dice/dice-midi.c b/sound/firewire/dice/dice-midi.c index 4c2998034313..78988e44b8bc 100644 --- a/sound/firewire/dice/dice-midi.c +++ b/sound/firewire/dice/dice-midi.c @@ -88,8 +88,8 @@ static void set_midi_substream_names(struct snd_dice *dice, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", dice->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", dice->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/digi00x/digi00x-midi.c b/sound/firewire/digi00x/digi00x-midi.c index 68eb8c39afa6..8f4bace16050 100644 --- a/sound/firewire/digi00x/digi00x-midi.c +++ b/sound/firewire/digi00x/digi00x-midi.c @@ -100,14 +100,14 @@ static void set_substream_names(struct snd_dg00x *dg00x, list_for_each_entry(subs, &str->substreams, list) { if (!is_console) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - dg00x->card->shortname, - subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + dg00x->card->shortname, + subs->number + 1); } else { - snprintf(subs->name, sizeof(subs->name), - "%s control", - dg00x->card->shortname); + scnprintf(subs->name, sizeof(subs->name), + "%s control", + dg00x->card->shortname); } } } diff --git a/sound/firewire/fireface/ff-midi.c b/sound/firewire/fireface/ff-midi.c index 25821d186b87..da3054fdcc7d 100644 --- a/sound/firewire/fireface/ff-midi.c +++ b/sound/firewire/fireface/ff-midi.c @@ -79,8 +79,8 @@ static void set_midi_substream_names(struct snd_rawmidi_str *stream, struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &stream->substreams, list) { - snprintf(substream->name, sizeof(substream->name), - "%s MIDI %d", name, substream->number + 1); + scnprintf(substream->name, sizeof(substream->name), + "%s MIDI %d", name, substream->number + 1); } } diff --git a/sound/firewire/fireworks/fireworks_midi.c b/sound/firewire/fireworks/fireworks_midi.c index 84621e356848..350bf4d299c2 100644 --- a/sound/firewire/fireworks/fireworks_midi.c +++ b/sound/firewire/fireworks/fireworks_midi.c @@ -84,8 +84,8 @@ static void set_midi_substream_names(struct snd_efw *efw, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", efw->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", efw->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/motu/motu-midi.c b/sound/firewire/motu/motu-midi.c index 2365f7dfde26..eebc7e790ee2 100644 --- a/sound/firewire/motu/motu-midi.c +++ b/sound/firewire/motu/motu-midi.c @@ -88,8 +88,8 @@ static void set_midi_substream_names(struct snd_motu *motu, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", motu->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", motu->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/oxfw/oxfw-midi.c b/sound/firewire/oxfw/oxfw-midi.c index 775cba3f1f02..c215fa6f7a03 100644 --- a/sound/firewire/oxfw/oxfw-midi.c +++ b/sound/firewire/oxfw/oxfw-midi.c @@ -129,9 +129,9 @@ static void set_midi_substream_names(struct snd_oxfw *oxfw, struct snd_rawmidi_substream *subs; list_for_each_entry(subs, &str->substreams, list) { - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - oxfw->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + oxfw->card->shortname, subs->number + 1); } } diff --git a/sound/firewire/tascam/tascam-midi.c b/sound/firewire/tascam/tascam-midi.c index 02eed2dce435..c57fac4f1968 100644 --- a/sound/firewire/tascam/tascam-midi.c +++ b/sound/firewire/tascam/tascam-midi.c @@ -108,9 +108,9 @@ int snd_tscm_create_midi_devices(struct snd_tscm *tscm) /* TODO: support virtual MIDI ports. */ if (subs->number < tscm->spec->midi_capture_ports) { /* Hardware MIDI ports. */ - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - tscm->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + tscm->card->shortname, subs->number + 1); } } @@ -123,9 +123,9 @@ int snd_tscm_create_midi_devices(struct snd_tscm *tscm) list_for_each_entry(subs, &stream->substreams, list) { if (subs->number < tscm->spec->midi_playback_ports) { /* Hardware MIDI ports only. */ - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - tscm->card->shortname, subs->number + 1); + scnprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + tscm->card->shortname, subs->number + 1); } } -- cgit From 28329936d1e2ff4b962daca1c943f0150890d51e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:01 +0200 Subject: ALSA: cmipci: Fix -Wformat-truncation warning CMIPCI driver got compile warnings with -Wformat-truncation at a couple of plain sprintf() usages. Use scnprintf() for filling the longname string for avoiding the warnings. Link: https://lore.kernel.org/r/20230915082802.28684-13-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cmipci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 1415baac9c36..08e34b184780 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -3102,11 +3102,13 @@ static int snd_cmipci_create(struct snd_card *card, struct pci_dev *pci, } sprintf(card->shortname, "C-Media CMI%d", val); if (cm->chip_version < 68) - sprintf(modelstr, " (model %d)", cm->chip_version); + scnprintf(modelstr, sizeof(modelstr), + " (model %d)", cm->chip_version); else modelstr[0] = '\0'; - sprintf(card->longname, "%s%s at %#lx, irq %i", - card->shortname, modelstr, cm->iobase, cm->irq); + scnprintf(card->longname, sizeof(card->longname), + "%s%s at %#lx, irq %i", + card->shortname, modelstr, cm->iobase, cm->irq); if (cm->chip_version >= 39) { val = snd_cmipci_read_b(cm, CM_REG_MPU_PCI + 1); -- cgit From 5f6af0050a7a6f8d7972267ddd3a75970e13931e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 10:28:02 +0200 Subject: ALSA: hda: generic: Check potential mixer name string truncation add_control_with_pfx() constructs a mixer name element with the fixed size, and it got compile warnings with -Wformat-truncation. Although the size overflow is very unlikely, let's have a sanity check of the string size and returns the error if it really doesn't fit instead of silent truncation. Link: https://lore.kernel.org/r/20230915082802.28684-14-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index dbf7aa88e0e3..bf685d01259d 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -998,7 +998,11 @@ static int add_control_with_pfx(struct hda_gen_spec *spec, int type, const char *sfx, int cidx, unsigned long val) { char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); + int len; + + len = snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); + if (snd_BUG_ON(len >= sizeof(name))) + return -EINVAL; if (!add_control(spec, type, name, cidx, val)) return -ENOMEM; return 0; -- cgit From 60a9c7f7fb98163150964236e07bc57e731ad4f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:11 +0200 Subject: ALSA: ad1848: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915091313.5988-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/ad1848/ad1848.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index c471ac2aa450..401d8df28d87 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -96,13 +96,13 @@ static int snd_ad1848_probe(struct device *dev, unsigned int n) strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (!thinkpad[n]) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d", + chip->pcm->name, chip->port, irq[n], dma1[n]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d [Thinkpad]", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d [Thinkpad]", + chip->pcm->name, chip->port, irq[n], dma1[n]); error = snd_card_register(card); if (error < 0) -- cgit From ba8bb7dce1b2d2e3d582733f015778e3c31d042b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:12 +0200 Subject: ALSA: cs4231: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915091313.5988-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/cs423x/cs4231.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index 1e8923385366..c87be4be6df1 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -98,13 +98,13 @@ static int snd_cs4231_probe(struct device *dev, unsigned int n) strscpy(card->shortname, chip->pcm->name, sizeof(card->shortname)); if (dma2[n] < 0) - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d", - chip->pcm->name, chip->port, irq[n], dma1[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d", + chip->pcm->name, chip->port, irq[n], dma1[n]); else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %d, dma %d&%d", - chip->pcm->name, chip->port, irq[n], dma1[n], dma2[n]); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %d, dma %d&%d", + chip->pcm->name, chip->port, irq[n], dma1[n], dma2[n]); error = snd_wss_mixer(chip); if (error < 0) -- cgit From 322e0c500073ac4b2f74d8c850c0aeee81be8df3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Sep 2023 11:13:13 +0200 Subject: ALSA: riptide: Fix -Wformat-truncation warning for longname string The filling of card->longname can be gracefully truncated, as it's only informative. Use scnprintf() and suppress the superfluous compile warning with -Wformat-truncation. Link: https://lore.kernel.org/r/20230915091313.5988-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/riptide/riptide.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index b37c877c2c16..9dee0345f22c 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2105,15 +2105,15 @@ __snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id strcpy(card->driver, "RIPTIDE"); strcpy(card->shortname, "Riptide"); #ifdef SUPPORT_JOYSTICK - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x gameport 0x%x", - card->shortname, chip->port, chip->irq, chip->mpuaddr, - chip->opladdr, chip->gameaddr); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x gameport 0x%x", + card->shortname, chip->port, chip->irq, chip->mpuaddr, + chip->opladdr, chip->gameaddr); #else - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x", - card->shortname, chip->port, chip->irq, chip->mpuaddr, - chip->opladdr); + scnprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, irq %i mpu 0x%x opl3 0x%x", + card->shortname, chip->port, chip->irq, chip->mpuaddr, + chip->opladdr); #endif snd_riptide_proc_init(chip); err = snd_card_register(card); -- cgit From c04efbfd76d23157e64e6d6147518c187ab4233a Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 15 Sep 2023 02:13:44 +0000 Subject: ASoC: hdaudio.c: Add missing check for devm_kstrdup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because of the potential failure of the devm_kstrdup(), the dl[i].codecs->name could be NULL. Therefore, we need to check it and return -ENOMEM in order to transfer the error. Fixes: 97030a43371e ("ASoC: Intel: avs: Add HDAudio machine board") Signed-off-by: Chen Ni Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230915021344.3078-1-nichen@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/hdaudio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/avs/boards/hdaudio.c b/sound/soc/intel/avs/boards/hdaudio.c index cb00bc86ac94..8876558f19a1 100644 --- a/sound/soc/intel/avs/boards/hdaudio.c +++ b/sound/soc/intel/avs/boards/hdaudio.c @@ -55,6 +55,9 @@ static int avs_create_dai_links(struct device *dev, struct hda_codec *codec, int return -ENOMEM; dl[i].codecs->name = devm_kstrdup(dev, cname, GFP_KERNEL); + if (!dl[i].codecs->name) + return -ENOMEM; + dl[i].codecs->dai_name = pcm->name; dl[i].num_codecs = 1; dl[i].num_cpus = 1; -- cgit From b19a5733de255cabba5feecabf6e900638b582d1 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 15 Sep 2023 14:02:11 +0800 Subject: ASoC: imx-audmix: Fix return error with devm_clk_get() The devm_clk_get() can return -EPROBE_DEFER error, modify the error code to be -EINVAL is not correct, which cause the -EPROBE_DEFER error is not correctly handled. This patch is to fix the return error code. Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver") Signed-off-by: Shengjiu Wang Reviewed-by: Daniel Baluta Link: https://lore.kernel.org/r/1694757731-18308-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-audmix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 0b58df56f4da..aeb81aa61184 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -315,7 +315,7 @@ static int imx_audmix_probe(struct platform_device *pdev) if (IS_ERR(priv->cpu_mclk)) { ret = PTR_ERR(priv->cpu_mclk); dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret); - return -EINVAL; + return ret; } priv->audmix_pdev = audmix_pdev; -- cgit From e0b65f9b81fef180cf5f103adecbe5505c961153 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 13 Sep 2023 08:26:47 +0300 Subject: net: thunderbolt: Fix TCPv6 GSO checksum calculation Alex reported that running ssh over IPv6 does not work with Thunderbolt/USB4 networking driver. The reason for that is that driver should call skb_is_gso() before calling skb_is_gso_v6(), and it should not return false after calculates the checksum successfully. This probably was a copy paste error from the original driver where it was done properly. Reported-by: Alex Balcanquall Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/thunderbolt/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c index 0c1e8970ee58..0a53ec293d04 100644 --- a/drivers/net/thunderbolt/main.c +++ b/drivers/net/thunderbolt/main.c @@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, ip_hdr(skb)->protocol, 0); - } else if (skb_is_gso_v6(skb)) { + } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) { tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - return false; } else if (protocol == htons(ETH_P_IPV6)) { tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset; *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, -- cgit From 350db8a59eb392bf42e62b6b2a37d56b5833012b Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Wed, 13 Sep 2023 01:41:56 -0700 Subject: octeon_ep: fix tx dma unmap len values in SG Lengths of SG pointers are kept in the following order in the SG entries in hardware. 63 48|47 32|31 16|15 0 ----------------------------------------- | Len 0 | Len 1 | Len 2 | Len 3 | ----------------------------------------- | Ptr 0 | ----------------------------------------- | Ptr 1 | ----------------------------------------- | Ptr 2 | ----------------------------------------- | Ptr 3 | ----------------------------------------- Dma pointers have to be unmapped based on their respective lengths given in this format. Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Shinas Rasheed Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 8 ++++---- drivers/net/ethernet/marvell/octeon_ep/octep_tx.c | 8 ++++---- drivers/net/ethernet/marvell/octeon_ep/octep_tx.h | 16 +++++++++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 4424de2ffd70..dbc518ff8276 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -734,13 +734,13 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, dma_map_sg_err: if (si > 0) { dma_unmap_single(iq->dev, sglist[0].dma_ptr[0], - sglist[0].len[0], DMA_TO_DEVICE); - sglist[0].len[0] = 0; + sglist[0].len[3], DMA_TO_DEVICE); + sglist[0].len[3] = 0; } while (si > 1) { dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3], - sglist[si >> 2].len[si & 3], DMA_TO_DEVICE); - sglist[si >> 2].len[si & 3] = 0; + sglist[si >> 2].len[3 - (si & 3)], DMA_TO_DEVICE); + sglist[si >> 2].len[3 - (si & 3)] = 0; si--; } tx_buffer->gather = 0; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c index 5a520d37bea0..d0adb82d65c3 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c @@ -69,12 +69,12 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) compl_sg++; dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], - tx_buffer->sglist[0].len[0], DMA_TO_DEVICE); + tx_buffer->sglist[0].len[3], DMA_TO_DEVICE); i = 1; /* entry 0 is main skb, unmapped above */ while (frags--) { dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE); i++; } @@ -131,13 +131,13 @@ static void octep_iq_free_pending(struct octep_iq *iq) dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], - tx_buffer->sglist[0].len[0], + tx_buffer->sglist[0].len[3], DMA_TO_DEVICE); i = 1; /* entry 0 is main skb, unmapped above */ while (frags--) { dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE); i++; } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h index 2ef57980eb47..21e75ff9f5e7 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -17,7 +17,21 @@ #define TX_BUFTYPE_NET_SG 2 #define NUM_TX_BUFTYPES 3 -/* Hardware format for Scatter/Gather list */ +/* Hardware format for Scatter/Gather list + * + * 63 48|47 32|31 16|15 0 + * ----------------------------------------- + * | Len 0 | Len 1 | Len 2 | Len 3 | + * ----------------------------------------- + * | Ptr 0 | + * ----------------------------------------- + * | Ptr 1 | + * ----------------------------------------- + * | Ptr 2 | + * ----------------------------------------- + * | Ptr 3 | + * ----------------------------------------- + */ struct octep_tx_sglist_desc { u16 len[4]; dma_addr_t dma_ptr[4]; -- cgit From 2ba157983974ae1b6aaef7d4953812020d6f1eb5 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 11 Sep 2023 15:03:24 +0200 Subject: drm/tests: Fix incorrect argument in drm_test_mm_insert_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While drm_mm test was converted form igt selftest to kunit, unexpected value of "end" argument equal "start" was introduced to one of calls to a function that executes the drm_test_mm_insert_range for specific start/end pair of arguments. As a consequence, DRM_MM_BUG_ON(end <= start) is triggered. Fix it by restoring the original value. Fixes: fc8d29e298cf ("drm: selftest: convert drm_mm selftest to KUnit") Signed-off-by: Janusz Krzysztofik Cc: "Maíra Canal" Cc: Arthur Grillo Cc: Javier Martinez Canillas Cc: Daniel Latypov Cc: stable@vger.kernel.org # v6.1+ Reviewed-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20230911130323.7037-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/tests/drm_mm_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 186b28dc7038..05d5e7af6d25 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -939,7 +939,7 @@ static void drm_test_mm_insert_range(struct kunit *test) KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max - 1)); KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max / 2)); KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, - max / 2, max / 2)); + max / 2, max)); KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, max / 4 + 1, 3 * max / 4 - 1)); -- cgit From 4506f23e117161a20104c8fa04f33e1ca63c26af Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 13 Jul 2023 15:54:16 -0400 Subject: NFSv4.1: fix zero value filehandle in post open getattr Currently, if the OPEN compound experiencing an error and needs to get the file attributes separately, it will send a stand alone GETATTR but it would use the filehandle from the results of the OPEN compound. In case of the CLAIM_FH OPEN, nfs_openres's fh is zero value. That generate a GETATTR that's sent with a zero value filehandle, and results in the server returning an error. Instead, for the CLAIM_FH OPEN, take the filehandle that was used in the PUTFH of the OPEN compound. Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 794343790ea8..3508d8238826 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2703,8 +2703,12 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, return status; } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { + struct nfs_fh *fh = &o_res->fh; + nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL); + if (o_arg->claim == NFS4_OPEN_CLAIM_FH) + fh = NFS_FH(d_inode(data->dentry)); + nfs4_proc_getattr(server, fh, o_res->f_attr, NULL); } return 0; } -- cgit From bd2767ec3df2775bc336f441f9068a989ccb919d Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 23 Aug 2023 09:59:48 +0530 Subject: coresight: Fix run time warnings while reusing ETR buffer Fix the below warning by avoding calls to tmc_etr_enable_hw, if we are reusing the ETR buffer for multiple sources in sysfs mode. echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink echo 1 > /sys/bus/coresight/devices/ete1/enable_source echo 1 > /sys/bus/coresight/devices/ete2/enable_source [ 166.918290] ------------[ cut here ]------------ [ 166.922905] WARNING: CPU: 4 PID: 2288 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1037 tmc_etr_enable_hw+0xb0/0xc8 [ 166.933862] Modules linked in: [ 166.936911] CPU: 4 PID: 2288 Comm: bash Not tainted 6.5.0-rc7 #132 [ 166.943084] Hardware name: Marvell CN106XX board (DT) [ 166.948127] pstate: 834000c9 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--) [ 166.955083] pc : tmc_etr_enable_hw+0xb0/0xc8 [ 166.959345] lr : tmc_enable_etr_sink+0x134/0x210 snip.. 167.038545] Call trace: [ 167.040982] tmc_etr_enable_hw+0xb0/0xc8 [ 167.044897] tmc_enable_etr_sink+0x134/0x210 [ 167.049160] coresight_enable_path+0x160/0x278 [ 167.053596] coresight_enable+0xd4/0x298 [ 167.057510] enable_source_store+0x54/0xa0 [ 167.061598] dev_attr_store+0x20/0x40 [ 167.065254] sysfs_kf_write+0x4c/0x68 [ 167.068909] kernfs_fop_write_iter+0x128/0x200 [ 167.073345] vfs_write+0x1ac/0x2f8 [ 167.076739] ksys_write+0x74/0x110 [ 167.080132] __arm64_sys_write+0x24/0x38 [ 167.084045] invoke_syscall.constprop.0+0x58/0xf8 [ 167.088744] do_el0_svc+0x60/0x160 [ 167.092137] el0_svc+0x40/0x170 [ 167.095273] el0t_64_sync_handler+0x100/0x130 [ 167.099621] el0t_64_sync+0x190/0x198 [ 167.103277] ---[ end trace 0000000000000000 ]--- -bash: echo: write error: Device or resource busy Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR") Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230823042948.12879-1-lcherian@marvell.com --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 66dc5f97a009..6132c5b3db9c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1173,16 +1173,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) goto out; } - /* - * In sysFS mode we can have multiple writers per sink. Since this - * sink is already enabled no memory is needed and the HW need not be - * touched, even if the buffer size has changed. - */ - if (drvdata->mode == CS_MODE_SYSFS) { - atomic_inc(&csdev->refcnt); - goto out; - } - /* * If we don't have a buffer or it doesn't match the requested size, * use the buffer allocated above. Otherwise reuse the existing buffer. @@ -1204,7 +1194,7 @@ out: static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { - int ret; + int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev); @@ -1213,12 +1203,24 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) return PTR_ERR(sysfs_buf); spin_lock_irqsave(&drvdata->spinlock, flags); + + /* + * In sysFS mode we can have multiple writers per sink. Since this + * sink is already enabled no memory is needed and the HW need not be + * touched, even if the buffer size has changed. + */ + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(&csdev->refcnt); + goto out; + } + ret = tmc_etr_enable_hw(drvdata, sysfs_buf); if (!ret) { drvdata->mode = CS_MODE_SYSFS; atomic_inc(&csdev->refcnt); } +out: spin_unlock_irqrestore(&drvdata->spinlock, flags); if (!ret) -- cgit From 5f8456b1faefb06fcf6028dced9f37aa880c779d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 30 Aug 2023 14:56:34 -0500 Subject: arm64: dts: mediatek: Fix "mediatek,merge-mute" and "mediatek,merge-fifo-en" types "mediatek,merge-mute" and "mediatek,merge-fifo-en" properties are defined and used as boolean properties which in DT have no value. Signed-off-by: Rob Herring Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230830195650.704737-1-robh@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 4dbbf8fdab75..a9e52b50c8c4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2957,7 +2957,7 @@ clock-names = "merge","merge_async"; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xc000 0x1000>; - mediatek,merge-mute = <1>; + mediatek,merge-mute; resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE0_DL_ASYNC>; }; @@ -2970,7 +2970,7 @@ clock-names = "merge","merge_async"; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xd000 0x1000>; - mediatek,merge-mute = <1>; + mediatek,merge-mute; resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE1_DL_ASYNC>; }; @@ -2983,7 +2983,7 @@ clock-names = "merge","merge_async"; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xe000 0x1000>; - mediatek,merge-mute = <1>; + mediatek,merge-mute; resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE2_DL_ASYNC>; }; @@ -2996,7 +2996,7 @@ clock-names = "merge","merge_async"; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xf000 0x1000>; - mediatek,merge-mute = <1>; + mediatek,merge-mute; resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE3_DL_ASYNC>; }; @@ -3009,7 +3009,7 @@ clock-names = "merge","merge_async"; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x0000 0x1000>; - mediatek,merge-fifo-en = <1>; + mediatek,merge-fifo-en; resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE4_DL_ASYNC>; }; -- cgit From c8de44b577eb540e8bfea55afe1d0904bb571b7a Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 7 Aug 2023 14:59:40 +0200 Subject: iavf: do not process adminq tasks when __IAVF_IN_REMOVE_TASK is set Prevent schedule operations for adminq during device remove and when __IAVF_IN_REMOVE_TASK flag is set. Currently, the iavf_down function adds operations for adminq that shouldn't be processed when the device is in the __IAVF_REMOVE state. Reproduction: echo 4 > /sys/bus/pci/devices/0000:17:00.0/sriov_numvfs ip link set dev ens1f0 vf 0 trust on ip link set dev ens1f0 vf 1 trust on ip link set dev ens1f0 vf 2 trust on ip link set dev ens1f0 vf 3 trust on ip link set dev ens1f0 vf 0 mac 00:22:33:44:55:66 ip link set dev ens1f0 vf 1 mac 00:22:33:44:55:67 ip link set dev ens1f0 vf 2 mac 00:22:33:44:55:68 ip link set dev ens1f0 vf 3 mac 00:22:33:44:55:69 echo 0000:17:02.0 > /sys/bus/pci/devices/0000\:17\:02.0/driver/unbind echo 0000:17:02.1 > /sys/bus/pci/devices/0000\:17\:02.1/driver/unbind echo 0000:17:02.2 > /sys/bus/pci/devices/0000\:17\:02.2/driver/unbind echo 0000:17:02.3 > /sys/bus/pci/devices/0000\:17\:02.3/driver/unbind sleep 10 echo 0000:17:02.0 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.1 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.2 > /sys/bus/pci/drivers/iavf/bind echo 0000:17:02.3 > /sys/bus/pci/drivers/iavf/bind modprobe vfio-pci echo 8086 154c > /sys/bus/pci/drivers/vfio-pci/new_id qemu-system-x86_64 -accel kvm -m 4096 -cpu host \ -drive file=centos9.qcow2,if=none,id=virtio-disk0 \ -device virtio-blk-pci,drive=virtio-disk0,bootindex=0 -smp 4 \ -device vfio-pci,host=17:02.0 -net none \ -device vfio-pci,host=17:02.1 -net none \ -device vfio-pci,host=17:02.2 -net none \ -device vfio-pci,host=17:02.3 -net none \ -daemonize -vnc :5 Current result: There is a probability that the mac of VF in guest is inconsistent with it in host Expected result: When passthrough NIC VF to guest, the VF in guest should always get the same mac as it in host. Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage") Signed-off-by: Radoslaw Tyl Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7b300c86ceda..b23ca9d80189 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1421,7 +1421,8 @@ void iavf_down(struct iavf_adapter *adapter) iavf_clear_fdir_filters(adapter); iavf_clear_adv_rss_conf(adapter); - if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) { + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && + !(test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))) { /* cancel any current operation */ adapter->current_op = VIRTCHNL_OP_UNKNOWN; /* Schedule operations to close down the HW. Don't wait -- cgit From ed4cad33df9e272feaa6698b33359b29c2929564 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Thu, 7 Sep 2023 17:02:50 +0200 Subject: iavf: add iavf_schedule_aq_request() helper Add helper for set iavf aq request AVF_FLAG_AQ_* and immediately schedule watchdog_task. Helper will be used in cases where it is necessary to run aq requests asap Signed-off-by: Petr Oros Co-developed-by: Michal Schmidt Signed-off-by: Michal Schmidt Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 85fba85fbb23..e110ba346185 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -521,7 +521,7 @@ void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags); -void iavf_schedule_request_stats(struct iavf_adapter *adapter); +void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags); void iavf_schedule_finish_config(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index a34303ad057d..90397293525f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -362,7 +362,7 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, unsigned int i; /* Explicitly request stats refresh */ - iavf_schedule_request_stats(adapter); + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_REQUEST_STATS); iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b23ca9d80189..4b02a8cd77e9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -314,15 +314,13 @@ void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags) } /** - * iavf_schedule_request_stats - Set the flags and schedule statistics request + * iavf_schedule_aq_request - Set the flags and schedule aq request * @adapter: board private structure - * - * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly - * request and refresh ethtool stats + * @flags: requested aq flags **/ -void iavf_schedule_request_stats(struct iavf_adapter *adapter) +void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags) { - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS; + adapter->aq_required |= flags; mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } -- cgit From c923e7759a29cf67aa4dda77b816263771380f86 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 15 Sep 2023 15:43:00 +0100 Subject: ASoC: cs42l43: Add shared IRQ flag for shutters The microphone and speaker shutters on cs42l43 can be configured to trigger from the same GPIO, in this case the current code returns an error as we attempt to request two IRQ handlers for the same IRQ. Fix this by always requesting the shutter IRQs with the IRQF_SHARED flag. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230915144300.120100-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 1a95c370fc4c..5643c666d7d0 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2077,7 +2077,8 @@ static const struct cs42l43_irq cs42l43_irqs[] = { static int cs42l43_request_irq(struct cs42l43_codec *priv, struct irq_domain *dom, const char * const name, - unsigned int irq, irq_handler_t handler) + unsigned int irq, irq_handler_t handler, + unsigned long flags) { int ret; @@ -2087,8 +2088,8 @@ static int cs42l43_request_irq(struct cs42l43_codec *priv, dev_dbg(priv->dev, "Request IRQ %d for %s\n", ret, name); - ret = devm_request_threaded_irq(priv->dev, ret, NULL, handler, IRQF_ONESHOT, - name, priv); + ret = devm_request_threaded_irq(priv->dev, ret, NULL, handler, + IRQF_ONESHOT | flags, name, priv); if (ret) return dev_err_probe(priv->dev, ret, "Failed to request IRQ %s\n", name); @@ -2124,11 +2125,11 @@ static int cs42l43_shutter_irq(struct cs42l43_codec *priv, return 0; } - ret = cs42l43_request_irq(priv, dom, close_name, close_irq, handler); + ret = cs42l43_request_irq(priv, dom, close_name, close_irq, handler, IRQF_SHARED); if (ret) return ret; - return cs42l43_request_irq(priv, dom, open_name, open_irq, handler); + return cs42l43_request_irq(priv, dom, open_name, open_irq, handler, IRQF_SHARED); } static int cs42l43_codec_probe(struct platform_device *pdev) @@ -2178,7 +2179,8 @@ static int cs42l43_codec_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) { ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name, - cs42l43_irqs[i].irq, cs42l43_irqs[i].handler); + cs42l43_irqs[i].irq, + cs42l43_irqs[i].handler, 0); if (ret) goto err_pm; } -- cgit From e0f96246c4402514acda040be19ee24c1619e01a Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 15 Sep 2023 16:41:53 +0300 Subject: ASoC: SOF: Intel: MTL: Reduce the DSP init timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 20s seems unnecessarily large for the DSP init timeout. This coupled with multiple FW boot attempts causes an excessive delay in the error path when booting in recovery mode. Reduce it to 0.5s and use the existing HDA_DSP_INIT_TIMEOUT_US. Link: https://github.com/thesofproject/linux/issues/4565 Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230915134153.9688-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/mtl.c | 2 +- sound/soc/sof/intel/mtl.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c index b84ca58da9d5..f9412517eaf2 100644 --- a/sound/soc/sof/intel/mtl.c +++ b/sound/soc/sof/intel/mtl.c @@ -460,7 +460,7 @@ int mtl_dsp_cl_init(struct snd_sof_dev *sdev, int stream_tag, bool imr_boot) /* step 3: wait for IPC DONE bit from ROM */ ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, chip->ipc_ack, status, ((status & chip->ipc_ack_mask) == chip->ipc_ack_mask), - HDA_DSP_REG_POLL_INTERVAL_US, MTL_DSP_PURGE_TIMEOUT_US); + HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_INIT_TIMEOUT_US); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, "timeout waiting for purge IPC done\n"); diff --git a/sound/soc/sof/intel/mtl.h b/sound/soc/sof/intel/mtl.h index 02181490f12a..95696b3d7c4c 100644 --- a/sound/soc/sof/intel/mtl.h +++ b/sound/soc/sof/intel/mtl.h @@ -62,7 +62,6 @@ #define MTL_DSP_IRQSTS_IPC BIT(0) #define MTL_DSP_IRQSTS_SDW BIT(6) -#define MTL_DSP_PURGE_TIMEOUT_US 20000000 /* 20s */ #define MTL_DSP_REG_POLL_INTERVAL_US 10 /* 10 us */ /* Memory windows */ -- cgit From 31bb7bd9ffee50d09ec931998b823a86132ab807 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Sep 2023 15:40:15 +0300 Subject: ASoC: SOF: core: Only call sof_ops_free() on remove if the probe was successful All the fail paths during probe will free up the ops, on remove we should only free it if the probe was successful. Fixes: bc433fd76fae ("ASoC: SOF: Add ops_free") Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Rander Wang Link: https://lore.kernel.org/r/20230915124015.19637-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 30db685cc5f4..2d1616b81485 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -486,10 +486,9 @@ int snd_sof_device_remove(struct device *dev) snd_sof_ipc_free(sdev); snd_sof_free_debug(sdev); snd_sof_remove(sdev); + sof_ops_free(sdev); } - sof_ops_free(sdev); - /* release firmware */ snd_sof_fw_unload(sdev); -- cgit From 5f3d319a248654a805bafc9e7094bcea47dac6c7 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Thu, 7 Sep 2023 17:02:51 +0200 Subject: iavf: schedule a request immediately after add/delete vlan When the iavf driver wants to reconfigure the VLAN filters (iavf_add_vlan, iavf_del_vlan), it sets a flag in aq_required: adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; or: adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; This is later processed by the watchdog_task, but it runs periodically every 2 seconds, so it can be a long time before it processes the request. In the worst case, the interface is unable to receive traffic for more than 2 seconds for no objective reason. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Petr Oros Co-developed-by: Michal Schmidt Signed-off-by: Michal Schmidt Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Reviewed-by: Ahmed Zaki Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4b02a8cd77e9..6a2e6d64bc3a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -821,7 +821,7 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->vlan_filter_list); f->state = IAVF_VLAN_ADD; adapter->num_vlan_filters++; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } clearout: @@ -843,7 +843,7 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) f = iavf_find_vlan(adapter, vlan); if (f) { f->state = IAVF_VLAN_REMOVE; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } spin_unlock_bh(&adapter->mac_vlan_list_lock); -- cgit From d0d362ffa33da4acdcf7aee2116ceef8c8fef658 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 7 Sep 2023 17:44:57 +0200 Subject: i40e: Fix VF VLAN offloading when port VLAN is configured If port VLAN is configured on a VF then any other VLANs on top of this VF are broken. During i40e_ndo_set_vf_port_vlan() call the i40e driver reset the VF and iavf driver asks PF (using VIRTCHNL_OP_GET_VF_RESOURCES) for VF capabilities but this reset occurs too early, prior setting of vf->info.pvid field and because this field can be zero during i40e_vc_get_vf_resources_msg() then VIRTCHNL_VF_OFFLOAD_VLAN capability is reported to iavf driver. This is wrong because iavf driver should not report VLAN offloading capability when port VLAN is configured as i40e does not support QinQ offloading. Fix the issue by moving VF reset after setting of vf->port_vlan_id field. Without this patch: $ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs $ ip link set enp2s0f0 vf 0 vlan 3 $ ip link set enp2s0f0v0 up $ ip link add link enp2s0f0v0 name vlan4 type vlan id 4 $ ip link set vlan4 up ... $ ethtool -k enp2s0f0v0 | grep vlan-offload rx-vlan-offload: on tx-vlan-offload: on $ dmesg -l err | grep iavf [1292500.742914] iavf 0000:02:02.0: Failed to add VLAN filter, error IAVF_ERR_INVALID_QP_ID With this patch: $ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs $ ip link set enp2s0f0 vf 0 vlan 3 $ ip link set enp2s0f0v0 up $ ip link add link enp2s0f0v0 name vlan4 type vlan id 4 $ ip link set vlan4 up ... $ ethtool -k enp2s0f0v0 | grep vlan-offload rx-vlan-offload: off [requested on] tx-vlan-offload: off [requested on] $ dmesg -l err | grep iavf Fixes: f9b4b6278d51 ("i40e: Reset the VF upon conflicting VLAN configuration") Signed-off-by: Ivan Vecera Reviewed-by: Jesse Brandeburg Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8ea1a238dcef..d3d6415553ed 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4475,9 +4475,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, goto error_pvid; i40e_vlan_stripping_enable(vsi); - i40e_vc_reset_vf(vf, true); - /* During reset the VF got a new VSI, so refresh a pointer. */ - vsi = pf->vsi[vf->lan_vsi_idx]; + /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); @@ -4563,6 +4561,10 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, */ vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); + i40e_vc_reset_vf(vf, true); + /* During reset the VF got a new VSI, so refresh a pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; + ret = i40e_config_vf_promiscuous_mode(vf, vsi->id, allmulti, alluni); if (ret) { dev_err(&pf->pdev->dev, "Unable to config vf promiscuous mode\n"); -- cgit From 837723b22a63cfbff584655b009b9d488d0e9087 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 30 Aug 2023 03:07:43 +0200 Subject: netfilter, bpf: Adjust timeouts of non-confirmed CTs in bpf_ct_insert_entry() bpf_nf testcase fails on s390x: bpf_skb_ct_lookup() cannot find the entry that was added by bpf_ct_insert_entry() within the same BPF function. The reason is that this entry is deleted by nf_ct_gc_expired(). The CT timeout starts ticking after the CT confirmation; therefore nf_conn.timeout is initially set to the timeout value, and __nf_conntrack_confirm() sets it to the deadline value. bpf_ct_insert_entry() sets IPS_CONFIRMED_BIT, but does not adjust the timeout, making its value meaningless and causing false positives. Fix the problem by making bpf_ct_insert_entry() adjust the timeout, like __nf_conntrack_confirm(). Fixes: 2cdaa3eefed8 ("netfilter: conntrack: restore IPS_CONFIRMED out of nf_conntrack_hash_check_insert()") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Cc: Florian Westphal Link: https://lore.kernel.org/bpf/20230830011128.1415752-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- net/netfilter/nf_conntrack_bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index c7a6114091ae..b21799d468d2 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -381,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + if (!nf_ct_is_confirmed(nfct)) + nfct->timeout += nfct_time_stamp; nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { -- cgit From dca7acd84e93f2881e3f63465bbb5d89a40b5d17 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 13 Sep 2023 21:59:43 +0800 Subject: bpf: Skip unit_size checking for global per-cpu allocator For global per-cpu allocator, the size of free object in free list doesn't match with unit_size and now there is no way to get the size of per-cpu pointer saved in free object, so just skip the checking. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/bpf/20230913133436.0eeec4cb@canb.auug.org.au/ Signed-off-by: Hou Tao Tested-by: Biju Das Link: https://lore.kernel.org/r/20230913135943.3137292-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 1c22b90e754a..cf1941516643 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -491,6 +491,13 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) struct llist_node *first; unsigned int obj_size; + /* For per-cpu allocator, the size of free objects in free list doesn't + * match with unit_size and now there is no way to get the size of + * per-cpu pointer saved in free object, so just skip the checking. + */ + if (c->percpu_size) + return 0; + first = c->free_llist.first; if (!first) return 0; -- cgit From 57eb5e1c5c57972c95e8efab6bc81b87161b0b07 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 12:14:20 +0200 Subject: bpf: Fix uprobe_multi get_pid_task error path Dan reported Smatch static checker warning due to missing error value set in uprobe multi link's get_pid_task error path. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/bpf/c5ffa7c0-6b06-40d5-aca2-63833b5cd9af@moroto.mountain/ Signed-off-by: Jiri Olsa Reviewed-by: Song Liu Link: https://lore.kernel.org/r/20230915101420.1193800-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c1c1af63ced2..868008f56fec 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3223,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); rcu_read_unlock(); - if (!task) + if (!task) { + err = -ESRCH; goto error_path_put; + } } err = -ENOMEM; -- cgit From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 10:34:27 -0700 Subject: bpf: Fix BTF_ID symbol generation collision Marcus and Satya reported an issue where BTF_ID macro generates same symbol in separate objects and that breaks final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on better solution as suggested by Andrii. Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Jiri Olsa Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a3462a9b8e18..a9cb10b0e2e9 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -49,7 +49,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing -- cgit From c0bb9fb0e52a64601d38b3739b729d9138d4c8a1 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 15 Sep 2023 10:34:28 -0700 Subject: bpf: Fix BTF_ID symbol generation collision in tools/ Marcus and Satya reported an issue where BTF_ID macro generates same symbol in separate objects and that breaks final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on better solution as suggested by Andrii. Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Co-developed-by: Jiri Olsa Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-2-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- tools/include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 71e54b1e3796..2f882d5cb30f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -38,7 +38,7 @@ asm( \ ____BTF_ID(symbol) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing -- cgit From c656a4d5484ad99e97de549a9affc12a91d94963 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Sep 2023 15:46:08 -0400 Subject: Revert "SUNRPC: clean up integer overflow check" This reverts commit e87cf8a28e7592bd19064e8181324ae26bc02932. This commit was added to silence a tautological comparison warning, but removing the 'len' value check before calling xdr_inline_decode() is really not what we want. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5b4fb3c791bc..896a6d2a9cf0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,9 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; if (array == NULL) -- cgit From 993b5662f302628db4eb358d69b2720c88cbfaf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Sep 2023 16:12:33 -0400 Subject: SUNRPC: Silence compiler complaints about tautological comparisons On 64-bit systems, the compiler will complain that the comparison between SIZE_MAX and the 32-bit unsigned int 'len' is unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 896a6d2a9cf0..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) -- cgit From b2ce0027d7b2905495021c5208f92043eb493146 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 16 Sep 2023 08:07:25 +0200 Subject: ALSA: rawmidi: Fix NULL dereference at proc read At the implementation of the optional proc fs in rawmidi, I forgot that rmidi->ops itself is optional and can be NULL. Add the proper NULL check for avoiding the Oops. Fixes: fa030f666d24 ("ALSA: ump: Additional proc output") Reported-and-tested-by: Mark Hills Closes: https://lore.kernel.org/r/ef9118c3-a2eb-d0ff-1efa-cc5fb6416bde@xwax.org Cc: Link: https://lore.kernel.org/r/20230916060725.11726-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index ba06484ac4aa..1431cb997808 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1770,7 +1770,7 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, if (IS_ENABLED(CONFIG_SND_UMP)) snd_iprintf(buffer, "Type: %s\n", rawmidi_is_ump(rmidi) ? "UMP" : "Legacy"); - if (rmidi->ops->proc_read) + if (rmidi->ops && rmidi->ops->proc_read) rmidi->ops->proc_read(entry, buffer); mutex_lock(&rmidi->open_mutex); if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_OUTPUT) { -- cgit From 8f6b846b0a86c3cbae8a25b772651cfc2270ad0a Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 14 Sep 2023 18:02:52 -0400 Subject: ionic: fix 16bit math issue when PAGE_SIZE >= 64KB The ionic device supports a maximum buffer length of 16 bits (see ionic_rxq_desc or ionic_rxq_sg_elem). When adding new buffers to the receive rings, the function ionic_rx_fill() uses 16bit math when calculating the number of pages to allocate for an RX descriptor, given the interface's MTU setting. If the system PAGE_SIZE >= 64KB, and the buf_info->page_offset is 0, the remain_len value will never decrement from the original MTU value and the frag_len value will always be 0, causing additional pages to be allocated as scatter- gather elements unnecessarily. A similar math issue exists in ionic_rx_frags(), but no failures have been observed here since a 64KB page should not normally require any scatter-gather elements at any legal Ethernet MTU size. Fixes: 4b0a7539a372 ("ionic: implement Rx page reuse") Signed-off-by: David Christensen Reviewed-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_dev.h | 1 + drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 6aac98bcb9f4..aae4131f146a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -187,6 +187,7 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg); +#define IONIC_MAX_BUF_LEN ((u16)-1) #define IONIC_PAGE_SIZE PAGE_SIZE #define IONIC_PAGE_SPLIT_SZ (PAGE_SIZE / 2) #define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 26798fc635db..44466e8c5d77 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -207,7 +207,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, return NULL; } - frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - buf_info->page_offset)); len -= frag_len; dma_sync_single_for_cpu(dev, @@ -452,7 +453,8 @@ void ionic_rx_fill(struct ionic_queue *q) /* fill main descriptor - buf[0] */ desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); - frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - buf_info->page_offset)); desc->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; @@ -471,7 +473,9 @@ void ionic_rx_fill(struct ionic_queue *q) } sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); - frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset); + frag_len = min_t(u16, remain_len, min_t(u32, IONIC_MAX_BUF_LEN, + IONIC_PAGE_SIZE - + buf_info->page_offset)); sg_elem->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; -- cgit From aabb4af9bb29f8532e19c872b48ad1e7fd208617 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Sep 2023 14:09:57 +0300 Subject: net: core: Use the bitmap API to allocate bitmaps Use bitmap_zalloc() and bitmap_free() instead of hand-writing them. It is less verbose and it improves the type checking and semantic. While at it, add missing header inclusion (should be bitops.h, but with the above change it becomes bitmap.h). Suggested-by: Sergey Ryazanov Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230911154534.4174265-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index ccff2b6ef958..85df22f05c38 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -69,7 +69,7 @@ */ #include -#include +#include #include #include #include @@ -1080,7 +1080,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); + inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); if (!inuse) return -ENOMEM; @@ -1109,7 +1109,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } i = find_first_zero_bit(inuse, max_netdevices); - free_page((unsigned long) inuse); + bitmap_free(inuse); } snprintf(buf, IFNAMSIZ, name, i); -- cgit From cb47b1f679c4d83a5fa5f1852e472f844e41a3da Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Sep 2023 11:06:15 -0700 Subject: igc: Fix infinite initialization loop with early XDP redirect When an XDP redirect happens before the link is ready, that transmission will not finish and will timeout, causing an adapter reset. If the redirects do not stop, the adapter will not stop resetting. Wait for the driver to signal that there's a carrier before allowing transmissions to proceed. Previous code was relying that when __IGC_DOWN is cleared, the NIC is ready to transmit as all the queues are ready, what happens is that the carrier presence will only be signaled later, after the watchdog workqueue has a chance to run. And during this interval (between clearing __IGC_DOWN and the watchdog running) if any transmission happens the timeout is emitted (detected by igc_tx_timeout()) which causes the reset, with the potential for the infinite loop. Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Reported-by: Ferenc Fejes Closes: https://lore.kernel.org/netdev/0caf33cf6adb3a5bf137eeaa20e89b167c9986d5.camel@ericsson.com/ Signed-off-by: Vinicius Costa Gomes Tested-by: Ferenc Fejes Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 293b45717683..98de34d0ce07 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6491,7 +6491,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, struct igc_ring *ring; int i, drops; - if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) + if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) -- cgit From 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 14 Sep 2023 22:12:57 -0700 Subject: ipv4: fix null-deref in ipv4_link_failure Currently, we assume the skb is associated with a device before calling __ip_options_compile, which is not always the case if it is re-routed by ipvs. When skb->dev is NULL, dev_net(skb->dev) will become null-dereference. This patch adds a check for the edge case and switch to use the net_device from the rtable when skb->dev is NULL. Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Suggested-by: David Ahern Signed-off-by: Kyle Zeng Cc: Stephen Suryaputra Cc: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66f419e7f9a7..a57062283219 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) -- cgit From f4f82c52a0ead5ab363d207d06f81b967d09ffb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Sep 2023 17:11:11 +0000 Subject: scsi: iscsi_tcp: restrict to TCP sockets Nothing prevents iscsi_sw_tcp_conn_bind() to receive file descriptor pointing to non TCP socket (af_unix for example). Return -EINVAL if this is attempted, instead of crashing the kernel. Fixes: 7ba247138907 ("[SCSI] open-iscsi/linux-iscsi-5 Initiator: Initiator code") Signed-off-by: Eric Dumazet Cc: Lee Duncan Cc: Chris Leech Cc: Mike Christie Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: open-iscsi@googlegroups.com Cc: linux-scsi@vger.kernel.org Reviewed-by: Mike Christie Signed-off-by: David S. Miller --- drivers/scsi/iscsi_tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 9ab8555180a3..8e14cea15f98 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -724,6 +724,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, return -EEXIST; } + err = -EINVAL; + if (!sk_is_tcp(sock->sk)) + goto free_socket; + err = iscsi_conn_bind(cls_session, cls_conn, is_leading); if (err) goto free_socket; -- cgit From 4a73fca226925d5cae8ee032d37e9de637b25ed6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Sep 2023 11:45:34 -0700 Subject: perf bpf-prologue: Remove unused file Commit 3d6dfae88917 ("perf parse-events: Remove BPF event support") removed building bpf-prologue.c but failed to remove the actual file. Fixes: 3d6dfae88917 ("perf parse-events: Remove BPF event support") Signed-off-by: Ian Rogers Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230913184534.227961-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf-prologue.c | 508 ----------------------------------------- 1 file changed, 508 deletions(-) delete mode 100644 tools/perf/util/bpf-prologue.c diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c deleted file mode 100644 index 9887ae09242d..000000000000 --- a/tools/perf/util/bpf-prologue.c +++ /dev/null @@ -1,508 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-prologue.c - * - * Copyright (C) 2015 He Kuang - * Copyright (C) 2015 Wang Nan - * Copyright (C) 2015 Huawei Inc. - */ - -#include -#include "debug.h" -#include "bpf-loader.h" -#include "bpf-prologue.h" -#include "probe-finder.h" -#include -#include -#include -#include - -#define BPF_REG_SIZE 8 - -#define JMP_TO_ERROR_CODE -1 -#define JMP_TO_SUCCESS_CODE -2 -#define JMP_TO_USER_CODE -3 - -struct bpf_insn_pos { - struct bpf_insn *begin; - struct bpf_insn *end; - struct bpf_insn *pos; -}; - -static inline int -pos_get_cnt(struct bpf_insn_pos *pos) -{ - return pos->pos - pos->begin; -} - -static int -append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) -{ - if (!pos->pos) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - if (pos->pos + 1 >= pos->end) { - pr_err("bpf prologue: prologue too long\n"); - pos->pos = NULL; - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - } - - *(pos->pos)++ = new_insn; - return 0; -} - -static int -check_pos(struct bpf_insn_pos *pos) -{ - if (!pos->pos || pos->pos >= pos->end) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - return 0; -} - -/* - * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM - * instruction (BPF_{B,H,W,DW}). - */ -static int -argtype_to_ldx_size(const char *type) -{ - int arg_size = type ? atoi(&type[1]) : 64; - - switch (arg_size) { - case 8: - return BPF_B; - case 16: - return BPF_H; - case 32: - return BPF_W; - case 64: - default: - return BPF_DW; - } -} - -static const char * -insn_sz_to_str(int insn_sz) -{ - switch (insn_sz) { - case BPF_B: - return "BPF_B"; - case BPF_H: - return "BPF_H"; - case BPF_W: - return "BPF_W"; - case BPF_DW: - return "BPF_DW"; - default: - return "UNKNOWN"; - } -} - -/* Give it a shorter name */ -#define ins(i, p) append_insn((i), (p)) - -/* - * Give a register name (in 'reg'), generate instruction to - * load register into an eBPF register rd: - * 'ldd target_reg, offset(ctx_reg)', where: - * ctx_reg is pre initialized to pointer of 'struct pt_regs'. - */ -static int -gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, - const char *reg, int target_reg) -{ - int offset = regs_query_register_offset(reg); - - if (offset < 0) { - pr_err("bpf: prologue: failed to get register %s\n", - reg); - return offset; - } - ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); - - return check_pos(pos); -} - -/* - * Generate a BPF_FUNC_probe_read function call. - * - * src_base_addr_reg is a register holding base address, - * dst_addr_reg is a register holding dest address (on stack), - * result is: - * - * *[dst_addr_reg] = *([src_base_addr_reg] + offset) - * - * Arguments of BPF_FUNC_probe_read: - * ARG1: ptr to stack (dest) - * ARG2: size (8) - * ARG3: unsafe ptr (src) - */ -static int -gen_read_mem(struct bpf_insn_pos *pos, - int src_base_addr_reg, - int dst_addr_reg, - long offset, - int probeid) -{ - /* mov arg3, src_base_addr_reg */ - if (src_base_addr_reg != BPF_REG_ARG3) - ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); - /* add arg3, #offset */ - if (offset) - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); - - /* mov arg2, #reg_size */ - ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); - - /* mov arg1, dst_addr_reg */ - if (dst_addr_reg != BPF_REG_ARG1) - ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); - - /* Call probe_read */ - ins(BPF_EMIT_CALL(probeid), pos); - /* - * Error processing: if read fail, goto error code, - * will be relocated. Target should be the start of - * error processing code. - */ - ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), - pos); - - return check_pos(pos); -} - -/* - * Each arg should be bare register. Fetch and save them into argument - * registers (r3 - r5). - * - * BPF_REG_1 should have been initialized with pointer to - * 'struct pt_regs'. - */ -static int -gen_prologue_fastpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int i, err = 0; - - for (i = 0; i < nargs; i++) { - err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, - BPF_PROLOGUE_START_ARG_REG + i); - if (err) - goto errout; - } - - return check_pos(pos); -errout: - return err; -} - -/* - * Slow path: - * At least one argument has the form of 'offset($rx)'. - * - * Following code first stores them into stack, then loads all of then - * to r2 - r5. - * Before final loading, the final result should be: - * - * low address - * BPF_REG_FP - 24 ARG3 - * BPF_REG_FP - 16 ARG2 - * BPF_REG_FP - 8 ARG1 - * BPF_REG_FP - * high address - * - * For each argument (described as: offn(...off2(off1(reg)))), - * generates following code: - * - * r7 <- fp - * r7 <- r7 - stack_offset // Ideal code should initialize r7 using - * // fp before generating args. However, - * // eBPF won't regard r7 as stack pointer - * // if it is generated by minus 8 from - * // another stack pointer except fp. - * // This is why we have to set r7 - * // to fp for each variable. - * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() - * (r7) <- r3 // skip following instructions for bare reg - * r3 <- r3 + off1 . // skip if off1 == 0 - * r2 <- 8 \ - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * r3 <- (r7) - * r3 <- r3 + off2 . // skip if off2 == 0 - * r2 <- 8 \ // r2 may be broken by probe_read, so set again - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * ... - */ -static int -gen_prologue_slowpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int err, i, probeid; - - for (i = 0; i < nargs; i++) { - struct probe_trace_arg *arg = &args[i]; - const char *reg = arg->value; - struct probe_trace_arg_ref *ref = NULL; - int stack_offset = (i + 1) * -8; - - pr_debug("prologue: fetch arg %d, base reg is %s\n", - i, reg); - - /* value of base register is stored into ARG3 */ - err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, - BPF_REG_ARG3); - if (err) { - pr_err("prologue: failed to get offset of register %s\n", - reg); - goto errout; - } - - /* Make r7 the stack pointer. */ - ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); - /* r7 += -8 */ - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); - /* - * Store r3 (base register) onto stack - * Ensure fp[offset] is set. - * fp is the only valid base register when storing - * into stack. We are not allowed to use r7 as base - * register here. - */ - ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, - stack_offset), pos); - - ref = arg->ref; - probeid = BPF_FUNC_probe_read_kernel; - while (ref) { - pr_debug("prologue: arg %d: offset %ld\n", - i, ref->offset); - - if (ref->user_access) - probeid = BPF_FUNC_probe_read_user; - - err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, - ref->offset, probeid); - if (err) { - pr_err("prologue: failed to generate probe_read function call\n"); - goto errout; - } - - ref = ref->next; - /* - * Load previous result into ARG3. Use - * BPF_REG_FP instead of r7 because verifier - * allows FP based addressing only. - */ - if (ref) - ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, - BPF_REG_FP, stack_offset), pos); - } - } - - /* Final pass: read to registers */ - for (i = 0; i < nargs; i++) { - int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW; - - pr_debug("prologue: load arg %d, insn_sz is %s\n", - i, insn_sz_to_str(insn_sz)); - ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i, - BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); - } - - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); - - return check_pos(pos); -errout: - return err; -} - -static int -prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, - struct bpf_insn *success_code, struct bpf_insn *user_code) -{ - struct bpf_insn *insn; - - if (check_pos(pos)) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - for (insn = pos->begin; insn < pos->pos; insn++) { - struct bpf_insn *target; - u8 class = BPF_CLASS(insn->code); - u8 opcode; - - if (class != BPF_JMP) - continue; - opcode = BPF_OP(insn->code); - if (opcode == BPF_CALL) - continue; - - switch (insn->off) { - case JMP_TO_ERROR_CODE: - target = error_code; - break; - case JMP_TO_SUCCESS_CODE: - target = success_code; - break; - case JMP_TO_USER_CODE: - target = user_code; - break; - default: - pr_err("bpf prologue: internal error: relocation failed\n"); - return -BPF_LOADER_ERRNO__PROLOGUE; - } - - insn->off = target - (insn + 1); - } - return 0; -} - -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space) -{ - struct bpf_insn *success_code = NULL; - struct bpf_insn *error_code = NULL; - struct bpf_insn *user_code = NULL; - struct bpf_insn_pos pos; - bool fastpath = true; - int err = 0, i; - - if (!new_prog || !new_cnt) - return -EINVAL; - - if (cnt_space > BPF_MAXINSNS) - cnt_space = BPF_MAXINSNS; - - pos.begin = new_prog; - pos.end = new_prog + cnt_space; - pos.pos = new_prog; - - if (!nargs) { - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), - &pos); - - if (check_pos(&pos)) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; - } - - if (nargs > BPF_PROLOGUE_MAX_ARGS) { - pr_warning("bpf: prologue: %d arguments are dropped\n", - nargs - BPF_PROLOGUE_MAX_ARGS); - nargs = BPF_PROLOGUE_MAX_ARGS; - } - - /* First pass: validation */ - for (i = 0; i < nargs; i++) { - struct probe_trace_arg_ref *ref = args[i].ref; - - if (args[i].value[0] == '@') { - /* TODO: fetch global variable */ - pr_err("bpf: prologue: global %s%+ld not support\n", - args[i].value, ref ? ref->offset : 0); - return -ENOTSUP; - } - - while (ref) { - /* fastpath is true if all args has ref == NULL */ - fastpath = false; - - /* - * Instruction encodes immediate value using - * s32, ref->offset is long. On systems which - * can't fill long in s32, refuse to process if - * ref->offset too large (or small). - */ -#ifdef __LP64__ -#define OFFSET_MAX ((1LL << 31) - 1) -#define OFFSET_MIN ((1LL << 31) * -1) - if (ref->offset > OFFSET_MAX || - ref->offset < OFFSET_MIN) { - pr_err("bpf: prologue: offset out of bound: %ld\n", - ref->offset); - return -BPF_LOADER_ERRNO__PROLOGUEOOB; - } -#endif - ref = ref->next; - } - } - pr_debug("prologue: pass validation\n"); - - if (fastpath) { - /* If all variables are registers... */ - pr_debug("prologue: fast path\n"); - err = gen_prologue_fastpath(&pos, args, nargs); - if (err) - goto errout; - } else { - pr_debug("prologue: slow path\n"); - - /* Initialization: move ctx to a callee saved register. */ - ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); - - err = gen_prologue_slowpath(&pos, args, nargs); - if (err) - goto errout; - /* - * start of ERROR_CODE (only slow pass needs error code) - * mov r2 <- 1 // r2 is error number - * mov r3 <- 0 // r3, r4... should be touched or - * // verifier would complain - * mov r4 <- 0 - * ... - * goto usercode - */ - error_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), - &pos); - - for (i = 0; i < nargs; i++) - ins(BPF_ALU64_IMM(BPF_MOV, - BPF_PROLOGUE_START_ARG_REG + i, - 0), - &pos); - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), - &pos); - } - - /* - * start of SUCCESS_CODE: - * mov r2 <- 0 - * goto usercode // skip - */ - success_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); - - /* - * start of USER_CODE: - * Restore ctx to r1 - */ - user_code = pos.pos; - if (!fastpath) { - /* - * Only slow path needs restoring of ctx. In fast path, - * register are loaded directly from r1. - */ - ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); - err = prologue_relocate(&pos, error_code, success_code, - user_code); - if (err) - goto errout; - } - - err = check_pos(&pos); - if (err) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; -errout: - return err; -} -- cgit From 33b725ce7b9887569749fbbcdafc4ab089a09741 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Sep 2023 11:49:57 -0700 Subject: perf trace: Avoid compile error wrt redefining bool Make part of an existing TODO conditional to avoid the following build error: ``` tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c:26:14: error: cannot combine with previous 'char' declaration specifier 26 | typedef char bool; | ^ include/stdbool.h:20:14: note: expanded from macro 'bool' 20 | #define bool _Bool | ^ tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c:26:1: error: typedef requires a name [-Werror,-Wmissing-declarations] 26 | typedef char bool; | ^~~~~~~~~~~~~~~~~ 2 errors generated. ``` Signed-off-by: Ian Rogers Cc: Leo Yan Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230913184957.230076-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 90ce22f9c1a9..939ec769bf4a 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -23,7 +23,9 @@ #define MAX_CPUS 4096 // FIXME: These should come from system headers +#ifndef bool typedef char bool; +#endif typedef int pid_t; typedef long long int __s64; typedef __s64 time64_t; -- cgit From d1bac78e26e513ce2cf1b47fcc667b33f51c6a36 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Sep 2023 19:22:04 -0700 Subject: perf jevents metric: Fix type of strcmp_cpuid_str The parser wraps all strings as Events, so the input is an Event. Using a string would be bad as functions like Simplify are called on the arguments, which wouldn't be present on a string. Fixes: 9d5da30e4ae9 ("perf jevents: Add a new expression builtin strcmp_cpuid_str()") Signed-off-by: Ian Rogers Cc: James Clark Cc: Kajol Jain Cc: John Garry Link: https://lore.kernel.org/r/20230914022204.1488383-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 0e9ec65d92ae..3e673f25d5fd 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -413,10 +413,10 @@ def has_event(event: Event) -> Function: # pylint: disable=invalid-name return Function('has_event', event) -def strcmp_cpuid_str(event: str) -> Function: +def strcmp_cpuid_str(cpuid: Event) -> Function: # pylint: disable=redefined-builtin # pylint: disable=invalid-name - return Function('strcmp_cpuid_str', event) + return Function('strcmp_cpuid_str', cpuid) class Metric: """An individual metric that will specifiable on the perf command line.""" -- cgit From eaaebb01a7a6d7d422f80e0dacf9a07fb90f22dc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Sep 2023 19:24:25 -0700 Subject: perf pmu: Ensure all alias variables are initialized Fix an error detected by memory sanitizer: ``` ==4033==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x55fb0fbedfc7 in read_alias_info tools/perf/util/pmu.c:457:6 #1 0x55fb0fbea339 in check_info_data tools/perf/util/pmu.c:1434:2 #2 0x55fb0fbea339 in perf_pmu__check_alias tools/perf/util/pmu.c:1504:9 #3 0x55fb0fbdca85 in parse_events_add_pmu tools/perf/util/parse-events.c:1429:32 #4 0x55fb0f965230 in parse_events_parse tools/perf/util/parse-events.y:299:6 #5 0x55fb0fbdf6b2 in parse_events__scanner tools/perf/util/parse-events.c:1822:8 #6 0x55fb0fbdf8c1 in __parse_events tools/perf/util/parse-events.c:2094:8 #7 0x55fb0fa8ffa9 in parse_events tools/perf/util/parse-events.h:41:9 #8 0x55fb0fa8ffa9 in test_event tools/perf/tests/parse-events.c:2393:8 #9 0x55fb0fa8f458 in test__pmu_events tools/perf/tests/parse-events.c:2551:15 #10 0x55fb0fa6d93f in run_test tools/perf/tests/builtin-test.c:242:9 #11 0x55fb0fa6d93f in test_and_print tools/perf/tests/builtin-test.c:271:8 #12 0x55fb0fa6d082 in __cmd_test tools/perf/tests/builtin-test.c:442:5 #13 0x55fb0fa6d082 in cmd_test tools/perf/tests/builtin-test.c:564:9 #14 0x55fb0f942720 in run_builtin tools/perf/perf.c:322:11 #15 0x55fb0f942486 in handle_internal_command tools/perf/perf.c:375:8 #16 0x55fb0f941dab in run_argv tools/perf/perf.c:419:2 #17 0x55fb0f941dab in main tools/perf/perf.c:535:3 ``` Fixes: 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs") Signed-off-by: Ian Rogers Cc: James Clark Cc: Kan Liang Link: https://lore.kernel.org/r/20230914022425.1489035-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d85602aa4b9f..8de6f39abd1b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -520,7 +520,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, pmu_name = pe->pmu; } - alias = malloc(sizeof(*alias)); + alias = zalloc(sizeof(*alias)); if (!alias) return -ENOMEM; -- cgit From e47749f1796d1df39a7eaae95f2784aaa43df57d Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 13 Sep 2023 14:51:57 +0200 Subject: perf jevent: fix core dump on software events on s390 Running commands such as # ./perf stat -e cs -- true Segmentation fault (core dumped) # ./perf stat -e cpu-clock-- true Segmentation fault (core dumped) # dump core. This should not happen as these events are defined even when no hardware PMU is available. Debugging this reveals this call chain: perf_pmus__find_by_type(type=1) +--> pmu_read_sysfs(core_only=false) +--> perf_pmu__find2(dirfd=3, name=0x152a113 "software") +--> perf_pmu__lookup(pmus=0x14f0568 , dirfd=3, lookup_name=0x152a113 "software") +--> perf_pmu__find_events_table (pmu=0x1532130) Now the pmu is "software" and it tries to find a proper table generated by the pmu-event generation process for s390: # cd pmu-events/ # ./jevents.py s390 all /root/linux/tools/perf/pmu-events/arch |\ grep -E '^const struct pmu_table_entry' const struct pmu_table_entry pmu_events__cf_z10[] = { const struct pmu_table_entry pmu_events__cf_z13[] = { const struct pmu_table_entry pmu_metrics__cf_z13[] = { const struct pmu_table_entry pmu_events__cf_z14[] = { const struct pmu_table_entry pmu_metrics__cf_z14[] = { const struct pmu_table_entry pmu_events__cf_z15[] = { const struct pmu_table_entry pmu_metrics__cf_z15[] = { const struct pmu_table_entry pmu_events__cf_z16[] = { const struct pmu_table_entry pmu_metrics__cf_z16[] = { const struct pmu_table_entry pmu_events__cf_z196[] = { const struct pmu_table_entry pmu_events__cf_zec12[] = { const struct pmu_table_entry pmu_metrics__cf_zec12[] = { const struct pmu_table_entry pmu_events__test_soc_cpu[] = { const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { const struct pmu_table_entry pmu_events__test_soc_sys[] = { # However event "software" is not listed, as can be seen in the generated const struct pmu_events_map pmu_events_map[]. So in function perf_pmu__find_events_table(), the variable table is initialized to NULL, but never set to a proper value. The function scans all generated &pmu_events_map[] tables, but no table matches, because the tables are s390 CPU Measurement unit specific: i = 0; for (;;) { const struct pmu_events_map *map = &pmu_events_map[i++]; if (!map->arch) break; --> the maps are there because the build generated them if (!strcmp_cpuid_str(map->cpuid, cpuid)) { table = &map->event_table; break; } --> Since no matching CPU string the table var remains 0x0 } free(cpuid); if (!pmu) return table; --> The pmu is "software" so it exists and no return --> and here perf dies because table is 0x0 for (i = 0; i < table->num_pmus; i++) { ... } return NULL; Fix this and do not access the table variable. Instead return 0x0 which is the same return code when the for-loop was not successful. Output after: # ./perf stat -e cs -- true Performance counter stats for 'true': 0 cs 0.000853105 seconds time elapsed 0.000061000 seconds user 0.000827000 seconds sys # ./perf stat -e cpu-clock -- true Performance counter stats for 'true': 0.25 msec cpu-clock # 0.341 CPUs utilized 0.000728383 seconds time elapsed 0.000055000 seconds user 0.000706000 seconds sys # ./perf stat -e cycles -- true Performance counter stats for 'true': cycles 0.000767298 seconds time elapsed 0.000055000 seconds user 0.000739000 seconds sys # Fixes: 7c52f10c0d4d8 ("perf pmu: Cache JSON events table") Signed-off-by: Thomas Richter Reviewed-by: Ian Rogers Cc: dengler@linux.ibm.com Cc: gor@linux.ibm.com Cc: hca@linux.ibm.com Cc: sumanthk@linux.ibm.com Cc: svens@linux.ibm.com Link: https://lore.kernel.org/r/20230913125157.2790375-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/jevents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index a7e88332276d..72ba4a9239c6 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -991,7 +991,7 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) } } free(cpuid); - if (!pmu) + if (!pmu || !table) return table; for (i = 0; i < table->num_pmus; i++) { -- cgit From 4ff3ba4db5943cac1045e3e4a3c0463ea10f6930 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 25 Aug 2023 11:26:01 +0530 Subject: powerpc/perf/hv-24x7: Update domain value check Valid domain value is in range 1 to HV_PERF_DOMAIN_MAX. Current code has check for domain value greater than or equal to HV_PERF_DOMAIN_MAX. But the check for domain value 0 is missing. Fix this issue by adding check for domain value 0. Before: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Error: The sys_perf_event_open() syscall returned with 5 (Input/output error) for event (hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/). /bin/dmesg | grep -i perf may provide additional information. Result from dmesg: [ 37.819387] hv-24x7: hcall failed: [0 0x60040000 0x100 0] => ret 0xfffffffffffffffc (-4) detail=0x2000000 failing ix=0 After: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Warning: hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ event is not supported by the kernel. failed to read counter hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ Fixes: ebd4a5a3ebd9 ("powerpc/perf/hv-24x7: Minor improvements") Reported-by: Krishan Gopal Sarawast Signed-off-by: Kajol Jain Tested-by: Disha Goel Signed-off-by: Michael Ellerman Link: https://msgid.link/20230825055601.360083-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 317175791d23..3449be7c0d51 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1418,7 +1418,7 @@ static int h_24x7_event_init(struct perf_event *event) } domain = event_get_domain(event); - if (domain >= HV_PERF_DOMAIN_MAX) { + if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) { pr_devel("invalid domain %d\n", domain); return -EINVAL; } -- cgit From cc879ab3ce39bc39f9b1d238b283f43a5f6f957d Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:55 +1000 Subject: powerpc/watchpoints: Disable preemption in thread_change_pc() thread_change_pc() uses CPU local data, so must be protected from swapping CPUs while it is reading the breakpoint struct. The error is more noticeable after 1e60f3564bad ("powerpc/watchpoints: Track perf single step directly on the breakpoint"), which added an unconditional __this_cpu_read() call in thread_change_pc(). However the existing __this_cpu_read() that runs if a breakpoint does need to be re-inserted has the same issue. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-2-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b8513dc3e53a..2854376870cf 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -230,13 +230,15 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) struct arch_hw_breakpoint *info; int i; + preempt_disable(); + for (i = 0; i < nr_wp_slots(); i++) { struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } - return; + goto out; reset: regs_set_return_msr(regs, regs->msr & ~MSR_SE); @@ -245,6 +247,9 @@ reset: __set_breakpoint(i, info); info->perf_single_step = false; } + +out: + preempt_enable(); } static bool is_larx_stcx_instr(int type) -- cgit From 3241f260eb830d27d09cc604690ec24533fdb433 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:56 +1000 Subject: powerpc/watchpoint: Disable pagefaults when getting user instruction This is called in an atomic context, so is not allowed to sleep if a user page needs to be faulted in and has nowhere it can be deferred to. The pagefault_disabled() function is documented as preventing user access methods from sleeping. In practice the page will be mapped in nearly always because we are reading the instruction that just triggered the watchpoint trap. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-3-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint_constraints.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index a74623025f3a..9e51801c4915 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -131,8 +131,13 @@ void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; + int err; - if (__get_user_instr(*instr, (void __user *)regs->nip)) + pagefault_disable(); + err = __get_user_instr(*instr, (void __user *)regs->nip); + pagefault_enable(); + + if (err) return; analyse_instr(&op, regs, *instr); -- cgit From 27646b2e02b096a6936b3e3b6ba334ae20763eab Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 29 Aug 2023 16:34:57 +1000 Subject: powerpc/watchpoints: Annotate atomic context in more places It can be easy to miss that the notifier mechanism invokes the callbacks in an atomic context, so add some comments to that effect on the two handlers we register here. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829063457.54157-4-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2854376870cf..a1318ce18d0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -368,6 +368,11 @@ static void handle_p10dd1_spurious_exception(struct perf_event **bp, } } +/* + * Handle a DABR or DAWR exception. + * + * Called in atomic context. + */ int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -495,6 +500,8 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. + * + * Called in atomic context. */ static int single_step_dabr_instruction(struct die_args *args) { @@ -546,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. + * + * Called in atomic context. */ int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) -- cgit From 60d77ed24bb3068c0837fe45b8921b0a6598829d Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 13 Sep 2023 19:11:29 +0530 Subject: powerpc: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION and FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY We recently added support for -fpatchable-function-entry and it is enabled by default on ppc32 (ppc64 needs gcc v13.1.0). When building the kernel for ppc32 and also enabling CONFIG_LD_DEAD_CODE_DATA_ELIMINATION, we see the below build error with older gcc versions: powerpc-linux-gnu-ld: init/main.o(__patchable_function_entries): error: need linked-to section for --gc-sections This error is thrown since __patchable_function_entries section would be garbage collected with --gc-sections since it does not reference any other kept sections. This has subsequently been fixed with: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=b7d072167715829eed0622616f6ae0182900de3e Disable LD_DEAD_CODE_DATA_ELIMINATION for gcc versions before v11.1.0 if using -fpatchable-function-entry to avoid this bug. Fixes: 0f71dcfb4aef ("powerpc/ftrace: Add support for -fpatchable-function-entry") Reported-by: Michael Ellerman Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20230913134129.2782088-1-naveen@kernel.org --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 54b9387c3691..3aaadfd2c8eb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -255,7 +255,7 @@ config PPC select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100)) select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) -- cgit From 6901a9f9ef1561111283a0d8c8d1cea634d089ef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 14 Sep 2023 17:23:45 +0200 Subject: powerpc/82xx: Select FSL_SOC It used to be impossible to select CONFIG_CPM2 without selecting CONFIG_FSL_SOC at the same time because CONFIG_CPM2 was dependent on CONFIG_8260 and CONFIG_8260 was selecting CONFIG_FSL_SOC. But after commit eb5aa2137275 ("powerpc/82xx: Remove CONFIG_8260 and CONFIG_8272") CONFIG_CPM2 depends on CONFIG_PPC_82xx instead but CONFIG_PPC_82xx doesn't directly selects CONFIG_FSL_SOC. Fix it by forcing CONFIG_PPC_82xx to select CONFIG_FSL_SOC just like already done by PPC_8xx, PPC_MPC512x, PPC_83xx, PPC_86xx. Reported-by: Randy Dunlap Fixes: eb5aa2137275 ("powerpc/82xx: Remove CONFIG_8260 and CONFIG_8272") Signed-off-by: Christophe Leroy Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://msgid.link/7ab513546148ebe33ddd4b0ea92c7bfd3cce3ad7.1694705016.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/82xx/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index d9f1a2a83158..1824536cf6f2 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -2,6 +2,7 @@ menuconfig PPC_82xx bool "82xx-based boards (PQ II)" depends on PPC_BOOK3S_32 + select FSL_SOC if PPC_82xx @@ -9,7 +10,6 @@ config EP8248E bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC select PHYLIB if NETDEVICES select MDIO_BITBANG if PHYLIB help @@ -22,7 +22,6 @@ config MGCOGE bool "Keymile MGCOGE" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC help This enables support for the Keymile MGCOGE board. -- cgit From c3f4309693758b13fbb34b3741c2e2801ad28769 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 15 Sep 2023 13:46:04 +1000 Subject: powerpc/dexcr: Move HASHCHK trap handler Syzkaller reported a sleep in atomic context bug relating to the HASHCHK handler logic: BUG: sleeping function called from invalid context at arch/powerpc/kernel/traps.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 25040, name: syz-executor preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 no locks held by syz-executor/25040. irq event stamp: 34 hardirqs last enabled at (33): [] prep_irq_for_enabled_exit arch/powerpc/kernel/interrupt.c:56 [inline] hardirqs last enabled at (33): [] interrupt_exit_user_prepare_main+0x148/0x600 arch/powerpc/kernel/interrupt.c:230 hardirqs last disabled at (34): [] interrupt_enter_prepare+0x144/0x4f0 arch/powerpc/include/asm/interrupt.h:176 softirqs last enabled at (0): [] copy_process+0x16e4/0x4750 kernel/fork.c:2436 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 15 PID: 25040 Comm: syz-executor Not tainted 6.5.0-rc5-00001-g3ccdff6bb06d #3 Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1040.00 (NL1040_021) hv:phyp pSeries Call Trace: [c0000000a8247ce0] [c00000000032b0e4] __might_resched+0x3b4/0x400 kernel/sched/core.c:10189 [c0000000a8247d80] [c0000000008c7dc8] __might_fault+0xa8/0x170 mm/memory.c:5853 [c0000000a8247dc0] [c00000000004160c] do_program_check+0x32c/0xb20 arch/powerpc/kernel/traps.c:1518 [c0000000a8247e50] [c000000000009b2c] program_check_common_virt+0x3bc/0x3c0 To determine if a trap was caused by a HASHCHK instruction, we inspect the user instruction that triggered the trap. However this may sleep if the page needs to be faulted in (get_user_instr() reaches __get_user(), which calls might_fault() and triggers the bug message). Move the HASHCHK handler logic to after we allow IRQs, which is fine because we are only interested in HASHCHK if it's a user space trap. Fixes: 5bcba4e6c13f ("powerpc/dexcr: Handle hashchk exception") Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230915034604.45393-1-bgray@linux.ibm.com --- arch/powerpc/kernel/traps.c | 56 +++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index eeff136b83d9..64ff37721fd0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1512,23 +1512,11 @@ static void do_program_check(struct pt_regs *regs) return; } - if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) { - ppc_inst_t insn; - - if (get_user_instr(insn, (void __user *)regs->nip)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } - - if (ppc_inst_primary_opcode(insn) == 31 && - get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { - _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; - } + /* User mode considers other cases after enabling IRQs */ + if (!user_mode(regs)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; } - - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1561,16 +1549,44 @@ static void do_program_check(struct pt_regs *regs) /* * If we took the program check in the kernel skip down to sending a - * SIGILL. The subsequent cases all relate to emulating instructions - * which we should only do for userspace. We also do not want to enable - * interrupts for kernel faults because that might lead to further - * faults, and loose the context of the original exception. + * SIGILL. The subsequent cases all relate to user space, such as + * emulating instructions which we should only do for user space. We + * also do not want to enable interrupts for kernel faults because that + * might lead to further faults, and loose the context of the original + * exception. */ if (!user_mode(regs)) goto sigill; interrupt_cond_local_irq_enable(regs); + /* + * (reason & REASON_TRAP) is mostly handled before enabling IRQs, + * except get_user_instr() can sleep so we cannot reliably inspect the + * current instruction in that context. Now that we know we are + * handling a user space trap and can sleep, we can check if the trap + * was a hashchk failure. + */ + if (reason & REASON_TRAP) { + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + ppc_inst_t insn; + + if (get_user_instr(insn, (void __user *)regs->nip)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + + if (ppc_inst_primary_opcode(insn) == 31 && + get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } + } + + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to -- cgit From e28a0974d749e5105d77233c0a84d35c37da047e Mon Sep 17 00:00:00 2001 From: Max Nguyen Date: Sun, 17 Sep 2023 22:21:53 -0700 Subject: Input: xpad - add HyperX Clutch Gladiate Support Add HyperX controller support to xpad_device and xpad_table. Suggested-by: Chris Toledanes Reviewed-by: Carl Ng Signed-off-by: Max Nguyen Reviewed-by: Rahul Rameshbabu Link: https://lore.kernel.org/r/20230906231514.4291-1-hphyperxdev@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index ede380551e55..b22f9c6e7fa5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -130,6 +130,7 @@ static const struct xpad_device { { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, + { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, @@ -459,6 +460,7 @@ static const struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ + XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ -- cgit From 3780bb29311eccb7a1c9641032a112eed237f7e3 Mon Sep 17 00:00:00 2001 From: Johnathan Mantey Date: Fri, 15 Sep 2023 09:12:35 -0700 Subject: ncsi: Propagate carrier gain/loss events to the NCSI controller Report the carrier/no-carrier state for the network interface shared between the BMC and the passthrough channel. Without this functionality the BMC is unable to reconfigure the NIC in the event of a re-cabling to a different subnet. Signed-off-by: Johnathan Mantey Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 62fb1031763d..f8854bff286c 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -89,6 +89,11 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, if ((had_link == has_link) || chained) return 0; + if (had_link) + netif_carrier_off(ndp->ndev.dev); + else + netif_carrier_on(ndp->ndev.dev); + if (!ndp->multi_package && !nc->package->multi_channel) { if (had_link) { ndp->flags |= NCSI_DEV_RESHUFFLE; -- cgit From 6af289746a636f71f4c0535a9801774118486c7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Sep 2023 19:00:35 +0000 Subject: dccp: fix dccp_v4_err()/dccp_v6_err() again dh->dccph_x is the 9th byte (offset 8) in "struct dccp_hdr", not in the "byte 7" as Jann claimed. We need to make sure the ICMP messages are big enough, using more standard ways (no more assumptions). syzbot reported: BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2681 [inline] BUG: KMSAN: uninit-value in dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] pskb_may_pull include/linux/skbuff.h:2681 [inline] dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 icmpv6_notify+0x4c7/0x880 net/ipv6/icmp.c:867 icmpv6_rcv+0x19d5/0x30d0 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:304 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:468 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:304 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5523 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637 netif_receive_skb_internal net/core/dev.c:5723 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5782 tun_rx_batched+0x83b/0x920 tun_get_user+0x564c/0x6940 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559 __alloc_skb+0x318/0x740 net/core/skbuff.c:650 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6313 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2795 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x23cf/0x6940 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 0 PID: 4995 Comm: syz-executor153 Not tainted 6.6.0-rc1-syzkaller-00014-ga747acc0b752 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 Fixes: 977ad86c2a1b ("dccp: Fix out of bounds access in DCCP error handler") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jann Horn Reviewed-by: Jann Horn Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 9 ++------- net/dccp/ipv6.c | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8f56e8723c73..69453b936bd5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -254,13 +254,8 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return -EINVAL; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return -EINVAL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 33f6ccf6ba77..c693a570682f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -83,13 +83,8 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmpv6_notify()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return -EINVAL; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return -EINVAL; -- cgit From 6b5f0749ce48c13d7f53b27c39d00bba46e1fd1c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Sep 2023 14:23:52 +0300 Subject: RDMA/erdma: Fix error code in erdma_create_scatter_mtt() The erdma_create_scatter_mtt() function is supposed to return error pointers. Returning NULL will lead to an Oops. Fixes: ed10435d3583 ("RDMA/erdma: Implement hierarchical MTT") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/1eb400d5-d8a3-4a8e-b3da-c43c6c377f86@moroto.mountain Acked-by: Cheng Xu Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index dcccb6015232..70eaed59a67c 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -655,7 +655,7 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); if (!mtt) - return NULL; + return ERR_PTR(-ENOMEM); mtt->size = ALIGN(size, PAGE_SIZE); mtt->buf = vzalloc(mtt->size); -- cgit From 34cf99c250d5cd2530b93a57b0de31d3aaf8685b Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 17 Aug 2023 13:55:58 -0400 Subject: x86/mm, kexec, ima: Use memblock_free_late() from ima_free_kexec_buffer() The code calling ima_free_kexec_buffer() runs long after the memblock allocator has already been torn down, potentially resulting in a use after free in memblock_isolate_range(). With KASAN or KFENCE, this use after free will result in a BUG from the idle task, and a subsequent kernel panic. Switch ima_free_kexec_buffer() over to memblock_free_late() to avoid that bug. Fixes: fee3ff99bc67 ("powerpc: Move arch independent ima kexec functions to drivers/of/kexec.c") Suggested-by: Mike Rappoport Signed-off-by: Rik van Riel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230817135558.67274c83@imladris.surriel.com --- arch/x86/kernel/setup.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b9145a63da77..b098b1fa2470 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -358,15 +358,11 @@ static void __init add_early_ima_buffer(u64 phys_addr) #if defined(CONFIG_HAVE_IMA_KEXEC) && !defined(CONFIG_OF_FLATTREE) int __init ima_free_kexec_buffer(void) { - int rc; - if (!ima_kexec_buffer_size) return -ENOENT; - rc = memblock_phys_free(ima_kexec_buffer_phys, - ima_kexec_buffer_size); - if (rc) - return rc; + memblock_free_late(ima_kexec_buffer_phys, + ima_kexec_buffer_size); ima_kexec_buffer_phys = 0; ima_kexec_buffer_size = 0; -- cgit From 295de650d3aaf9e50258465c5f1c84b465d836f6 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Fri, 15 Sep 2023 20:10:02 +0200 Subject: net: hsr: Properly parse HSRv1 supervisor frames. While adding support for parsing the redbox supervision frames, the author added `pull_size' and `total_pull_size' to track the amount of bytes that were pulled from the skb during while parsing the skb so it can be reverted/ pushed back at the end. In the process probably copy&paste error occurred and for the HSRv1 case the ethhdr was used instead of the hsr_tag. Later the hsr_tag was used instead of hsr_sup_tag. The later error didn't matter because both structs have the size so HSRv0 was still working. It broke however HSRv1 parsing because struct ethhdr is larger than struct hsr_tag. Reinstate the old pulling flow and pull first ethhdr, hsr_tag in v1 case followed by hsr_sup_tag. [bigeasy: commit message] Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames")' Suggested-by: Tristram.Ha@microchip.com Signed-off-by: Lukasz Majewski Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index b77f1189d19d..6d14d935ee82 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) /* And leave the HSR tag. */ if (ethhdr->h_proto == htons(ETH_P_HSR)) { - pull_size = sizeof(struct ethhdr); + pull_size = sizeof(struct hsr_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; } /* And leave the HSR sup tag. */ - pull_size = sizeof(struct hsr_tag); + pull_size = sizeof(struct hsr_sup_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; -- cgit From fbd825fcd7dd4c11d4c48c3d0adc248a4a0ce90b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:03 +0200 Subject: net: hsr: Add __packed to struct hsr_sup_tlv. Struct hsr_sup_tlv describes HW layout and therefore it needs a __packed attribute to ensure the compiler does not add any padding. Due to the size and __packed attribute of the structs that use hsr_sup_tlv it has no functional impact. Add __packed to struct hsr_sup_tlv. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/hsr/hsr_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 6851e33df7d1..18e01791ad79 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr { struct hsr_sup_tlv { u8 HSR_TLV_type; u8 HSR_TLV_length; -}; +} __packed; /* HSR/PRP Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. -- cgit From 5c3ce539a11185268aff3bb30d2fad8c7fa42f86 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:04 +0200 Subject: selftests: hsr: Use `let' properly. The timeout in the while loop is never subtracted due wrong usage of `let' leading to an endless loop if the former condition never gets true. Put the statement for let in quotes so it is parsed as a single statement. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index df9143538708..183f4a0f19dd 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -197,7 +197,7 @@ do break fi sleep 1 - let WAIT = WAIT - 1 + let "WAIT = WAIT - 1" done # Just a safety delay in case the above check didn't handle it. -- cgit From d53f23fe164c24335d001cf725599a95e6fdf92d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:05 +0200 Subject: selftests: hsr: Reorder the testsuite. Move the code and group into functions so it will be easier to extend the test to HSRv1 so that both versions are covered. Move the ping/test part into do_complete_ping_test() and the interface setup into setup_hsr_interfaces(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 255 ++++++++++++++-------------- 1 file changed, 132 insertions(+), 123 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 183f4a0f19dd..d4613b7b7188 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -41,61 +41,6 @@ cleanup() done } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -trap cleanup EXIT - -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done - -echo "INFO: preparing interfaces." -# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. -# -# ns1eth1 ----- ns2eth1 -# hsr1 hsr2 -# ns1eth2 ns2eth2 -# | | -# ns3eth1 ns3eth2 -# \ / -# hsr3 -# -# Interfaces -ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" -ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" -ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - -# HSRv0. -ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 -ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 -ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 - -# IP for HSR -ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 -ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad -ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 -ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad -ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 -ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad - -# All Links up -ip -net "$ns1" link set ns1eth1 up -ip -net "$ns1" link set ns1eth2 up -ip -net "$ns1" link set hsr1 up - -ip -net "$ns2" link set ns2eth1 up -ip -net "$ns2" link set ns2eth2 up -ip -net "$ns2" link set hsr2 up - -ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth2 up -ip -net "$ns3" link set hsr3 up - # $1: IP address is_v6() { @@ -164,93 +109,157 @@ stop_if_error() fi } - -echo "INFO: Initial validation ping." -# Each node has to be able each one. -do_ping "$ns1" 100.64.0.2 -do_ping "$ns2" 100.64.0.1 -do_ping "$ns3" 100.64.0.1 -stop_if_error "Initial validation failed." - -do_ping "$ns1" 100.64.0.3 -do_ping "$ns2" 100.64.0.3 -do_ping "$ns3" 100.64.0.2 - -do_ping "$ns1" dead:beef:1::2 -do_ping "$ns1" dead:beef:1::3 -do_ping "$ns2" dead:beef:1::1 -do_ping "$ns2" dead:beef:1::2 -do_ping "$ns3" dead:beef:1::1 -do_ping "$ns3" dead:beef:1::2 - -stop_if_error "Initial validation failed." +do_complete_ping_test() +{ + echo "INFO: Initial validation ping." + # Each node has to be able each one. + do_ping "$ns1" 100.64.0.2 + do_ping "$ns2" 100.64.0.1 + do_ping "$ns3" 100.64.0.1 + stop_if_error "Initial validation failed." + + do_ping "$ns1" 100.64.0.3 + do_ping "$ns2" 100.64.0.3 + do_ping "$ns3" 100.64.0.2 + + do_ping "$ns1" dead:beef:1::2 + do_ping "$ns1" dead:beef:1::3 + do_ping "$ns2" dead:beef:1::1 + do_ping "$ns2" dead:beef:1::2 + do_ping "$ns3" dead:beef:1::1 + do_ping "$ns3" dead:beef:1::2 + + stop_if_error "Initial validation failed." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. -WAIT=5 -while [ ${WAIT} -gt 0 ] -do - grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table - if [ $? -ne 0 ] - then - break - fi - sleep 1 - let "WAIT = WAIT - 1" -done + WAIT=5 + while [ ${WAIT} -gt 0 ] + do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" + done # Just a safety delay in case the above check didn't handle it. -sleep 1 + sleep 1 + + echo "INFO: Longer ping test." + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" dead:beef:1::2 + do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" dead:beef:1::3 + + stop_if_error "Longer ping test failed." + + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" dead:beef:1::1 + do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" dead:beef:1::2 + stop_if_error "Longer ping test failed." + + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" dead:beef:1::1 + do_ping_long "$ns3" 100.64.0.2 + do_ping_long "$ns3" dead:beef:1::2 + stop_if_error "Longer ping test failed." + + echo "INFO: Cutting one link." + do_ping_long "$ns1" 100.64.0.3 & -echo "INFO: Longer ping test." -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" dead:beef:1::2 -do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" dead:beef:1::3 + sleep 3 + ip -net "$ns3" link set ns3eth1 down + wait -stop_if_error "Longer ping test failed." + ip -net "$ns3" link set ns3eth1 up -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" dead:beef:1::1 -do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" dead:beef:1::2 -stop_if_error "Longer ping test failed." + stop_if_error "Failed with one link down." -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" dead:beef:1::1 -do_ping_long "$ns3" 100.64.0.2 -do_ping_long "$ns3" dead:beef:1::2 -stop_if_error "Longer ping test failed." + echo "INFO: Delay the link and drop a few packages." + tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms + tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% -echo "INFO: Cutting one link." -do_ping_long "$ns1" 100.64.0.3 & + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" 100.64.0.3 -sleep 3 -ip -net "$ns3" link set ns3eth1 down -wait + stop_if_error "Failed with delay and packetloss." -ip -net "$ns3" link set ns3eth1 up + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" 100.64.0.3 -stop_if_error "Failed with one link down." + stop_if_error "Failed with delay and packetloss." -echo "INFO: Delay the link and drop a few packages." -tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms -tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" 100.64.0.2 + stop_if_error "Failed with delay and packetloss." -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" 100.64.0.3 + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# + # Interfaces + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" + ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + + # HSRv0. + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + + # IP for HSR + ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 + ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + + # All Links up + ip -net "$ns1" link set ns1eth1 up + ip -net "$ns1" link set ns1eth2 up + ip -net "$ns1" link set hsr1 up + + ip -net "$ns2" link set ns2eth1 up + ip -net "$ns2" link set ns2eth2 up + ip -net "$ns2" link set hsr2 up + + ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth2 up + ip -net "$ns3" link set hsr3 up +} -stop_if_error "Failed with delay and packetloss." +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" 100.64.0.3 +trap cleanup EXIT -stop_if_error "Failed with delay and packetloss." +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" 100.64.0.2 -stop_if_error "Failed with delay and packetloss." +setup_hsr_interfaces +do_complete_ping_test -echo "INFO: All good." exit $ret -- cgit From b0e9c3b5fdafbe60e7a82be69439f95e06a4de39 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Sep 2023 20:10:06 +0200 Subject: selftests: hsr: Extend the testsuite to also cover HSRv1. The testsuite already has simply tests for HSRv0. The testuite would have been able to notice the v1 breakage if it was there at the time. Extend the testsuite to also cover HSRv1. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- tools/testing/selftests/net/hsr/hsr_ping.sh | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index d4613b7b7188..1c6457e54625 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -203,7 +203,9 @@ do_complete_ping_test() setup_hsr_interfaces() { - echo "INFO: preparing interfaces." + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv}." # Three HSR nodes. Each node has one link to each of its neighbour, two links in total. # # ns1eth1 ----- ns2eth1 @@ -219,10 +221,10 @@ setup_hsr_interfaces() ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - # HSRv0. - ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 - ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 - ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + # HSRv0/1 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 # IP for HSR ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 @@ -259,7 +261,16 @@ for i in "$ns1" "$ns2" "$ns3" ;do ip -net $i link set lo up done -setup_hsr_interfaces +setup_hsr_interfaces 0 +do_complete_ping_test +cleanup + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 1 do_complete_ping_test exit $ret -- cgit From 2c3dfba4cf84ac4f306cc6653b37b6dd6859ae9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Sep 2023 15:45:17 +0200 Subject: rfkill: sync before userspace visibility/changes If userspace quickly opens /dev/rfkill after a new instance was created, it might see the old state of the instance from before the sync work runs and may even _change_ the state, only to have the sync work change it again. Fix this by doing the sync inline where needed, not just for /dev/rfkill but also for sysfs. Signed-off-by: Johannes Berg --- net/rfkill/core.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 01fca7a10b4b..08630896b6c8 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -48,6 +48,7 @@ struct rfkill { bool persistent; bool polling_paused; bool suspended; + bool need_sync; const struct rfkill_ops *ops; void *data; @@ -368,6 +369,17 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill_event(rfkill); } +static void rfkill_sync(struct rfkill *rfkill) +{ + lockdep_assert_held(&rfkill_global_mutex); + + if (!rfkill->need_sync) + return; + + rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur); + rfkill->need_sync = false; +} + static void rfkill_update_global_state(enum rfkill_type type, bool blocked) { int i; @@ -730,6 +742,10 @@ static ssize_t soft_show(struct device *dev, struct device_attribute *attr, { struct rfkill *rfkill = to_rfkill(dev); + mutex_lock(&rfkill_global_mutex); + rfkill_sync(rfkill); + mutex_unlock(&rfkill_global_mutex); + return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0); } @@ -751,6 +767,7 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&rfkill_global_mutex); + rfkill_sync(rfkill); rfkill_set_block(rfkill, state); mutex_unlock(&rfkill_global_mutex); @@ -783,6 +800,10 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, { struct rfkill *rfkill = to_rfkill(dev); + mutex_lock(&rfkill_global_mutex); + rfkill_sync(rfkill); + mutex_unlock(&rfkill_global_mutex); + return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state)); } @@ -805,6 +826,7 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&rfkill_global_mutex); + rfkill_sync(rfkill); rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED); mutex_unlock(&rfkill_global_mutex); @@ -1032,14 +1054,10 @@ static void rfkill_uevent_work(struct work_struct *work) static void rfkill_sync_work(struct work_struct *work) { - struct rfkill *rfkill; - bool cur; - - rfkill = container_of(work, struct rfkill, sync_work); + struct rfkill *rfkill = container_of(work, struct rfkill, sync_work); mutex_lock(&rfkill_global_mutex); - cur = rfkill_global_states[rfkill->type].cur; - rfkill_set_block(rfkill, cur); + rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); } @@ -1087,6 +1105,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) round_jiffies_relative(POLL_INTERVAL)); if (!rfkill->persistent || rfkill_epo_lock_active) { + rfkill->need_sync = true; schedule_work(&rfkill->sync_work); } else { #ifdef CONFIG_RFKILL_INPUT @@ -1171,6 +1190,7 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) goto free; + rfkill_sync(rfkill); rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); list_add_tail(&ev->list, &data->events); } -- cgit From b2abdffb505f7e1bef1a769ba7cbdc819a6fe623 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 8 Sep 2023 14:05:59 +0800 Subject: RDMA/erdma: Fix NULL pointer access in regmr_cmd Fix the crash of regmr_cmd called by erdma_ib_alloc_mr. The reason is that mr->mem.mtt is not initialized but it is accessed in regmr_cmd. The call trace information: BUG: kernel NULL pointer dereference, address: 0000000000000000 <...> RIP: 0010:regmr_cmd+0x170/0x1c0 [erdma] <...> Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x66/0x150 ? do_user_addr_fault+0x61/0x660 ? exc_page_fault+0x65/0x140 ? asm_exc_page_fault+0x22/0x30 ? regmr_cmd+0x170/0x1c0 [erdma] ? preempt_count_add+0x70/0xa0 ? _raw_spin_lock_irqsave+0x19/0x50 ? _raw_spin_unlock_irqrestore+0x1b/0x40 ? erdma_alloc_idx+0x51/0x90 [erdma] erdma_get_dma_mr+0xa3/0x120 [erdma] __ib_alloc_pd+0xeb/0x1c0 [ib_core] Fixes: 7244b4aa4221 ("RDMA/erdma: Refactor the storage structure of MTT entries") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/3d140c1d-524a-4dbe-a51c-aee4f7ecafdb@moroto.mountain/ Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230908060559.80203-1-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 70eaed59a67c..c317947563fb 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -133,8 +133,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { struct erdma_pd *pd = to_epd(mr->ibmr.pd); + u32 mtt_level = ERDMA_MR_MTT_0LEVEL; struct erdma_cmdq_reg_mr_req req; - u32 mtt_level; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); @@ -147,10 +147,9 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); mtt_level = mr->mem.mtt->level; } - } else { + } else if (mr->type != ERDMA_MR_TYPE_DMA) { memcpy(req.phy_addr, mr->mem.mtt->buf, MTT_SIZE(mr->mem.page_cnt)); - mtt_level = ERDMA_MR_MTT_0LEVEL; } req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | -- cgit From cce7fc8b29961b64fadb1ce398dc5ff32a79643b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Sep 2023 01:25:55 +0300 Subject: serial: 8250_port: Check IRQ data before use In case the leaf driver wants to use IRQ polling (irq = 0) and IIR register shows that an interrupt happened in the 8250 hardware the IRQ data can be NULL. In such a case we need to skip the wake event as we came to this path from the timer interrupt and quite likely system is already awake. Without this fix we have got an Oops: serial8250: ttyS0 at I/O 0x3f8 (irq = 0, base_baud = 115200) is a 16550A ... BUG: kernel NULL pointer dereference, address: 0000000000000010 RIP: 0010:serial8250_handle_irq+0x7c/0x240 Call Trace: ? serial8250_handle_irq+0x7c/0x240 ? __pfx_serial8250_timeout+0x10/0x10 Fixes: 0ba9e3a13c6a ("serial: 8250: Add missing wakeup event reporting") Cc: stable Signed-off-by: Andy Shevchenko Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230831222555.614426-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fb891b67968f..141627370aab 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1936,7 +1936,10 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) skip_rx = true; if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) { - if (irqd_is_wakeup_set(irq_get_irq_data(port->irq))) + struct irq_data *d; + + d = irq_get_irq_data(port->irq); + if (d && irqd_is_wakeup_set(d)) pm_wakeup_event(tport->tty->dev, 0); if (!up->dma || handle_rx_dma(up, iir)) status = serial8250_rx_chars(up, status); -- cgit From 29346e217b8ab8a52889b88f00b268278d6b7668 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Sep 2023 07:15:07 +0200 Subject: Revert "tty: n_gsm: fix UAF in gsm_cleanup_mux" This reverts commit 9b9c8195f3f0d74a826077fc1c01b9ee74907239. The commit above is reverted as it did not solve the original issue. gsm_cleanup_mux() tries to free up the virtual ttys by calling gsm_dlci_release() for each available DLCI. There, dlci_put() is called to decrease the reference counter for the DLCI via tty_port_put() which finally calls gsm_dlci_free(). This already clears the pointer which is being checked in gsm_cleanup_mux() before calling gsm_dlci_release(). Therefore, it is not necessary to clear this pointer in gsm_cleanup_mux() as done in the reverted commit. The commit introduces a null pointer dereference: ? __die+0x1f/0x70 ? page_fault_oops+0x156/0x420 ? search_exception_tables+0x37/0x50 ? fixup_exception+0x21/0x310 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? tty_port_put+0x19/0xa0 gsmtty_cleanup+0x29/0x80 [n_gsm] release_one_tty+0x37/0xe0 process_one_work+0x1e6/0x3e0 worker_thread+0x4c/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe1/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The actual issue is that nothing guards dlci_put() from being called multiple times while the tty driver was triggered but did not yet finished calling gsm_dlci_free(). Fixes: 9b9c8195f3f0 ("tty: n_gsm: fix UAF in gsm_cleanup_mux") Cc: stable Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20230914051507.3240-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index b3550ff9c494..1f3aba607cd5 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -3097,10 +3097,8 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) gsm->has_devices = false; } for (i = NUM_DLCI - 1; i >= 0; i--) - if (gsm->dlci[i]) { + if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); - gsm->dlci[i] = NULL; - } mutex_unlock(&gsm->mutex); /* Now wipe the queues */ tty_ldisc_flush(gsm->tty); -- cgit From 18126c767658ae8a831257c6cb7776c5ba5e7249 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Sep 2023 15:18:06 +0300 Subject: RDMA/cma: Fix truncation compilation warning in make_cma_ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following compilation error is false alarm as RDMA devices don't have such large amount of ports to actually cause to format truncation. drivers/infiniband/core/cma_configfs.c: In function ‘make_cma_ports’: drivers/infiniband/core/cma_configfs.c:223:57: error: ‘snprintf’ output may be truncated before the last format character [-Werror=format-truncation=] 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^ drivers/infiniband/core/cma_configfs.c:223:17: note: ‘snprintf’ output between 2 and 11 bytes into a destination of size 10 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make[5]: *** [scripts/Makefile.build:243: drivers/infiniband/core/cma_configfs.o] Error 1 Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Link: https://lore.kernel.org/r/a7e3b347ee134167fa6a3787c56ef231a04bc8c2.1694434639.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7b68b3ea979f..f2fb2d8a6597 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -217,7 +217,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, return -ENOMEM; for (i = 0; i < ports_num; i++) { - char port_str[10]; + char port_str[11]; ports[i].port_num = i + 1; snprintf(port_str, sizeof(port_str), "%u", i + 1); -- cgit From bb6c4507fe825f1b4904fc3ffd329ab196c5e645 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Sep 2023 22:52:53 +0200 Subject: drm: fix up fbdev Kconfig defaults As a result of the recent Kconfig reworks, the default settings for the framebuffer interfaces changed in unexpected ways: Configurations that leave CONFIG_FB disabled but use DRM now get DRM_FBDEV_EMULATION by default. This also turns on the deprecated /dev/fb device nodes for machines that don't actually want it. In turn, configurations that previously had DRM_FBDEV_EMULATION enabled now only get the /dev/fb front-end but not the more useful framebuffer console, which is not selected any more. We had previously decided that any combination of the three frontends (FB_DEVICE, FRAMEBUFFER_CONSOLE and LOGO) should be selectable, but the new default settings mean that a lot of defconfig files would have to get adapted. Change the defaults back to what they were in Linux 6.5: - Leave DRM_FBDEV_EMULATION turned off unless CONFIG_FB is enabled. Previously this was a hard dependency but now the two are independent. However, configurations that enable CONFIG_FB probably also want to keep the emulation for DRM, while those without FB presumably did that intentionally in the past. - Leave FB_DEVICE turned off for FB=n. Following the same logic, the deprecated option should not automatically get enabled here, most users that had FB turned off in the past do not want it, even if they want the console - Turn the FRAMEBUFFER_CONSOLE option on if DRM_FBDEV_EMULATION is set to avoid having to change defconfig files that relied on it being selected unconditionally in the past. This also makes sense since both LOGO and FB_DEVICE are now disabled by default for builds without CONFIG_FB, but DRM_FBDEV_EMULATION would make no sense if all three are disabled. Fixes: a5ae331edb02b ("drm: Drop select FRAMEBUFFER_CONSOLE for DRM_FBDEV_EMULATION") Fixes: 701d2054fa317 ("fbdev: Make support for userspace interfaces configurable") Reported-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Reviewed-by: Javier Martinez Canillas Reviewed-by: Geert Uytterhoeven Acked-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230911205338.2385278-1-arnd@kernel.org --- drivers/gpu/drm/Kconfig | 2 +- drivers/video/console/Kconfig | 1 + drivers/video/fbdev/core/Kconfig | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index ab9ef1c20349..3caa020391c7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -136,7 +136,7 @@ config DRM_FBDEV_EMULATION bool "Enable legacy fbdev support for your modesetting driver" depends on DRM select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE - default y + default FB help Choose this option if you have a need for the legacy fbdev support. Note that this support also provides the linux console diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 1b5a319971ed..30577b1d3de5 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -73,6 +73,7 @@ config DUMMY_CONSOLE_ROWS config FRAMEBUFFER_CONSOLE bool "Framebuffer Console support" depends on FB_CORE && !UML + default DRM_FBDEV_EMULATION select VT_HW_CONSOLE_BINDING select CRC32 select FONT_SUPPORT diff --git a/drivers/video/fbdev/core/Kconfig b/drivers/video/fbdev/core/Kconfig index baf7e852c75b..5ac1b0637531 100644 --- a/drivers/video/fbdev/core/Kconfig +++ b/drivers/video/fbdev/core/Kconfig @@ -28,7 +28,7 @@ config FIRMWARE_EDID config FB_DEVICE bool "Provide legacy /dev/fb* device" depends on FB_CORE - default y + default FB help Say Y here if you want the legacy /dev/fb* device file and interfaces within sysfs anc procfs. It is only required if you -- cgit From eb6c97647be227822c7ce23655482b05e348fba5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 11 Sep 2023 12:57:04 +0100 Subject: iommu/arm-smmu-v3: Avoid constructing invalid range commands Although io-pgtable's non-leaf invalidations are always for full tables, I missed that SVA also uses non-leaf invalidations, while being at the mercy of whatever range the MMU notifier throws at it. This means it definitely wants the previous TTL fix as well, since it also doesn't know exactly which leaf level(s) may need invalidating, but it can also give us less-aligned ranges wherein certain corners may lead to building an invalid command where TTL, Num and Scale are all 0. It should be fine to handle this by over-invalidating an extra page, since falling back to a non-range command opens up a whole can of errata-flavoured worms. Fixes: 6833b8f2e199 ("iommu/arm-smmu-v3: Set TTL invalidation hint better") Reported-by: Rui Zhu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b99cfe71af2bd93a8a2930f20967fb2a4f7748dd.1694432734.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e82bf1c449a3..bd0a596f9863 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1895,18 +1895,23 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, /* Get the leaf page size */ tg = __ffs(smmu_domain->domain.pgsize_bitmap); + num_pages = size >> tg; + /* Convert page size of 12,14,16 (log2) to 1,2,3 */ cmd->tlbi.tg = (tg - 10) / 2; /* - * Determine what level the granule is at. For non-leaf, io-pgtable - * assumes .tlb_flush_walk can invalidate multiple levels at once, - * so ignore the nominal last-level granule and leave TTL=0. + * Determine what level the granule is at. For non-leaf, both + * io-pgtable and SVA pass a nominal last-level granule because + * they don't know what level(s) actually apply, so ignore that + * and leave TTL=0. However for various errata reasons we still + * want to use a range command, so avoid the SVA corner case + * where both scale and num could be 0 as well. */ if (cmd->tlbi.leaf) cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); - - num_pages = size >> tg; + else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1) + num_pages++; } cmds.num = 0; -- cgit From 938ba2f252a5dd260429ffbe71d147149de9f068 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 12 Sep 2023 15:31:42 +0200 Subject: dt-bindings: arm-smmu: Fix SDM630 clocks description SDM630 was abusingly referencing one of the internal bus clocks, that were recently dropped from Linux (because the original implementation did not make much sense), circumventing the interconnect framework. Fix it by dropping the bus-mm clock (which requires separating 630 from similar entries) and keeping the rest as-is. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230721-topic-rpm_clk_cleanup-v2-4-1e506593b1bd@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index cf29ab10501c..b1b2cf81b42f 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -270,6 +270,7 @@ allOf: contains: enum: - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 then: anyOf: - properties: @@ -311,7 +312,6 @@ allOf: compatible: contains: enum: - - qcom,sdm630-smmu-v2 - qcom,sm6375-smmu-v2 then: anyOf: -- cgit From 215b215d1e9278765c32af29515e8cdf679d47a3 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 15 Sep 2023 20:24:21 +0000 Subject: MAINTAINERS: Use wildcard pattern for ARM PMU headers Looks like arm_pmuv3.h isn't caught by the ARM PMU maintainers entry. Fix it with a wildcard. Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20230915202421.2706446-1-oliver.upton@linux.dev Signed-off-by: Will Deacon --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..5b1a9f9d98b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1662,7 +1662,7 @@ F: arch/arm*/include/asm/perf_event.h F: arch/arm*/kernel/hw_breakpoint.c F: arch/arm*/kernel/perf_* F: drivers/perf/ -F: include/linux/perf/arm_pmu.h +F: include/linux/perf/arm_pmu*.h ARM PORT M: Russell King -- cgit From ea852c17f5382a0a52041cfbd9a4451ae0fa1a38 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:24 +0200 Subject: tsnep: Fix NAPI scheduling According to the NAPI documentation networking/napi.rst, drivers which have to mask interrupts explicitly should use the napi_schedule_prep() and __napi_schedule() calls. No problem seen so far with current implementation. Nevertheless, let's align the implementation with documentation. Signed-off-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/engleder/tsnep_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index f61bd89734c5..0cdf0de555ed 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -87,8 +87,11 @@ static irqreturn_t tsnep_irq(int irq, void *arg) /* handle TX/RX queue 0 interrupt */ if ((active & adapter->queue[0].irq_mask) != 0) { - tsnep_disable_irq(adapter, adapter->queue[0].irq_mask); - napi_schedule(&adapter->queue[0].napi); + if (napi_schedule_prep(&adapter->queue[0].napi)) { + tsnep_disable_irq(adapter, adapter->queue[0].irq_mask); + /* schedule after masking to avoid races */ + __napi_schedule(&adapter->queue[0].napi); + } } return IRQ_HANDLED; @@ -99,8 +102,11 @@ static irqreturn_t tsnep_irq_txrx(int irq, void *arg) struct tsnep_queue *queue = arg; /* handle TX/RX queue interrupt */ - tsnep_disable_irq(queue->adapter, queue->irq_mask); - napi_schedule(&queue->napi); + if (napi_schedule_prep(&queue->napi)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + /* schedule after masking to avoid races */ + __napi_schedule(&queue->napi); + } return IRQ_HANDLED; } -- cgit From a7f991953d73dd50c4c23b5437c0139960e1fad4 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:25 +0200 Subject: tsnep: Fix ethtool channels According to the NAPI documentation networking/napi.rst, for the ethtool API a channel is a IRQ/NAPI which services queues of a given type. tsnep uses a single IRQ/NAPI instance for every TX/RX queue pair. Therefore, combined channels shall be returned instead of separate tx/rx channels. Signed-off-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/engleder/tsnep_ethtool.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index 716815dad7d2..65ec1abc9442 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -300,10 +300,8 @@ static void tsnep_ethtool_get_channels(struct net_device *netdev, { struct tsnep_adapter *adapter = netdev_priv(netdev); - ch->max_rx = adapter->num_rx_queues; - ch->max_tx = adapter->num_tx_queues; - ch->rx_count = adapter->num_rx_queues; - ch->tx_count = adapter->num_tx_queues; + ch->max_combined = adapter->num_queues; + ch->combined_count = adapter->num_queues; } static int tsnep_ethtool_get_ts_info(struct net_device *netdev, -- cgit From 46589db3817bd8b523701274885984b5a5dda7d1 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 15 Sep 2023 23:01:26 +0200 Subject: tsnep: Fix NAPI polling with budget 0 According to the NAPI documentation networking/napi.rst, Rx specific APIs like page pool and XDP cannot be used at all when budget is 0. skb Tx processing should happen regardless of the budget. Stop NAPI polling after Tx processing and skip Rx processing if budget is 0. Signed-off-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/engleder/tsnep_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 0cdf0de555ed..8b992dc9bb52 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1734,6 +1734,10 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (queue->tx) complete = tsnep_tx_poll(queue->tx, budget); + /* handle case where we are called by netpoll with a budget of 0 */ + if (unlikely(budget <= 0)) + return budget; + if (queue->rx) { done = queue->rx->xsk_pool ? tsnep_rx_poll_zc(queue->rx, napi, budget) : -- cgit From 479965a2b7ec481737df0cadf553331063b9c343 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Tue, 12 Sep 2023 14:34:29 +0100 Subject: arm64: cpufeature: Fix CLRBHB and BC detection ClearBHB support is indicated by the CLRBHB field in ID_AA64ISAR2_EL1. Following some refactoring the kernel incorrectly checks the BC field instead. Fix the detection to use the right field. (Note: The original ClearBHB support had it as FTR_HIGHER_SAFE, but this patch uses FTR_LOWER_SAFE, which seems more correct.) Also fix the detection of BC (hinted conditional branches) to use FTR_LOWER_SAFE, so that it is not reported on mismatched systems. Fixes: 356137e68a9f ("arm64/sysreg: Make BHB clear feature defines match the architecture") Fixes: 8fcc8285c0e3 ("arm64/sysreg: Convert ID_AA64ISAR2_EL1 to automatic generation") Cc: stable@vger.kernel.org Signed-off-by: Kristina Martsenko Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230912133429.2606875-1-kristina.martsenko@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/tools/sysreg | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96e50227f940..5bba39376055 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -663,7 +663,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_EL1_BC_SHIFT); + ID_AA64ISAR2_EL1_CLRBHB_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b018ae12ff5f..444a73c2e638 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,7 +222,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2517ef7c21cf..76ce150e7347 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1347,7 +1347,11 @@ UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:28 +Res0 47:32 +UnsignedEnum 31:28 CLRBHB + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 27:24 PAC_frac 0b0000 NI 0b0001 IMP -- cgit From 046b212ac9306c9046ab30cdf679bd40797ce069 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 14 Sep 2023 11:11:31 +0100 Subject: arm64/sme: Include ID_AA64PFR1_EL1.SME in cpu-feature-registers.rst We expose ID_AA64PFR1_EL1.SME to userspace but do not document this in cpu-feature-registers.rst. Add it. Reported-by: Peter Maydell Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230914-arm64-pfr1-sme-doc-v1-1-b6c497d10d77@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/cpu-feature-registers.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst index 4e4625f2455f..5e9ef91f5e36 100644 --- a/Documentation/arch/arm64/cpu-feature-registers.rst +++ b/Documentation/arch/arm64/cpu-feature-registers.rst @@ -175,6 +175,8 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | SME | [27-24] | y | + +------------------------------+---------+---------+ | MTE | [11-8] | y | +------------------------------+---------+---------+ | SSBS | [7-4] | y | -- cgit From 5ad361f42fe43e5f13f9b88341e75eaf2d1bd183 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 14 Sep 2023 11:09:29 +0100 Subject: arm64/hbc: Document HWCAP2_HBC When we added support for FEAT_HBC we added a new hwcap but did not document that we had done so, add the documentation. Signed-off-by: Mark Brown Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20230914-arm64-feat-hbc-doc-v1-1-797d25f06897@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/elf_hwcaps.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 8c8addb4194c..76ff9d7398fd 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -305,6 +305,9 @@ HWCAP2_SMEF16F16 HWCAP2_MOPS Functionality implied by ID_AA64ISAR2_EL1.MOPS == 0b0001. +HWCAP2_HBC + Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001. + 4. Unused AT_HWCAP bits ----------------------- -- cgit From 2816a09678f50fc6a69e742e90cb1fd7a9f1f9ff Mon Sep 17 00:00:00 2001 From: Luís Henriques Date: Fri, 8 Sep 2023 12:20:20 +0100 Subject: ceph: remove unnecessary check for NULL in parse_longname() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function ceph_get_inode() never returns NULL; instead it returns an ERR_PTR() if something fails. Thus, the check for NULL in parse_longname() is useless and can be dropped. Instead, move there the debug code that does the error checking so that it's only executed if ceph_get_inode() is called. Fixes: dd66df0053ef ("ceph: add support for encrypted snapshot names") Reported-by: Dan Carpenter Signed-off-by: Luís Henriques Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/crypto.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e4d5cd56a80b..e1f31b86fd48 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -249,11 +249,9 @@ static struct inode *parse_longname(const struct inode *parent, if (!dir) { /* This can happen if we're not mounting cephfs on the root */ dir = ceph_get_inode(parent->i_sb, vino, NULL); - if (!dir) - dir = ERR_PTR(-ENOENT); + if (IS_ERR(dir)) + dout("Can't find inode %s (%s)\n", inode_number, name); } - if (IS_ERR(dir)) - dout("Can't find inode %s (%s)\n", inode_number, name); out: kfree(inode_number); -- cgit From d57125b55a292a8e74a1fb17182576a3b2b2e795 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Sep 2023 10:44:08 +0200 Subject: Revert "ceph: make members in struct ceph_mds_request_args_ext a union" This reverts commit 3af5ae22030cb59fab4fba35f5a2b62f47e14df9. ceph_mds_request_args_ext was already (and remains to be) a union. An additional anonymous union inside is bogus: union ceph_mds_request_args_ext { union { union ceph_mds_request_args old; struct { ... } __attribute__ ((packed)) setattr_ext; }; } Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li --- include/linux/ceph/ceph_fs.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f2301ee88bc..f3b3593254b9 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -467,19 +467,17 @@ union ceph_mds_request_args { } __attribute__ ((packed)); union ceph_mds_request_args_ext { - union { - union ceph_mds_request_args old; - struct { - __le32 mode; - __le32 uid; - __le32 gid; - struct ceph_timespec mtime; - struct ceph_timespec atime; - __le64 size, old_size; /* old_size needed by truncate */ - __le32 mask; /* CEPH_SETATTR_* */ - struct ceph_timespec btime; - } __attribute__ ((packed)) setattr_ext; - }; + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; }; #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -- cgit From b2eb3e67ee68dee9c0555466dfa8d7f0ffcc00db Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Fri, 15 Sep 2023 01:25:27 +0300 Subject: ACPI: processor: Fix uninitialized access of buf in acpi_set_pdc_bits() A bug was introduced during unification of setting CAP_SMP_T_SWCOORD for the _PDC and _OSC methods. The third u32 in the buffer is never cleared before setting bits on it. The memory is not guaranteed to be zero as it was allocated by kmalloc() instead of kzalloc(). Fix this by initializing the third u32 in the buffer to 0. Fixes: b9e8d0168a7a ("ACPI: processor: Set CAP_SMP_T_SWCOORD in arch_acpi_set_proc_cap_bits()") Signed-off-by: Michal Wilczynski [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_pdc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 1a8591e9a9bf..994091bd52de 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -19,6 +19,7 @@ static void acpi_set_pdc_bits(u32 *buf) { buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; + buf[2] = 0; /* Twiddle arch-specific bits needed for _PDC */ arch_acpi_set_proc_cap_bits(&buf[2]); -- cgit From 44a5b6b5c7fee5146572b4c57f0d9d9c398d1033 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 14 Sep 2023 11:09:30 +0100 Subject: arm64: Document missing userspace visible fields in ID_AA64ISAR2_EL1 We have exposed a number of fields in ID_AA64ISAR2_EL1 to userspace without adding the matching documentation in cpu-feature-registers.rst, update it to match the implementation. Signed-off-by: Mark Brown Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20230914-arm64-feat-hbc-doc-v1-2-797d25f06897@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/cpu-feature-registers.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst index 5e9ef91f5e36..de6d8a4790e2 100644 --- a/Documentation/arch/arm64/cpu-feature-registers.rst +++ b/Documentation/arch/arm64/cpu-feature-registers.rst @@ -290,8 +290,18 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | CSSC | [55-52] | y | + +------------------------------+---------+---------+ + | RPRFM | [51-48] | y | + +------------------------------+---------+---------+ + | BC | [23-20] | y | + +------------------------------+---------+---------+ | MOPS | [19-16] | y | +------------------------------+---------+---------+ + | APA3 | [15-12] | y | + +------------------------------+---------+---------+ + | GPA3 | [11-8] | y | + +------------------------------+---------+---------+ | RPRES | [7-4] | y | +------------------------------+---------+---------+ | WFXT | [3-0] | y | -- cgit From ea3105672c68a5b6d7368504067220682ee6c65c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Sep 2023 20:35:33 +0200 Subject: thermal: sysfs: Fix trip_point_hyst_store() After commit 2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip() function") updating a trip point temperature doesn't actually work, because the value supplied by user space is subsequently overwritten with the current trip point hysteresis value. Fix this by changing the code to parse the number string supplied by user space after retrieving the current trip point data from the thermal zone. Also drop a redundant tab character from the code in question. Fixes: 2e38a2a981b2 ("thermal/core: Add a generic thermal_zone_set_trip() function") Signed-off-by: Rafael J. Wysocki Cc: 6.3+ # 6.3+ --- drivers/thermal/thermal_sysfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 6c20c9f90a05..4e6a97db894e 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -185,9 +185,6 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip_id) != 1) return -EINVAL; - if (kstrtoint(buf, 10, &trip.hysteresis)) - return -EINVAL; - mutex_lock(&tz->lock); if (!device_is_registered(dev)) { @@ -198,7 +195,11 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, ret = __thermal_zone_get_trip(tz, trip_id, &trip); if (ret) goto unlock; - + + ret = kstrtoint(buf, 10, &trip.hysteresis); + if (ret) + goto unlock; + ret = thermal_zone_set_trip(tz, trip_id, &trip); unlock: mutex_unlock(&tz->lock); -- cgit From 6bec041147a2a64a490d1f813e8a004443061b38 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:45 +0200 Subject: mptcp: fix bogus receive window shrinkage with multiple subflows In case multiple subflows race to update the mptcp-level receive window, the subflow losing the race should use the window value provided by the "winning" subflow to update it's own tcp-level rcv_wnd. To such goal, the current code bogusly uses the mptcp-level rcv_wnd value as observed before the update attempt. On unlucky circumstances that may lead to TCP-level window shrinkage, and stall the other end. Address the issue feeding to the rcv wnd update the correct value. Fixes: f3589be0c420 ("mptcp: never shrink offered window") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/427 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/options.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c254accb14de..cd15ec73073e 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) if (rcv_wnd == rcv_wnd_old) break; - if (before64(rcv_wnd_new, rcv_wnd)) { + + rcv_wnd_old = rcv_wnd; + if (before64(rcv_wnd_new, rcv_wnd_old)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); goto raise_win; } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); - rcv_wnd_old = rcv_wnd; } return; } -- cgit From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended. Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++ net/mptcp/subflow.c | 36 ------------------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; -- cgit From 9f1a98813b4b686482e5ef3c9d998581cace0ba6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:47 +0200 Subject: mptcp: process pending subflow error on close On incoming TCP reset, subflow closing could happen before error propagation. That in turn could cause the socket error being ignored, and a missing socket state transition, as reported by Daire-Byrne. Address the issues explicitly checking for subflow socket error at close time. To avoid code duplication, factor-out of __mptcp_error_report() a new helper implementing the relevant bits. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429 Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 63 ++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 915860027b1a..1c96b8da71df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,40 +770,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } -void __mptcp_error_report(struct sock *sk) +static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) { - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); + int err = sock_error(ssk); + int ssk_state; - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; + if (!err) + return false; - if (!err) - continue; + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) + return false; - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + return true; +} + +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) + if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow))) + break; } /* In most cases we will be able to lock the mptcp socket. If its already @@ -2428,6 +2432,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } out_release: + __mptcp_subflow_error_report(sk, ssk); release_sock(ssk); sock_put(ssk); -- cgit From f6909dc1c1f4452879278128012da6c76bc186a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:48 +0200 Subject: mptcp: rename timer related helper to less confusing names The msk socket uses to different timeout to track close related events and retransmissions. The existing helpers do not indicate clearly which timer they actually touch, making the related code quite confusing. Change the existing helpers name to avoid such confusion. No functional change intended. This patch is linked to the next one ("mptcp: fix dangling connection hang-up"). The two patches are supposed to be backported together. Cc: stable@vger.kernel.org # v5.11+ Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 42 +++++++++++++++++++++--------------------- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c96b8da71df..c8f38f303a90 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -405,7 +405,7 @@ drop: return false; } -static void mptcp_stop_timer(struct sock *sk) +static void mptcp_stop_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -911,12 +911,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list } } -static bool mptcp_timer_pending(struct sock *sk) +static bool mptcp_rtx_timer_pending(struct sock *sk) { return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); } -static void mptcp_reset_timer(struct sock *sk) +static void mptcp_reset_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned long tout; @@ -1050,10 +1050,10 @@ static void __mptcp_clean_una(struct sock *sk) out: if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { - if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) - mptcp_stop_timer(sk); + if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) + mptcp_stop_rtx_timer(sk); } else { - mptcp_reset_timer(sk); + mptcp_reset_rtx_timer(sk); } } @@ -1626,8 +1626,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_push_release(ssk, &info); /* ensure the rtx timer is running */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (do_check_data_fin) mptcp_check_send_data_fin(sk); } @@ -1690,8 +1690,8 @@ out: if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (msk->snd_data_fin_enable && msk->snd_nxt + 1 == msk->write_seq) @@ -2260,7 +2260,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static void mptcp_timeout_timer(struct timer_list *t) +static void mptcp_tout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2629,14 +2629,14 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } /* schedule the timeout timer for the relevant event: either close timeout * or mp_fail timeout. The close timeout takes precedence on the mp_fail one */ -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) { struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; @@ -2669,7 +2669,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2758,7 +2758,7 @@ static void __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); - timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); } static void mptcp_ca_reset(struct sock *sk) @@ -2849,8 +2849,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } break; } @@ -2933,7 +2933,7 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; mptcp_release_sched(msk); @@ -3053,7 +3053,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } return do_cancel_work; @@ -3116,7 +3116,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); if (msk->token) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7254b3562575..5e2026815c8e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -718,7 +718,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2f40c23fdb0d..433f290984c8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); - mptcp_reset_timeout(msk, subflow->fail_tout); + mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk) -- cgit From 27e5ccc2d5a50ed61bb73153edb1066104b108b3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 16 Sep 2023 12:52:49 +0200 Subject: mptcp: fix dangling connection hang-up According to RFC 8684 section 3.3: A connection is not closed unless [...] or an implementation-specific connection-level send timeout. Currently the MPTCP protocol does not implement such timeout, and connection timing-out at the TCP-level never move to close state. Introduces a catch-up condition at subflow close time to move the MPTCP socket to close, too. That additionally allows removing similar existing inside the worker. Finally, allow some additional timeout for plain ESTABLISHED mptcp sockets, as the protocol allows creating new subflows even at that point and making the connection functional again. This issue is actually present since the beginning, but it is basically impossible to solve without a long chain of functional pre-requisites topped by commit bbd49d114d57 ("mptcp: consolidate transition to TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current patch, please also backport this other commit as well. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430 Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 86 +++++++++++++++++++++++++--------------------------- net/mptcp/protocol.h | 22 ++++++++++++++ net/mptcp/subflow.c | 1 + 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c8f38f303a90..e252539b1e19 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); + mptcp_stop_tout_timer(sk); return true; } @@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, bool dispose_it, need_push = false; /* If the first subflow moved to a close state before accept, e.g. due - * to an incoming reset, mptcp either: - * - if either the subflow or the msk are dead, destroy the context - * (the subflow socket is deleted by inet_child_forget) and the msk - * - otherwise do nothing at the moment and take action at accept and/or - * listener shutdown - user-space must be able to accept() the closed - * socket. + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't + * survive too. */ - if (msk->in_accept_queue && msk->first == ssk) { - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) - return; - + if (msk->in_accept_queue && msk->first == ssk && + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); @@ -2443,6 +2440,22 @@ out_release: out: if (need_push) __mptcp_push_pending(sk, 0); + + /* Catch every 'all subflows closed' scenario, including peers silently + * closing them, e.g. due to timeout. + * For established sockets, allow an additional timeout before closing, + * as the protocol can still create more subflows. + */ + if (list_is_singular(&msk->conn_list) && msk->first && + inet_sk_state_load(msk->first) == TCP_CLOSE) { + if (sk->sk_state != TCP_ESTABLISHED || + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else { + mptcp_start_tout_timer(sk); + } + } } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk) } -static bool mptcp_should_close(const struct sock *sk) +static bool mptcp_close_tout_expired(const struct sock *sk) { - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; - struct mptcp_subflow_context *subflow; - - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) - return true; + if (!inet_csk(sk)->icsk_mtup.probe_timestamp || + sk->sk_state == TCP_CLOSE) + return false; - /* if all subflows are in closed status don't bother with additional - * timeout - */ - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != - TCP_CLOSE) - return false; - } - return true; + return time_after32(tcp_jiffies32, + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; - if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + + TCP_TIMEWAIT_LEN; /* the close timeout takes precedence on the fail one, and here at least one of * them is active */ - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; sk_reset_timer(sk, &sk->sk_timer, timeout); } @@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_subflow_reset(ssk); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - - mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(sk); - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) - mptcp_do_fastclose(sk); + if (mptcp_close_tout_expired(sk)) { + mptcp_do_fastclose(sk); + mptcp_close_wake_up(sk); + } - if (sk->sk_state == TCP_CLOSE) { - __mptcp_destroy_sock(sk); - goto unlock; - } + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; } if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) @@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); @@ -3053,7 +3051,7 @@ cleanup: __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_tout_timer(msk, 0); + mptcp_start_tout_timer(sk); } return do_cancel_work; @@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); - sk_stop_timer(sk, &sk->sk_timer); + mptcp_stop_tout_timer(sk); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5e2026815c8e..ed61d6850cce 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + +static inline void mptcp_stop_tout_timer(struct sock *sk) +{ + if (!inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + + sk_stop_timer(sk, &sk->sk_timer); + inet_csk(sk)->icsk_mtup.probe_timestamp = 0; +} + +static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) +{ + /* avoid 0 timestamp, as that means no close timeout */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; +} + +static inline void mptcp_start_tout_timer(struct sock *sk) +{ + mptcp_set_close_tout(sk, tcp_jiffies32); + mptcp_reset_tout_timer(mptcp_sk(sk), 0); +} + static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 433f290984c8..918c1a235790 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink: -- cgit From 418f438a2db67503fe00cef5bbd64fd1f891e145 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:29:58 +0000 Subject: Documentation: netdev: fix dead link in ax25.rst http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Update the documentation to point there instead. And acknowledge that while the linux-hams list isn't entirely dead, it isn't what most would call 'active'. Remove that word. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. Miller --- Documentation/networking/ax25.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst index f060cfb1445a..605e72c6c877 100644 --- a/Documentation/networking/ax25.rst +++ b/Documentation/networking/ax25.rst @@ -7,9 +7,9 @@ AX.25 To use the amateur radio protocols within Linux you will need to get a suitable copy of the AX.25 Utilities. More detailed information about AX.25, NET/ROM and ROSE, associated programs and utilities can be -found on http://www.linux-ax25.org. +found on https://linux-ax25.in-berlin.de. -There is an active mailing list for discussing Linux amateur radio matters +There is a mailing list for discussing Linux amateur radio matters called linux-hams@vger.kernel.org. To subscribe to it, send a message to majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body of the message, the subject field is ignored. You don't need to be -- cgit From 1943f2b0ac5a9fde718c18c1f4ab8332c8b5cd60 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:30:10 +0000 Subject: MAINTAINERS: Update link for linux-ax25.org http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Update all links to the new URL. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. Miller --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bf0f54c24f81..c155eb535906 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3344,7 +3344,7 @@ AX.25 NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/ax25.h F: include/uapi/linux/ax25.h F: net/ax25/ @@ -14756,7 +14756,7 @@ NETROM NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/netrom.h F: include/uapi/linux/netrom.h F: net/netrom/ @@ -18607,7 +18607,7 @@ ROSE NETWORK LAYER M: Ralf Baechle L: linux-hams@vger.kernel.org S: Maintained -W: http://www.linux-ax25.org/ +W: https://linux-ax25.in-berlin.de F: include/net/rose.h F: include/uapi/linux/rose.h F: net/rose/ -- cgit From 71273c46a34823e54af7828df67e5983ba85d6c2 Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Sun, 17 Sep 2023 15:30:21 +0000 Subject: ax25: Kconfig: Update link for linux-ax25.org http://linux-ax25.org has been down for nearly a year. Its official replacement is https://linux-ax25.in-berlin.de. Change all references to the old site in the ax25 Kconfig to its replacement. Link: https://marc.info/?m=166792551600315 Signed-off-by: Peter Lafreniere Signed-off-by: David S. Miller --- net/ax25/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig index d3a9843a043d..fdb666607f10 100644 --- a/net/ax25/Kconfig +++ b/net/ax25/Kconfig @@ -10,7 +10,7 @@ menuconfig HAMRADIO If you want to connect your Linux box to an amateur radio, answer Y here. You want to read and more specifically about AX.25 on Linux - . + . Note that the answer to this question won't directly affect the kernel: saying N will just cause the configurator to skip all @@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE configuration. Linux cannot yet act as a DAMA server. This option only compiles DAMA slave support into the kernel. It still needs to be enabled at runtime. For more about DAMA see - . If unsure, say Y. + . If unsure, say Y. # placeholder until implemented config AX25_DAMA_MASTER @@ -87,9 +87,9 @@ config NETROM A comprehensive listing of all the software for Linux amateur radio users as well as information about how to configure an AX.25 port is contained in the Linux Ham Wiki, available from - . You also might want to check out the - file . More information about - digital amateur radio in general is on the WWW at + . You also might want to check out + the file . More information + about digital amateur radio in general is on the WWW at . To compile this driver as a module, choose M here: the @@ -106,9 +106,9 @@ config ROSE A comprehensive listing of all the software for Linux amateur radio users as well as information about how to configure an AX.25 port is contained in the Linux Ham Wiki, available from - . You also might want to check out the - file . More information about - digital amateur radio in general is on the WWW at + . You also might want to check out + the file . More information + about digital amateur radio in general is on the WWW at . To compile this driver as a module, choose M here: the -- cgit From 5f66db08cbd3ca471c66bacb0282902c79db9274 Mon Sep 17 00:00:00 2001 From: Stefan Moring Date: Sun, 17 Sep 2023 18:40:37 +0200 Subject: spi: imx: Take in account bits per word instead of assuming 8-bits The IMX spi driver has a hardcoded 8, breaking the driver for word lengths other than 8. Signed-off-by: Stefan Moring Reported-by: Sebastian Reichel Fixes: 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") Tested-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230917164037.29284-1-stefanmoring@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index a8a74c7cb79f..498e35c8db2c 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -662,7 +662,7 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, if (spi_imx->count >= 512) ctrl |= 0xFFF << MX51_ECSPI_CTRL_BL_OFFSET; else - ctrl |= (spi_imx->count*8 - 1) + ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) << MX51_ECSPI_CTRL_BL_OFFSET; } -- cgit From e0b4ab3bb92bda8d12f55842614362989d5b2cb3 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Sep 2023 14:27:19 -0700 Subject: platform/x86: intel_scu_ipc: Check status after timeout in busy_loop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible for the polling loop in busy_loop() to get scheduled away for a long time. status = ipc_read_status(scu); // status = IPC_STATUS_BUSY if (!(status & IPC_STATUS_BUSY)) If this happens, then the status bit could change while the task is scheduled away and this function would never read the status again after timing out. Instead, the function will return -ETIMEDOUT when it's possible that scheduling didn't work out and the status bit was cleared. Bit polling code should always check the bit being polled one more time after the timeout in case this happens. Fix this by reading the status once more after the while loop breaks. The readl_poll_timeout() macro implements all of this, and it is shorter, so use that macro here to consolidate code and fix this. There were some concerns with using readl_poll_timeout() because it uses timekeeping, and timekeeping isn't running early on or during the late stages of system suspend or early stages of system resume, but an audit of the code concluded that this code isn't called during those times so it is safe to use the macro. Cc: Prashant Malani Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Reviewed-by: Kuppuswamy Sathyanarayanan Fixes: e7b7ab3847c9 ("platform/x86: intel_scu_ipc: Sleeping is fine when polling") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20230913212723.3055315-2-swboyd@chromium.org Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 6851d10d6582..4c774ee8bb1b 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -231,19 +232,15 @@ static inline u32 ipc_data_readl(struct intel_scu_ipc_dev *scu, u32 offset) /* Wait till scu status is busy */ static inline int busy_loop(struct intel_scu_ipc_dev *scu) { - unsigned long end = jiffies + IPC_TIMEOUT; - - do { - u32 status; - - status = ipc_read_status(scu); - if (!(status & IPC_STATUS_BUSY)) - return (status & IPC_STATUS_ERR) ? -EIO : 0; + u8 status; + int err; - usleep_range(50, 100); - } while (time_before(jiffies, end)); + err = readx_poll_timeout(ipc_read_status, scu, status, !(status & IPC_STATUS_BUSY), + 100, jiffies_to_usecs(IPC_TIMEOUT)); + if (err) + return err; - return -ETIMEDOUT; + return (status & IPC_STATUS_ERR) ? -EIO : 0; } /* Wait till ipc ioc interrupt is received or timeout in 10 HZ */ -- cgit From 427fada620733e6474d783ae6037a66eae42bf8c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Sep 2023 14:27:20 -0700 Subject: platform/x86: intel_scu_ipc: Check status upon timeout in ipc_wait_for_interrupt() It's possible for the completion in ipc_wait_for_interrupt() to timeout, simply because the interrupt was delayed in being processed. A timeout in itself is not an error. This driver should check the status register upon a timeout to ensure that scheduling or interrupt processing delays don't affect the outcome of the IPC return value. CPU0 SCU ---- --- ipc_wait_for_interrupt() wait_for_completion_timeout(&scu->cmd_complete) [TIMEOUT] status[IPC_STATUS_BUSY]=0 Fix this problem by reading the status bit in all cases, regardless of the timeout. If the completion times out, we'll assume the problem was that the IPC_STATUS_BUSY bit was still set, but if the status bit is cleared in the meantime we know that we hit some scheduling delay and we should just check the error bit. Cc: Prashant Malani Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Fixes: ed12f295bfd5 ("ipc: Added support for IPC interrupt mode") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20230913212723.3055315-3-swboyd@chromium.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 4c774ee8bb1b..299c15312acb 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -248,10 +248,12 @@ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu) { int status; - if (!wait_for_completion_timeout(&scu->cmd_complete, IPC_TIMEOUT)) - return -ETIMEDOUT; + wait_for_completion_timeout(&scu->cmd_complete, IPC_TIMEOUT); status = ipc_read_status(scu); + if (status & IPC_STATUS_BUSY) + return -ETIMEDOUT; + if (status & IPC_STATUS_ERR) return -EIO; -- cgit From efce78584e583226e9a1f6cb2fb555d6ff47c3e7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Sep 2023 14:27:21 -0700 Subject: platform/x86: intel_scu_ipc: Don't override scu in intel_scu_ipc_dev_simple_command() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Andy discovered this bug during patch review. The 'scu' argument to this function shouldn't be overridden by the function itself. It doesn't make any sense. Looking at the commit history, we see that commit f57fa18583f5 ("platform/x86: intel_scu_ipc: Introduce new SCU IPC API") removed the setting of the scu to ipcdev in other functions, but not this one. That was an oversight. Remove this line so that we stop overriding the scu instance that is used by this function. Reported-by: Andy Shevchenko Closes: https://lore.kernel.org/r/ZPjdZ3xNmBEBvNiS@smile.fi.intel.com Cc: Prashant Malani Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Fixes: f57fa18583f5 ("platform/x86: intel_scu_ipc: Introduce new SCU IPC API") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20230913212723.3055315-4-swboyd@chromium.org Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 299c15312acb..3271f81a9c00 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -443,7 +443,6 @@ int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, mutex_unlock(&ipclock); return -ENODEV; } - scu = ipcdev; cmdval = sub << 12 | cmd; ipc_command(scu, cmdval); err = intel_scu_ipc_check_status(scu); -- cgit From 85e654c9f722853a595fa941dca60c157b707b86 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Sep 2023 14:27:22 -0700 Subject: platform/x86: intel_scu_ipc: Fail IPC send if still busy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible for interrupts to get significantly delayed to the point that callers of intel_scu_ipc_dev_command() and friends can call the function once, hit a timeout, and call it again while the interrupt still hasn't been processed. This driver will get seriously confused if the interrupt is finally processed after the second IPC has been sent with ipc_command(). It won't know which IPC has been completed. This could be quite disastrous if calling code assumes something has happened upon return from intel_scu_ipc_dev_simple_command() when it actually hasn't. Let's avoid this scenario by simply returning -EBUSY in this case. Hopefully higher layers will know to back off or fail gracefully when this happens. It's all highly unlikely anyway, but it's better to be correct here as we have no way to know which IPC the status register is telling us about if we send a second IPC while the previous IPC is still processing. Cc: Prashant Malani Cc: Kuppuswamy Sathyanarayanan Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Fixes: ed12f295bfd5 ("ipc: Added support for IPC interrupt mode") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20230913212723.3055315-5-swboyd@chromium.org Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_scu_ipc.c | 40 +++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 3271f81a9c00..a68df4133403 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -265,6 +265,24 @@ static int intel_scu_ipc_check_status(struct intel_scu_ipc_dev *scu) return scu->irq > 0 ? ipc_wait_for_interrupt(scu) : busy_loop(scu); } +static struct intel_scu_ipc_dev *intel_scu_ipc_get(struct intel_scu_ipc_dev *scu) +{ + u8 status; + + if (!scu) + scu = ipcdev; + if (!scu) + return ERR_PTR(-ENODEV); + + status = ipc_read_status(scu); + if (status & IPC_STATUS_BUSY) { + dev_dbg(&scu->dev, "device is busy\n"); + return ERR_PTR(-EBUSY); + } + + return scu; +} + /* Read/Write power control(PMIC in Langwell, MSIC in PenWell) registers */ static int pwr_reg_rdwr(struct intel_scu_ipc_dev *scu, u16 *addr, u8 *data, u32 count, u32 op, u32 id) @@ -278,11 +296,10 @@ static int pwr_reg_rdwr(struct intel_scu_ipc_dev *scu, u16 *addr, u8 *data, memset(cbuf, 0, sizeof(cbuf)); mutex_lock(&ipclock); - if (!scu) - scu = ipcdev; - if (!scu) { + scu = intel_scu_ipc_get(scu); + if (IS_ERR(scu)) { mutex_unlock(&ipclock); - return -ENODEV; + return PTR_ERR(scu); } for (nc = 0; nc < count; nc++, offset += 2) { @@ -437,12 +454,12 @@ int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, int err; mutex_lock(&ipclock); - if (!scu) - scu = ipcdev; - if (!scu) { + scu = intel_scu_ipc_get(scu); + if (IS_ERR(scu)) { mutex_unlock(&ipclock); - return -ENODEV; + return PTR_ERR(scu); } + cmdval = sub << 12 | cmd; ipc_command(scu, cmdval); err = intel_scu_ipc_check_status(scu); @@ -482,11 +499,10 @@ int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, return -EINVAL; mutex_lock(&ipclock); - if (!scu) - scu = ipcdev; - if (!scu) { + scu = intel_scu_ipc_get(scu); + if (IS_ERR(scu)) { mutex_unlock(&ipclock); - return -ENODEV; + return PTR_ERR(scu); } memcpy(inbuf, in, inlen); -- cgit From 81bf4a4e9cb754e957ab614cde6a3b16244d670b Mon Sep 17 00:00:00 2001 From: Dennis Bonke Date: Thu, 14 Sep 2023 15:03:56 +0200 Subject: platform/x86: thinkpad_acpi: Take mutex in hotkey_resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hotkey_status_set expects the hotkey_mutex to be held. It seems like it was missed here and that gives lockdep warnings while resuming. Fixes: 38831eaf7d4c ("platform/x86: thinkpad_acpi: use lockdep annotations") Reviewed-by: Thomas Weißschuh Signed-off-by: Dennis Bonke Link: https://lore.kernel.org/r/20230914130356.235912-1-admin@dennisbonke.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d70c89d32534..41584427dc32 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4116,9 +4116,11 @@ static void hotkey_resume(void) { tpacpi_disable_brightness_delay(); + mutex_lock(&hotkey_mutex); if (hotkey_status_set(true) < 0 || hotkey_mask_set(hotkey_acpi_mask) < 0) pr_err("error while attempting to reset the event firmware interface\n"); + mutex_unlock(&hotkey_mutex); tpacpi_send_radiosw_update(); tpacpi_input_send_tabletsw(); -- cgit From 2e1b3ae3e1f2cf5a3c9c05d5f961d7d4257b489f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 7 Sep 2023 09:16:14 +0200 Subject: wifi: rtw88: rtw8723d: Fix MAC address offset in EEPROM The MAC address is stored at offset 0x107 in the EEPROM, like correctly stated in the comment. Add a two bytes reserved field right before the MAC address to shift it from offset 0x105 to 0x107. With this the MAC address returned from my RTL8723du wifi stick can be correctly decoded as "Shenzhen Four Seas Global Link Network Technology Co., Ltd." Fixes: 87caeef032fc ("wifi: rtw88: Add rtw8723du chipset support") Signed-off-by: Sascha Hauer Reported-by: Yanik Fuchs Cc: stable@vger.kernel.org Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230907071614.2032404-1-s.hauer@pengutronix.de --- drivers/net/wireless/realtek/rtw88/rtw8723d.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h index 3642a2c7f80c..2434e2480cbe 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h @@ -46,6 +46,7 @@ struct rtw8723du_efuse { u8 vender_id[2]; /* 0x100 */ u8 product_id[2]; /* 0x102 */ u8 usb_option; /* 0x104 */ + u8 res5[2]; /* 0x105 */ u8 mac_addr[ETH_ALEN]; /* 0x107 */ }; -- cgit From aef7a0300047e7b4707ea0411dc9597cba108fc8 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 8 Sep 2023 18:41:12 +0800 Subject: wifi: mwifiex: Fix oob check condition in mwifiex_process_rx_packet Only skip the code path trying to access the rfc1042 headers when the buffer is too small, so the driver can still process packets without rfc1042 headers. Fixes: 119585281617 ("wifi: mwifiex: Fix OOB and integer underflow when rx packets") Signed-off-by: Pin-yen Lin Acked-by: Brian Norris Reviewed-by: Matthew Wang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230908104308.1546501-1-treapking@chromium.org --- drivers/net/wireless/marvell/mwifiex/sta_rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c index 65420ad67416..257737137cd7 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c @@ -86,7 +86,8 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length); rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off; - if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) { + if (sizeof(rx_pkt_hdr->eth803_hdr) + sizeof(rfc1042_header) + + rx_pkt_off > skb->len) { mwifiex_dbg(priv->adapter, ERROR, "wrong rx packet offset: len=%d, rx_pkt_off=%d\n", skb->len, rx_pkt_off); @@ -95,12 +96,13 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, return -1; } - if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, - sizeof(bridge_tunnel_header))) || - (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, - sizeof(rfc1042_header)) && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX)) { + if (sizeof(*rx_pkt_hdr) + rx_pkt_off <= skb->len && + ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, + sizeof(bridge_tunnel_header))) || + (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, + sizeof(rfc1042_header)) && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX))) { /* * Replace the 803 header and rfc1042 header (llc/snap) with an * EthernetII header, keep the src/dst and snap_type -- cgit From 4fed494abcd4fde5c24de19160e93814f912fdb3 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Thu, 14 Sep 2023 09:02:27 +0200 Subject: wifi: brcmfmac: Replace 1-element arrays with flexible arrays Since commit 2d47c6956ab3 ("ubsan: Tighten UBSAN_BOUNDS on GCC"), UBSAN_BOUNDS no longer pretends 1-element arrays are unbounded. Walking 'element' and 'channel_list' will trigger warnings, so make them proper flexible arrays. False positive warnings were: UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:6984:20 index 1 is out of range for type '__le32 [1]' UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:1126:27 index 1 is out of range for type '__le16 [1]' for these lines of code: 6884 ch.chspec = (u16)le32_to_cpu(list->element[i]); 1126 params_le->channel_list[i] = cpu_to_le16(chanspec); Cc: stable@vger.kernel.org # 6.5+ Signed-off-by: Juerg Haefliger Reviewed-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230914070227.12028-1-juerg.haefliger@canonical.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index bece26741d3a..611d1a6aabb9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -442,7 +442,12 @@ struct brcmf_scan_params_v2_le { * fixed parameter portion is assumed, otherwise * ssid in the fixed portion is ignored */ - __le16 channel_list[1]; /* list of chanspecs */ + union { + __le16 padding; /* Reserve space for at least 1 entry for abort + * which uses an on stack brcmf_scan_params_v2_le + */ + DECLARE_FLEX_ARRAY(__le16, channel_list); /* chanspecs */ + }; }; struct brcmf_scan_results { @@ -702,7 +707,7 @@ struct brcmf_sta_info_le { struct brcmf_chanspec_list { __le32 count; /* # of entries */ - __le32 element[1]; /* variable length uint32 list */ + __le32 element[]; /* variable length uint32 list */ }; /* -- cgit From 069b292b4b1f9c24674ea69533cd14f7492ead80 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 18 Sep 2023 13:29:01 +0300 Subject: MAINTAINERS: Add myself into x86 platform driver maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hans has been asking for another person to help as the maintainer of the x86 platform driver because Mark has not been able to find time to do that. I got asked for the task and have been reviewing the relevant patches for a while now but lets make it more official by adding the MAINTAINERS entries. Signed-off-by: Ilpo Järvinen Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230918102901.17669-2-ilpo.jarvinen@linux.intel.com Acked-by: Hans de Goede Signed-off-by: Hans de Goede --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..b04cbcec521f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13617,6 +13617,7 @@ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT M: Hans de Goede +M: Ilpo Järvinen M: Mark Gross M: Vadim Pasternak L: platform-driver-x86@vger.kernel.org @@ -14211,6 +14212,7 @@ F: drivers/platform/surface/surface_gpe.c MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT M: Hans de Goede +M: Ilpo Järvinen M: Mark Gross M: Maximilian Luz L: platform-driver-x86@vger.kernel.org @@ -23424,6 +23426,7 @@ F: drivers/platform/x86/x86-android-tablets/ X86 PLATFORM DRIVERS M: Hans de Goede +M: Ilpo Järvinen M: Mark Gross L: platform-driver-x86@vger.kernel.org S: Maintained -- cgit From 52954b750958dcab9e44935f0c32643279091c85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 11 Sep 2023 20:00:28 +0200 Subject: gfs2: Fix another freeze/thaw hang On a thawed filesystem, the freeze glock is held in shared mode. In order to initiate a cluster-wide freeze, the node initiating the freeze drops the freeze glock and grabs it in exclusive mode. The other nodes recognize this as contention on the freeze glock; function freeze_go_callback is invoked. This indicates to them that they must freeze the filesystem locally, drop the freeze glock, and then re-acquire it in shared mode before being able to unfreeze the filesystem locally. While a node is trying to re-acquire the freeze glock in shared mode, additional contention can occur. In that case, the node must behave in the same way as above. Unfortunately, freeze_go_callback() contains a check that causes it to bail out when the freeze glock isn't held in shared mode. Fix that to allow the glock to be unlocked or held in shared mode. In addition, update a reference to trylock_super() which has been renamed to super_trylock_shared() in the meantime. Fixes: b77b4a4815a9 ("gfs2: Rework freeze / thaw logic") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d26759a98b10..f41ca89d216b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -567,15 +567,16 @@ static void freeze_go_callback(struct gfs2_glock *gl, bool remote) struct super_block *sb = sdp->sd_vfs; if (!remote || - gl->gl_state != LM_ST_SHARED || + (gl->gl_state != LM_ST_SHARED && + gl->gl_state != LM_ST_UNLOCKED) || gl->gl_demote_state != LM_ST_UNLOCKED) return; /* * Try to get an active super block reference to prevent racing with - * unmount (see trylock_super()). But note that unmount isn't the only - * place where a write lock on s_umount is taken, and we can fail here - * because of things like remount as well. + * unmount (see super_trylock_shared()). But note that unmount isn't + * the only place where a write lock on s_umount is taken, and we can + * fail here because of things like remount as well. */ if (down_read_trylock(&sb->s_umount)) { atomic_inc(&sb->s_active); -- cgit From 62862485a4c3a52029fc30f4bdde9af04afdafc9 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 12 Sep 2023 08:05:51 -0500 Subject: gfs2: fix glock shrinker ref issues Before this patch, function gfs2_scan_glock_lru would only try to free glocks that had a reference count of 0. But if the reference count ever got to 0, the glock should have already been freed. Shrinker function gfs2_dispose_glock_lru checks whether glocks on the LRU are demote_ok, and if so, tries to demote them. But that's only possible if the reference count is at least 1. This patch changes gfs2_scan_glock_lru so it will try to demote and/or dispose of glocks that have a reference count of 1 and which are either demotable, or are already unlocked. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9cbf8d98489a..4a280be229a6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2010,7 +2010,9 @@ static long gfs2_scan_glock_lru(int nr) if (!test_bit(GLF_LOCK, &gl->gl_flags)) { if (!spin_trylock(&gl->gl_lockref.lock)) continue; - if (!gl->gl_lockref.count) { + if (gl->gl_lockref.count <= 1 && + (gl->gl_state == LM_ST_UNLOCKED || + demote_ok(gl))) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; -- cgit From fb95d536080e6c1db099f0023f59cd55adcc5d87 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 18 Sep 2023 08:13:41 -0500 Subject: gfs2: Fix quota=quiet oversight Patch eef46ab713f7 introduced a new gfs2 quota=quiet mount option. Checks for the new option were added to quota.c, but a check in gfs2_quota_lock_check() was overlooked. This patch adds the missing check. Fixes: eef46ab713f7 ("gfs2: Introduce new quota=quiet mount option") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 21ada332d555..1429945215a0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -50,7 +50,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && + sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) -- cgit From 6c667ef6e2c88523469a6a94493b441cac2970d9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Sep 2023 15:59:04 +0300 Subject: HID: steelseries: Fix signedness bug in steelseries_headset_arctis_1_fetch_battery() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hid_hw_raw_request() function returns negative error codes or the number bytes transferred.  The problem is that when it returns negative error codes and we check if "ret < sizeof(arctis_1_battery_request)", then the negative values are type promoted from int to high unsigned long values and treated as success. This was detected using Smatch: drivers/hid/hid-steelseries.c:393 steelseries_headset_arctis_1_fetch_battery() warn: error code type promoted to positive: 'ret' Fixes: a0c76896c3fb ("HID: steelseries: Add support for Arctis 1 XBox") Signed-off-by: Dan Carpenter Reviewed-by: Bastien Nocera Signed-off-by: Jiri Kosina --- drivers/hid/hid-steelseries.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c index 43d2cf7153d7..b3edadf42d6d 100644 --- a/drivers/hid/hid-steelseries.c +++ b/drivers/hid/hid-steelseries.c @@ -390,7 +390,7 @@ static int steelseries_headset_arctis_1_fetch_battery(struct hid_device *hdev) ret = hid_hw_raw_request(hdev, arctis_1_battery_request[0], write_buf, sizeof(arctis_1_battery_request), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); - if (ret < sizeof(arctis_1_battery_request)) { + if (ret < (int)sizeof(arctis_1_battery_request)) { hid_err(hdev, "hid_hw_raw_request() failed with %d\n", ret); ret = -ENODATA; } -- cgit From 2d866603e25b1ce7e536839f62d1faae1c03d92f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 28 Aug 2023 00:24:38 +0200 Subject: HID: logitech-hidpp: Add Bluetooth ID for the Logitech M720 Triathlon mouse Using hidpp for the M720 adds battery info reporting and hires scrolling support. Signed-off-by: Hans de Goede Signed-off-by: Bastien Nocera Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 05f5b5f588a2..ff077df0babf 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4677,6 +4677,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, + { /* M720 Triathlon mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb015) }, { /* MX Ergo trackball over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, -- cgit From 058574879853260a22bbec1f94221dfc5149d85c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Sep 2023 17:05:17 -0700 Subject: HID: nvidia-shield: add LEDS_CLASS dependency The hid-nvidia-shield driver uses functions that are built only when LEDS_CLASS is set, so make the driver depend on that symbol to prevent build errors. riscv32-linux-ld: drivers/hid/hid-nvidia-shield.o: in function `.L11': hid-nvidia-shield.c:(.text+0x192): undefined reference to `led_classdev_unregister' riscv32-linux-ld: drivers/hid/hid-nvidia-shield.o: in function `.L113': hid-nvidia-shield.c:(.text+0xfa4): undefined reference to `led_classdev_register_ext' Fixes: 09308562d4af ("HID: nvidia-shield: Initial driver implementation with Thunderstrike support") Signed-off-by: Randy Dunlap Cc: Rahul Rameshbabu Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 0cea301cc9a9..c72459d24237 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -799,6 +799,7 @@ config HID_NVIDIA_SHIELD tristate "NVIDIA SHIELD devices" depends on USB_HID depends on BT_HIDP + depends on LEDS_CLASS help Support for NVIDIA SHIELD accessories. -- cgit From ffe3b7837a2bb421df84d0177481db9f52c93a71 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 18 Sep 2023 10:40:59 +0800 Subject: HID: holtek: fix slab-out-of-bounds Write in holtek_kbd_input_event There is a slab-out-of-bounds Write bug in hid-holtek-kbd driver. The problem is the driver assumes the device must have an input but some malicious devices violate this assumption. Fix this by checking hid_device's input is non-empty before its usage. Signed-off-by: Ma Ke Signed-off-by: Jiri Kosina --- drivers/hid/hid-holtek-kbd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 403506b9697e..b346d68a06f5 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -130,6 +130,10 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, return -ENODEV; boot_hid = usb_get_intfdata(boot_interface); + if (list_empty(&boot_hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } boot_hid_input = list_first_entry(&boot_hid->inputs, struct hid_input, list); -- cgit From 8ed99af4a266a3492d773b5d85c3f8e9f81254b6 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Fri, 15 Sep 2023 22:27:54 +0000 Subject: selftests/user_events: Fix to unmount tracefs when test created mount Fix to unmount tracefs if the self-test mounted it to allow testing. If tracefs was already mounted, this does nothing. Suggested-by: Mark Brown Link: https://lore.kernel.org/all/29fce076-746c-4650-8358-b4e0fa215cf7@sirena.org.uk/ Fixes: a06023a8f78d ("selftests/user_events: Fix failures when user_events is not installed") Signed-off-by: Beau Belgrave Reviewed-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/user_events/abi_test.c | 4 +++- tools/testing/selftests/user_events/dyn_test.c | 5 ++++- tools/testing/selftests/user_events/ftrace_test.c | 5 ++++- tools/testing/selftests/user_events/perf_test.c | 5 ++++- .../selftests/user_events/user_events_selftests.h | 24 +++++++++++++++++----- 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 22374d29ffdd..8202f1327c39 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -91,16 +91,18 @@ static int reg_disable(long *enable, int bit) FIXTURE(user) { long check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); change_event(false); self->check = 0; } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); } TEST_F(user, enablement) { diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index 32c827a52d7d..a85980190bea 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -144,13 +144,16 @@ do { \ FIXTURE(user) { int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + wait_for_delete(); } diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index 6a260caeeddc..dcd7509fe2e0 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -204,10 +204,11 @@ FIXTURE(user) { int data_fd; int enable_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); self->status_fd = open(status_file, O_RDONLY); ASSERT_NE(-1, self->status_fd); @@ -219,6 +220,8 @@ FIXTURE_SETUP(user) { } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->status_fd); close(self->data_fd); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index f893398cda05..5288e768b207 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -111,16 +111,19 @@ static int clear(int *check) FIXTURE(user) { int data_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); self->data_fd = open(data_file, O_RDWR); ASSERT_NE(-1, self->data_fd); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->data_fd); if (clear(&self->check) != 0) diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h index 690378942f82..e1c3c063c031 100644 --- a/tools/testing/selftests/user_events/user_events_selftests.h +++ b/tools/testing/selftests/user_events/user_events_selftests.h @@ -11,13 +11,19 @@ #include "../kselftest.h" -static inline bool tracefs_enabled(char **message, bool *fail) +static inline void tracefs_unmount(void) +{ + umount("/sys/kernel/tracing"); +} + +static inline bool tracefs_enabled(char **message, bool *fail, bool *umount) { struct stat buf; int ret; *message = ""; *fail = false; + *umount = false; /* Ensure tracefs is installed */ ret = stat("/sys/kernel/tracing", &buf); @@ -37,6 +43,8 @@ static inline bool tracefs_enabled(char **message, bool *fail) return false; } + *umount = true; + ret = stat("/sys/kernel/tracing/README", &buf); } @@ -49,13 +57,14 @@ static inline bool tracefs_enabled(char **message, bool *fail) return true; } -static inline bool user_events_enabled(char **message, bool *fail) +static inline bool user_events_enabled(char **message, bool *fail, bool *umount) { struct stat buf; int ret; *message = ""; *fail = false; + *umount = false; if (getuid() != 0) { *message = "Must be run as root"; @@ -63,7 +72,7 @@ static inline bool user_events_enabled(char **message, bool *fail) return false; } - if (!tracefs_enabled(message, fail)) + if (!tracefs_enabled(message, fail, umount)) return false; /* Ensure user_events is installed */ @@ -85,10 +94,10 @@ static inline bool user_events_enabled(char **message, bool *fail) return true; } -#define USER_EVENT_FIXTURE_SETUP(statement) do { \ +#define USER_EVENT_FIXTURE_SETUP(statement, umount) do { \ char *message; \ bool fail; \ - if (!user_events_enabled(&message, &fail)) { \ + if (!user_events_enabled(&message, &fail, &(umount))) { \ if (fail) { \ TH_LOG("Setup failed due to: %s", message); \ ASSERT_FALSE(fail); \ @@ -97,4 +106,9 @@ static inline bool user_events_enabled(char **message, bool *fail) } \ } while (0) +#define USER_EVENT_FIXTURE_TEARDOWN(umount) do { \ + if ((umount)) \ + tracefs_unmount(); \ +} while (0) + #endif /* _USER_EVENTS_SELFTESTS_H */ -- cgit From 10f4c9b9a33b7df000f74fa0d896351fb1a61e6a Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 18 Sep 2023 12:52:34 +0200 Subject: x86/asm: Fix build of UML with KASAN Building UML with KASAN fails since commit 69d4c0d32186 ("entry, kasan, x86: Disallow overriding mem*() functions") with the following errors: $ tools/testing/kunit/kunit.py run --kconfig_add CONFIG_KASAN=y ... ld: mm/kasan/shadow.o: in function `memset': shadow.c:(.text+0x40): multiple definition of `memset'; arch/x86/lib/memset_64.o:(.noinstr.text+0x0): first defined here ld: mm/kasan/shadow.o: in function `memmove': shadow.c:(.text+0x90): multiple definition of `memmove'; arch/x86/lib/memmove_64.o:(.noinstr.text+0x0): first defined here ld: mm/kasan/shadow.o: in function `memcpy': shadow.c:(.text+0x110): multiple definition of `memcpy'; arch/x86/lib/memcpy_64.o:(.noinstr.text+0x0): first defined here UML does not use GENERIC_ENTRY and is still supposed to be allowed to override the mem*() functions, so use weak aliases in that case. Fixes: 69d4c0d32186 ("entry, kasan, x86: Disallow overriding mem*() functions") Signed-off-by: Vincent Whitchurch Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20230918-uml-kasan-v3-1-7ad6db477df6@axis.com --- arch/x86/include/asm/linkage.h | 7 +++++++ arch/x86/lib/memcpy_64.S | 2 +- arch/x86/lib/memmove_64.S | 2 +- arch/x86/lib/memset_64.S | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5ff49fd67732..571fe4d2d232 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -105,6 +105,13 @@ CFI_POST_PADDING \ SYM_FUNC_END(__cfi_##name) +/* UML needs to be able to override memcpy() and friends for KASAN. */ +#ifdef CONFIG_UML +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK +#else +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ #define SYM_TYPED_FUNC_START(name) \ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 8f95fb267caa..76697df8dfd5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -40,7 +40,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS(memcpy, __memcpy) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0559b206fb11..ccdf3a597045 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -212,5 +212,5 @@ SYM_FUNC_START(__memmove) SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) -SYM_FUNC_ALIAS(memmove, __memmove) +SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 7c59a704c458..3d818b849ec6 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS(memset, __memset) +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) -- cgit From a6828214480e2f00a8a7e64c7a55fc42b0f54e1c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Sep 2023 17:49:35 -0400 Subject: workqueue: Removed double allocation of wq_update_pod_attrs_buf First commit 2930155b2e272 ("workqueue: Initialize unbound CPU pods later in the boot") added the initialization of wq_update_pod_attrs_buf to workqueue_init_early(), and then latter on, commit 84193c07105c6 ("workqueue: Generalize unbound CPU pods") added it as well. This appeared in a kmemleak run where the second allocation made the first allocation leak. Fixes: 84193c07105c6 ("workqueue: Generalize unbound CPU pods") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Geert Uytterhoeven Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c85825e17df8..129328b765fb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6535,9 +6535,6 @@ void __init workqueue_init_early(void) BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); - wq_update_pod_attrs_buf = alloc_workqueue_attrs(); - BUG_ON(!wq_update_pod_attrs_buf); - pt->nr_pods = 1; cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); pt->pod_node[0] = NUMA_NO_NODE; -- cgit From c1ed72171ed580fbf159e703b77685aa4b0d0df5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 12:08:27 +0200 Subject: ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CBA Like various other ASUS ExpertBook-s, the ASUS ExpertBook B1402CBA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to EdgeHigh. This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217901 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 32cfa3f4efd3..8116b55b6c98 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -439,6 +439,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + .ident = "Asus ExpertBook B1402CBA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"), + }, + }, { .ident = "Asus ExpertBook B1502CBA", .matches = { -- cgit From dd64c873ed11cdae340be06dcd2364870fd3e4fc Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 11 Sep 2023 16:27:22 +0800 Subject: workqueue: Fix missed pwq_release_worker creation in wq_cpu_intensive_thresh_init() Currently, if the wq_cpu_intensive_thresh_us is set to specific value, will cause the wq_cpu_intensive_thresh_init() early exit and missed creation of pwq_release_worker. this commit therefore create the pwq_release_worker in advance before checking the wq_cpu_intensive_thresh_us. Signed-off-by: Zqiang Signed-off-by: Tejun Heo Fixes: 967b494e2fd1 ("workqueue: Use a kthread_worker to release pool_workqueues") --- kernel/workqueue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 129328b765fb..b9f053a5a5f0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6602,13 +6602,13 @@ static void __init wq_cpu_intensive_thresh_init(void) unsigned long thresh; unsigned long bogo; + pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release"); + BUG_ON(IS_ERR(pwq_release_worker)); + /* if the user set it to a specific value, keep it */ if (wq_cpu_intensive_thresh_us != ULONG_MAX) return; - pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release"); - BUG_ON(IS_ERR(pwq_release_worker)); - /* * The default of 10ms is derived from the fact that most modern (as of * 2023) processors can do a lot in 10ms and that it's just below what -- cgit From 2dd1d862817b850787f4755c05d55e5aeb76dd08 Mon Sep 17 00:00:00 2001 From: Ahmad Khalifa Date: Mon, 18 Sep 2023 19:47:22 +0100 Subject: hwmon: (nct6775) Fix non-existent ALARM warning Skip non-existent ALARM attribute to avoid a shift-out-of-bounds dmesg warning. Reported-by: Doug Smythies Closes: https://lore.kernel.org/linux-hwmon/ZQVzdlHgWdFhOVyQ@debian.me/T/#mc69b690660eb50734a6b07506d74a119e0266f1b Fixes: b7f1f7b2523a ("hwmon: (nct6775) Additional TEMP registers for nct6799") Signed-off-by: Ahmad Khalifa Link: https://lore.kernel.org/r/20230918184722.2033225-1-ahmad@khalifa.ws Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 02a71244fc3b..b5b81bd83bb1 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -1910,6 +1910,10 @@ static umode_t nct6775_in_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct nct6775_data *data = dev_get_drvdata(dev); int in = index / 5; /* voltage index */ + int nr = index % 5; /* attribute index */ + + if (nr == 1 && data->ALARM_BITS[in] == -1) + return 0; if (!(data->have_in & BIT(in))) return 0; -- cgit From df1c357f25d808e30b216188330e708e09e1a412 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 18 Sep 2023 14:17:11 +0100 Subject: netfs: Only call folio_start_fscache() one time for each folio If a network filesystem using netfs implements a clamp_length() function, it can set subrequest lengths smaller than a page size. When we loop through the folios in netfs_rreq_unlock_folios() to set any folios to be written back, we need to make sure we only call folio_start_fscache() once for each folio. Otherwise, this simple testcase: mount -o fsc,rsize=1024,wsize=1024 127.0.0.1:/export /mnt/nfs dd if=/dev/zero of=/mnt/nfs/file.bin bs=4096 count=1 1+0 records in 1+0 records out 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.0126359 s, 324 kB/s echo 3 > /proc/sys/vm/drop_caches cat /mnt/nfs/file.bin > /dev/null will trigger an oops similar to the following: page dumped because: VM_BUG_ON_FOLIO(folio_test_private_2(folio)) ------------[ cut here ]------------ kernel BUG at include/linux/netfs.h:44! ... CPU: 5 PID: 134 Comm: kworker/u16:5 Kdump: loaded Not tainted 6.4.0-rc5 ... RIP: 0010:netfs_rreq_unlock_folios+0x68e/0x730 [netfs] ... Call Trace: netfs_rreq_assess+0x497/0x660 [netfs] netfs_subreq_terminated+0x32b/0x610 [netfs] nfs_netfs_read_completion+0x14e/0x1a0 [nfs] nfs_read_completion+0x2f9/0x330 [nfs] rpc_free_task+0x72/0xa0 [sunrpc] rpc_async_release+0x46/0x70 [sunrpc] process_one_work+0x3bd/0x710 worker_thread+0x89/0x610 kthread+0x181/0x1c0 ret_from_fork+0x29/0x50 Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers" Link: https://bugzilla.redhat.com/show_bug.cgi?id=2210612 Signed-off-by: Dave Wysochanski Reviewed-by: Jeff Layton Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230608214137.856006-1-dwysocha@redhat.com/ # v1 Link: https://lore.kernel.org/r/20230915185704.1082982-1-dwysocha@redhat.com/ # v2 Signed-off-by: Linus Torvalds --- fs/netfs/buffered_read.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 3404707ddbe7..2cd3ccf4c439 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -47,12 +47,14 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) xas_for_each(&xas, folio, last_page) { loff_t pg_end; bool pg_failed = false; + bool folio_started; if (xas_retry(&xas, folio)) continue; pg_end = folio_pos(folio) + folio_size(folio) - 1; + folio_started = false; for (;;) { loff_t sreq_end; @@ -60,8 +62,10 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) + if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { folio_start_fscache(folio); + folio_started = true; + } pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) -- cgit From 5a59f2ff30ae27bb5c3c1aa5d9e11d4d9fc003a5 Mon Sep 17 00:00:00 2001 From: Shixiong Ou Date: Wed, 6 Sep 2023 09:49:42 +0800 Subject: vfio/pds: Add missing PCI_IOV depends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If PCI_ATS isn't set, then pdev->physfn is not defined. it causes a compilation issue: ../drivers/vfio/pci/pds/vfio_dev.c:165:30: error: ‘struct pci_dev’ has no member named ‘physfn’; did you mean ‘is_physfn’? 165 | __func__, pci_dev_id(pdev->physfn), pci_id, vf_id, | ^~~~~~ So adding PCI_IOV depends to select PCI_ATS. Signed-off-by: Shixiong Ou Reviewed-by: Brett Creeley Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230906014942.1658769-1-oushixiong@kylinos.cn Fixes: 63f77a7161a2 ("vfio/pds: register with the pds_core PF") Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/pds/Kconfig b/drivers/vfio/pci/pds/Kconfig index 407b3fd32733..6eceef7b028a 100644 --- a/drivers/vfio/pci/pds/Kconfig +++ b/drivers/vfio/pci/pds/Kconfig @@ -3,7 +3,7 @@ config PDS_VFIO_PCI tristate "VFIO support for PDS PCI devices" - depends on PDS_CORE + depends on PDS_CORE && PCI_IOV select VFIO_PCI_CORE help This provides generic PCI support for PDS devices using the VFIO -- cgit From 27004f89b0a2479eceb77885337c2a7b0fdafbc4 Mon Sep 17 00:00:00 2001 From: Shixiong Ou Date: Thu, 14 Sep 2023 10:13:32 +0800 Subject: vfio/pds: Use proper PF device access helper The pci_physfn() helper exists to support cases where the physfn field may not be compiled into the pci_dev structure. We've declared this driver dependent on PCI_IOV to avoid this problem, but regardless we should follow the precedent not to access this field directly. Signed-off-by: Shixiong Ou Reviewed-by: Brett Creeley Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230914021332.1929155-1-oushixiong@kylinos.cn Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/vfio_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index b46174f5eb09..649b18ee394b 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -162,7 +162,7 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pci_id = PCI_DEVID(pdev->bus->number, pdev->devfn); dev_dbg(&pdev->dev, "%s: PF %#04x VF %#04x vf_id %d domain %d pds_vfio %p\n", - __func__, pci_dev_id(pdev->physfn), pci_id, vf_id, + __func__, pci_dev_id(pci_physfn(pdev)), pci_id, vf_id, pci_domain_nr(pdev->bus), pds_vfio); return 0; -- cgit From e599ed7866cd804ca15de7a92f7f629944cc278d Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Thu, 14 Sep 2023 17:15:08 +0800 Subject: block: correct stale comment in rq_qos_wait The rq_qos_wait calls common wake-up function rq_qos_wake_function to get token. Just replace stale wbt_wake_function with rq_qos_wake_function in comment. Signed-off-by: Kemeng Shi Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230914091508.36232-1-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 167be74df4ee..dd7310c94713 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -270,7 +270,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, finish_wait(&rqw->wait, &data.wq); /* - * We raced with wbt_wake_function() getting a token, + * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ -- cgit From 35d30c9cf12730a1e37053dfde4007c7cc452d1a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2023 15:57:39 -0700 Subject: iomap: don't skip reading in !uptodate folios when unsharing a range Prior to commit a01b8f225248e, we would always read in the contents of a !uptodate folio prior to writing userspace data into the folio, allocated a folio state object, etc. Ritesh introduced an optimization that skips all of that if the write would cover the entire folio. Unfortunately, the optimization misses the unshare case, where we always have to read in the folio contents since there isn't a data buffer supplied by userspace. This can result in stale kernel memory exposure if userspace issues a FALLOC_FL_UNSHARE_RANGE call on part of a shared file that isn't already cached. This was caught by observing fstests regressions in the "unshare around" mechanism that is used for unaligned writes to a reflinked realtime volume when the realtime extent size is larger than 1FSB, though I think it applies to any shared file. Cc: ritesh.list@gmail.com, willy@infradead.org Fixes: a01b8f225248e ("iomap: Allocate ifs in ->write_begin() early") Signed-off-by: Darrick J. Wong Reviewed-by: Ritesh Harjani (IBM) --- fs/iomap/buffered-io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ae8673ce08b1..0350830fc989 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -640,11 +640,13 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, size_t poff, plen; /* - * If the write completely overlaps the current folio, then + * If the write or zeroing completely overlaps the current folio, then * entire folio will be dirtied so there is no need for * per-block state tracking structures to be attached to this folio. + * For the unshare case, we must read in the ondisk contents because we + * are not changing pagecache contents. */ - if (pos <= folio_pos(folio) && + if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) && pos + len >= folio_pos(folio) + folio_size(folio)) return 0; -- cgit From f939aeea7ab7d96cd321e7ac107f5a070836b66f Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Fri, 15 Sep 2023 16:27:28 +0800 Subject: erofs: allow empty device tags in flatdev mode Device tags aren't actually required in flatdev mode, thus fix mount failure due to empty device tags in flatdev mode. Signed-off-by: Jingbo Xu Fixes: 8b465fecc35a ("erofs: support flattened block device for multi-blob images") Reviewed-by: Jia Zhu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20230915082728.56588-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 44a24d573f1f..3700af9ee173 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -235,7 +235,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(ptr); dis = ptr + erofs_blkoff(sb, *pos); - if (!dif->path) { + if (!sbi->devs->flatdev && !dif->path) { if (!dis->tag[0]) { erofs_err(sb, "empty device tag @ pos %llu", *pos); return -EINVAL; -- cgit From 37510dd566bdbff31a769cde2fa6654bccdb8b24 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 24 Aug 2023 17:34:21 +0200 Subject: xen: simplify evtchn_do_upcall() call maze There are several functions involved for performing the functionality of evtchn_do_upcall(): - __xen_evtchn_do_upcall() doing the real work - xen_hvm_evtchn_do_upcall() just being a wrapper for __xen_evtchn_do_upcall(), exposed for external callers - xen_evtchn_do_upcall() calling __xen_evtchn_do_upcall(), too, but without any user Simplify this maze by: - removing the unused xen_evtchn_do_upcall() - removing xen_hvm_evtchn_do_upcall() as the only left caller of __xen_evtchn_do_upcall(), while renaming __xen_evtchn_do_upcall() to xen_evtchn_do_upcall() Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Thomas Gleixner Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_hvm.c | 2 +- drivers/xen/events/events_base.c | 21 ++------------------- drivers/xen/platform-pci.c | 2 +- include/xen/events.h | 3 +-- 7 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d59765aef22..c392e18f1e43 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -207,7 +207,7 @@ static void xen_power_off(void) static irqreturn_t xen_arm_callback(int irq, void *arg) { - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); return IRQ_HANDLED; } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b33..93c60c0c9d4a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -294,7 +294,7 @@ static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b8db2148c07d..0337392a3121 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(hypercall_page); * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. - * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events. + * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9a192f51f1b0..3f8c34707c50 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -136,7 +136,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 3bdd5b59661d..0bb86e6c4d0a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1704,7 +1704,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) generic_handle_irq(irq); } -static int __xen_evtchn_do_upcall(void) +int xen_evtchn_do_upcall(void) { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; @@ -1735,24 +1735,7 @@ static int __xen_evtchn_do_upcall(void) return ret; } - -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - - __xen_evtchn_do_upcall(); - - irq_exit(); - set_irq_regs(old_regs); -} - -int xen_hvm_evtchn_do_upcall(void) -{ - return __xen_evtchn_do_upcall(); -} -EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); +EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); /* Rebind a new event channel to an existing irq. */ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index fcc819131572..544d3f9010b9 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -64,7 +64,7 @@ static uint64_t get_callback_via(struct pci_dev *pdev) static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) { - return xen_hvm_evtchn_do_upcall(); + return xen_evtchn_do_upcall(); } static int xen_allocate_irq(struct pci_dev *pdev) diff --git a/include/xen/events.h b/include/xen/events.h index 95d5e28de324..23932b0673dc 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -105,8 +105,7 @@ int irq_from_virq(unsigned int cpu, unsigned int virq); evtchn_port_t evtchn_from_irq(unsigned irq); int xen_set_callback_via(uint64_t via); -void xen_evtchn_do_upcall(struct pt_regs *regs); -int xen_hvm_evtchn_do_upcall(void); +int xen_evtchn_do_upcall(void); /* Bind a pirq for a physical interrupt to an irq. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, -- cgit From 361239fd1448d64faa4adba5bbf100401c0a606e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:26 +0200 Subject: arm/xen: remove lazy mode related definitions include/xen/arm/hypervisor.h contains definitions related to paravirt lazy mode, which are used nowhere in the code. All paravirt lazy mode related users are in x86 code, so remove the definitions on Arm side. Signed-off-by: Juergen Gross Acked-by: Stefano Stabellini Link: https://lore.kernel.org/r/20230913113828.18421-2-jgross@suse.com Signed-off-by: Juergen Gross --- include/xen/arm/hypervisor.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 43ef24dd030e..9995695204f5 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -7,18 +7,6 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - return PARAVIRT_LAZY_NONE; -} - #ifdef CONFIG_XEN void __init xen_early_init(void); #else -- cgit From a4a7644c15096f57f92252dd6e1046bf269c87d8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:27 +0200 Subject: x86/xen: move paravirt lazy code Only Xen is using the paravirt lazy mode code, so it can be moved to Xen specific sources. This allows to make some of the functions static or to merge them into their only call sites. While at it do a rename from "paravirt" to "xen" for all moved specifiers. No functional change. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/paravirt_types.h | 15 -------- arch/x86/include/asm/xen/hypervisor.h | 26 ++++++++++++++ arch/x86/kernel/paravirt.c | 67 ----------------------------------- arch/x86/xen/enlighten_pv.c | 39 ++++++++++++++++---- arch/x86/xen/mmu_pv.c | 55 ++++++++++++++++++---------- arch/x86/xen/multicalls.h | 4 +-- include/trace/events/xen.h | 12 +++---- 7 files changed, 102 insertions(+), 116 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4acbcddddc29..772d03487520 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -9,13 +9,6 @@ struct paravirt_patch_site { u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; #endif #ifdef CONFIG_PARAVIRT @@ -549,14 +542,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - void _paravirt_nop(void); void paravirt_BUG(void); unsigned long paravirt_ret0(void); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 5fc35f889cd1..ed05ce3df5c7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,6 +36,7 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include #include #define XEN_SIGNATURE "XenVMMXenVMM" @@ -63,4 +64,29 @@ void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); #endif +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 975f98d5eee5..97f1436c1a20 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -143,66 +143,7 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } -static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; - -static inline void enter_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - - this_cpu_write(paravirt_lazy_mode, mode); -} - -static void leave_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); - - this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); -} - -void paravirt_enter_lazy_mmu(void) -{ - enter_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_leave_lazy_mmu(void) -{ - leave_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_flush_lazy_mmu(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - #ifdef CONFIG_PARAVIRT_XXL -void paravirt_start_context_switch(struct task_struct *prev) -{ - BUG_ON(preemptible()); - - if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); - } - enter_lazy(PARAVIRT_LAZY_CPU); -} - -void paravirt_end_context_switch(struct task_struct *next) -{ - BUG_ON(preemptible()); - - leave_lazy(PARAVIRT_LAZY_CPU); - - if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) - arch_enter_lazy_mmu_mode(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -229,14 +170,6 @@ static noinstr void pv_native_safe_halt(void) } #endif -enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - if (in_interrupt()) - return PARAVIRT_LAZY_NONE; - - return this_cpu_read(paravirt_lazy_mode); -} - struct pv_info pv_info = { .name = "bare hardware", #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 49352fad7d1d..54b83825c4b6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -101,6 +101,16 @@ struct tls_descs { struct desc_struct desc[3]; }; +DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; + +enum xen_lazy_mode xen_get_lazy_mode(void) +{ + if (in_interrupt()) + return XEN_LAZY_NONE; + + return this_cpu_read(xen_lazy_mode); +} + /* * Updating the 3 TLS descriptors in the GDT on every task switch is * surprisingly expensive so we avoid updating them if they haven't @@ -362,10 +372,25 @@ static noinstr unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } +static void xen_start_context_switch(struct task_struct *prev) +{ + BUG_ON(preemptible()); + + if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); + } + enter_lazy(XEN_LAZY_CPU); +} + static void xen_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + xen_mc_flush(); - paravirt_end_context_switch(next); + leave_lazy(XEN_LAZY_CPU); + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } static unsigned long xen_store_tr(void) @@ -472,7 +497,7 @@ static void xen_set_ldt(const void *addr, unsigned entries) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gdt(const struct desc_ptr *dtr) @@ -568,7 +593,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (xen_get_lazy_mode() == XEN_LAZY_CPU) loadsegment(fs, 0); xen_mc_batch(); @@ -577,7 +602,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) load_TLS_descriptor(t, cpu, 1); load_TLS_descriptor(t, cpu, 2); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gs_index(unsigned int idx) @@ -909,7 +934,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } @@ -973,7 +998,7 @@ static void xen_write_cr0(unsigned long cr0) MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_write_cr4(unsigned long cr4) @@ -1156,7 +1181,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - .start_context_switch = paravirt_start_context_switch, + .start_context_switch = xen_start_context_switch, .end_context_switch = xen_end_context_switch, }, }; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1652c39e3dfb..b6830554ff69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -236,7 +236,7 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) u.val = pmd_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -270,7 +270,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) + if (xen_get_lazy_mode() != XEN_LAZY_MMU) return false; xen_mc_batch(); @@ -279,7 +279,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) u.val = pte_val_ma(pteval); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); return true; } @@ -325,7 +325,7 @@ void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } /* Assume pteval_t is equivalent to all the other *val_t types. */ @@ -419,7 +419,7 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) u.val = pud_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -499,7 +499,7 @@ static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) __xen_set_p4d_hyper(ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -531,7 +531,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) if (user_ptr) __xen_set_p4d_hyper((p4d_t *)user_ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } #if CONFIG_PGTABLE_LEVELS >= 5 @@ -1245,7 +1245,7 @@ static noinline void xen_flush_tlb(void) op->cmd = MMUEXT_TLB_FLUSH_LOCAL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1265,7 +1265,7 @@ static void xen_flush_tlb_one_user(unsigned long addr) op->arg1.linear_addr = addr & PAGE_MASK; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1302,7 +1302,7 @@ static void xen_flush_tlb_multi(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } static unsigned long xen_read_cr3(void) @@ -1361,7 +1361,7 @@ static void xen_write_cr3(unsigned long cr3) else __xen_write_cr3(false, 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } /* @@ -1396,7 +1396,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } static int xen_pgd_alloc(struct mm_struct *mm) @@ -1557,7 +1557,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } } @@ -1587,7 +1587,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) __set_pfn_prot(pfn, PAGE_KERNEL); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); ClearPagePinned(page); } @@ -1804,7 +1804,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) */ xen_mc_batch(); __xen_write_cr3(true, __pa(init_top_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for @@ -2083,6 +2083,23 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +static void xen_enter_lazy_mmu(void) +{ + enter_lazy(XEN_LAZY_MMU); +} + +static void xen_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (xen_get_lazy_mode() == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + static void __init xen_post_allocator_init(void) { pv_ops.mmu.set_pte = xen_set_pte; @@ -2107,7 +2124,7 @@ static void xen_leave_lazy_mmu(void) { preempt_disable(); xen_mc_flush(); - paravirt_leave_lazy_mmu(); + leave_lazy(XEN_LAZY_MMU); preempt_enable(); } @@ -2166,9 +2183,9 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .exit_mmap = xen_exit_mmap, .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, + .enter = xen_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, - .flush = paravirt_flush_lazy_mmu, + .flush = xen_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, @@ -2385,7 +2402,7 @@ static noinline void xen_flush_tlb_all(void) op->cmd = MMUEXT_TLB_FLUSH_ALL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 1c51b2c87f30..c3867b585e0d 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -26,7 +26,7 @@ static inline void xen_mc_batch(void) /* need to disable interrupts until this entry is complete */ local_irq_save(flags); - trace_xen_mc_batch(paravirt_get_lazy_mode()); + trace_xen_mc_batch(xen_get_lazy_mode()); __this_cpu_write(xen_mc_irq_flags, flags); } @@ -44,7 +44,7 @@ static inline void xen_mc_issue(unsigned mode) { trace_xen_mc_issue(mode); - if ((paravirt_get_lazy_mode() & mode) == 0) + if ((xen_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 44a3f565264d..0577f0cdd231 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -6,26 +6,26 @@ #define _TRACE_XEN_H #include -#include +#include #include struct multicall_entry; /* Multicalls */ DECLARE_EVENT_CLASS(xen_mc__batch, - TP_PROTO(enum paravirt_lazy_mode mode), + TP_PROTO(enum xen_lazy_mode mode), TP_ARGS(mode), TP_STRUCT__entry( - __field(enum paravirt_lazy_mode, mode) + __field(enum xen_lazy_mode, mode) ), TP_fast_assign(__entry->mode = mode), TP_printk("start batch LAZY_%s", - (__entry->mode == PARAVIRT_LAZY_MMU) ? "MMU" : - (__entry->mode == PARAVIRT_LAZY_CPU) ? "CPU" : "NONE") + (__entry->mode == XEN_LAZY_MMU) ? "MMU" : + (__entry->mode == XEN_LAZY_CPU) ? "CPU" : "NONE") ); #define DEFINE_XEN_MC_BATCH(name) \ DEFINE_EVENT(xen_mc__batch, name, \ - TP_PROTO(enum paravirt_lazy_mode mode), \ + TP_PROTO(enum xen_lazy_mode mode), \ TP_ARGS(mode)) DEFINE_XEN_MC_BATCH(xen_mc_batch); -- cgit From 49147beb0ccbf4c5bb81a44be93ec3bc5e4a79f1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Sep 2023 13:38:28 +0200 Subject: x86/xen: allow nesting of same lazy mode When running as a paravirtualized guest under Xen, Linux is using "lazy mode" for issuing hypercalls which don't need to take immediate effect in order to improve performance (examples are e.g. multiple PTE changes). There are two different lazy modes defined: MMU and CPU lazy mode. Today it is not possible to nest multiple lazy mode sections, even if they are of the same kind. A recent change in memory management added nesting of MMU lazy mode sections, resulting in a regression when running as Xen PV guest. Technically there is no reason why nesting of multiple sections of the same kind of lazy mode shouldn't be allowed. So add support for that for fixing the regression. Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230913113828.18421-4-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypervisor.h | 15 +++++++++++++-- arch/x86/xen/enlighten_pv.c | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index ed05ce3df5c7..7048dfacc04b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -72,10 +72,18 @@ enum xen_lazy_mode { }; DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); +DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); static inline void enter_lazy(enum xen_lazy_mode mode) { - BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); + + if (mode == old_mode) { + this_cpu_inc(xen_lazy_nesting); + return; + } + + BUG_ON(old_mode != XEN_LAZY_NONE); this_cpu_write(xen_lazy_mode, mode); } @@ -84,7 +92,10 @@ static inline void leave_lazy(enum xen_lazy_mode mode) { BUG_ON(this_cpu_read(xen_lazy_mode) != mode); - this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + if (this_cpu_read(xen_lazy_nesting) == 0) + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + else + this_cpu_dec(xen_lazy_nesting); } enum xen_lazy_mode xen_get_lazy_mode(void); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 54b83825c4b6..bbbfdd495ebd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -102,6 +102,7 @@ struct tls_descs { }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; +DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { -- cgit From 0fc6ff5a0f0488e09b496773c440ed5bb36d1f0d Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 11 Sep 2023 18:59:31 +0000 Subject: xen/efi: refactor deprecated strncpy `strncpy` is deprecated for use on NUL-terminated destination strings [1]. `efi_loader_signature` has space for 4 bytes. We are copying "Xen" (3 bytes) plus a NUL-byte which makes 4 total bytes. With that being said, there is currently not a bug with the current `strncpy()` implementation in terms of buffer overreads but we should favor a more robust string interface either way. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer while being functionally the same in this case. Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Cc: Kees Cook Signed-off-by: Justin Stitt Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230911-strncpy-arch-x86-xen-efi-c-v1-1-96ab2bba2feb@google.com Signed-off-by: Juergen Gross --- arch/x86/xen/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 863d0d6b3edc..7250d0e0e1a9 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -138,7 +138,7 @@ void __init xen_efi_init(struct boot_params *boot_params) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + strscpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", sizeof(boot_params->efi_info.efi_loader_signature)); boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); -- cgit From 263cb0cc5abac7c22a6c0dfa7e50e89d8e6c6900 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Aug 2023 23:06:14 +0200 Subject: media: imx-mipi-csis: Remove an incorrect fwnode_handle_put() call The commit in Fixes has removed an fwnode_graph_get_endpoint_by_id() call in mipi_csis_subdev_init(). So the reference that was taken should not be released anymore in the error handling path of the probe and in the remove function. Remove the now incorrect fwnode_handle_put() calls. Fixes: 1029939b3782 ("media: v4l: async: Simplify async sub-device fwnode matching") Signed-off-by: Christophe JAILLET Reviewed-by: Laurent Pinchart Reviewed-by: Rui Miguel Silva Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/platform/nxp/imx-mipi-csis.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/platform/nxp/imx-mipi-csis.c b/drivers/media/platform/nxp/imx-mipi-csis.c index 16f19a640130..5f93712bf485 100644 --- a/drivers/media/platform/nxp/imx-mipi-csis.c +++ b/drivers/media/platform/nxp/imx-mipi-csis.c @@ -1490,7 +1490,6 @@ err_cleanup: v4l2_async_unregister_subdev(&csis->sd); err_disable_clock: mipi_csis_clk_disable(csis); - fwnode_handle_put(csis->sd.fwnode); return ret; } @@ -1510,7 +1509,6 @@ static void mipi_csis_remove(struct platform_device *pdev) mipi_csis_clk_disable(csis); v4l2_subdev_cleanup(&csis->sd); media_entity_cleanup(&csis->sd.entity); - fwnode_handle_put(csis->sd.fwnode); pm_runtime_set_suspended(&pdev->dev); } -- cgit From aadb0330cfb60829318ef02ccfb9dd09cd14d920 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 19 Sep 2023 10:12:05 +0300 Subject: ALSA: usb-audio: scarlett_gen2: Fix another -Wformat-truncation warning The recent enablement of -Wformat-truncation leads to a false-positive warning for mixer_scarlett_gen2.c. For suppressing the warning, replace snprintf() with scnprintf(). As stated in the above, truncation doesn't matter. Fixes: 78bd8f5126f8 ("ALSA: usb-audio: scarlett_gen2: Fix -Wformat-truncation warning") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230919071205.10684-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 5c6f50f38840..d260be8cb6bc 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -3205,8 +3205,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) /* Add input phantom controls */ if (info->inputs_per_phantom == 1) { for (i = 0; i < info->phantom_count; i++) { - snprintf(s, sizeof(s), fmt, i + 1, - "Phantom Power", "Switch"); + scnprintf(s, sizeof(s), fmt, i + 1, + "Phantom Power", "Switch"); err = scarlett2_add_new_ctl( mixer, &scarlett2_phantom_ctl, i, 1, s, &private->phantom_ctls[i]); -- cgit From 8070274b472e2e9f5f67a990f5e697634c415708 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 18 Sep 2023 00:53:28 +0800 Subject: net: stmmac: fix incorrect rxq|txq_stats reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") caused one regression as found by Uwe, the backtrace looks like: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-rc1-00449-g133466c3bbe1-dirty #21 Hardware name: STM32 (Device Tree Support) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x60/0x90 dump_stack_lvl from register_lock_class+0x98c/0x99c register_lock_class from __lock_acquire+0x74/0x293c __lock_acquire from lock_acquire+0x134/0x398 lock_acquire from stmmac_get_stats64+0x2ac/0x2fc stmmac_get_stats64 from dev_get_stats+0x44/0x130 dev_get_stats from rtnl_fill_stats+0x38/0x120 rtnl_fill_stats from rtnl_fill_ifinfo+0x834/0x17f4 rtnl_fill_ifinfo from rtmsg_ifinfo_build_skb+0xc0/0x144 rtmsg_ifinfo_build_skb from rtmsg_ifinfo+0x50/0x88 rtmsg_ifinfo from __dev_notify_flags+0xc0/0xec __dev_notify_flags from dev_change_flags+0x50/0x5c dev_change_flags from ip_auto_config+0x2f4/0x1260 ip_auto_config from do_one_initcall+0x70/0x35c do_one_initcall from kernel_init_freeable+0x2ac/0x308 kernel_init_freeable from kernel_init+0x1c/0x138 kernel_init from ret_from_fork+0x14/0x2c The reason is the rxq|txq_stats structures are not what expected because stmmac_open() -> __stmmac_open() the structure is overwritten by "memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));" This causes the well initialized syncp member of rxq|txq_stats is overwritten unexpectedly as pointed out by Johannes and Uwe. Fix this issue by moving rxq|txq_stats back to stmmac_extra_stats. For SMP cache friendly, we also mark stmmac_txq_stats and stmmac_rxq_stats as ____cacheline_aligned_in_smp. Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") Signed-off-by: Jisheng Zhang Reported-by: Uwe Kleine-König Tested-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230917165328.3403-1-jszhang@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/common.h | 7 +- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 16 +-- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 16 +-- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 16 +-- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 16 +-- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 - .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 32 +++--- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 125 +++++++++++---------- 8 files changed, 120 insertions(+), 110 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 403cb397d4d3..1e996c29043d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -70,7 +70,7 @@ struct stmmac_txq_stats { u64 tx_tso_frames; u64 tx_tso_nfrags; struct u64_stats_sync syncp; -}; +} ____cacheline_aligned_in_smp; struct stmmac_rxq_stats { u64 rx_bytes; @@ -79,7 +79,7 @@ struct stmmac_rxq_stats { u64 rx_normal_irq_n; u64 napi_poll; struct u64_stats_sync syncp; -}; +} ____cacheline_aligned_in_smp; /* Extra statistic and debug information exposed by ethtool */ struct stmmac_extra_stats { @@ -202,6 +202,9 @@ struct stmmac_extra_stats { unsigned long mtl_est_hlbf; unsigned long mtl_est_btre; unsigned long mtl_est_btrlm; + /* per queue statistics */ + struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; + struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; unsigned long rx_dropped; unsigned long rx_errors; unsigned long tx_dropped; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 01e77368eef1..465ff1fd4785 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -441,8 +441,8 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; u32 v; @@ -455,9 +455,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_TX_INT) { ret |= handle_tx; - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); } if (v & EMAC_TX_DMA_STOP_INT) @@ -479,9 +479,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_RX_INT) { ret |= handle_rx; - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); } if (v & EMAC_RX_BUF_UA_INT) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 980e5f8a37ec..9470d3fd2ded 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -171,8 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan)); u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; if (dir == DMA_DIR_RX) @@ -201,15 +201,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, } /* TX/RX NORMAL interrupts */ if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } if (likely(intr_status & DMA_CHAN_STATUS_TI)) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index aaa09b16b016..7907d62d3437 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -162,8 +162,8 @@ static void show_rx_process_state(unsigned int status) int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; int ret = 0; /* read the status register (CSR5) */ u32 intr_status = readl(ioaddr + DMA_STATUS); @@ -215,16 +215,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_INTR_ENA); /* to schedule NAPI on real RIE event. */ if (likely(value & DMA_INTR_ENA_RIE)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } } if (likely(intr_status & DMA_STATUS_TI)) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } if (unlikely(intr_status & DMA_STATUS_ERI)) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index fa69d64a8694..3cde695fec91 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -337,8 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; @@ -367,15 +367,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, /* TX/RX NORMAL interrupts */ if (likely(intr_status & XGMAC_NIS)) { if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_normal_irq_n++; - u64_stats_update_end(&rx_q->rxq_stats.syncp); + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); ret |= handle_rx; } if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_normal_irq_n++; - u64_stats_update_end(&tx_q->txq_stats.syncp); + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); ret |= handle_tx; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 3401e888a9f6..cd7a9768de5f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -78,7 +78,6 @@ struct stmmac_tx_queue { dma_addr_t dma_tx_phy; dma_addr_t tx_tail_addr; u32 mss; - struct stmmac_txq_stats txq_stats; }; struct stmmac_rx_buffer { @@ -123,7 +122,6 @@ struct stmmac_rx_queue { unsigned int len; unsigned int error; } state; - struct stmmac_rxq_stats rxq_stats; }; struct stmmac_channel { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index b7ac7abecdd3..6aa5c0556d22 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -548,14 +548,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) pos = data; for (q = 0; q < tx_cnt; q++) { - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; struct stmmac_txq_stats snapshot; data = pos; do { - start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp); - snapshot = tx_q->txq_stats; - } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->syncp); + snapshot = *txq_stats; + } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n); for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { @@ -566,14 +566,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) pos = data; for (q = 0; q < rx_cnt; q++) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; struct stmmac_rxq_stats snapshot; data = pos; do { - start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp); - snapshot = rx_q->rxq_stats; - } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->syncp); + snapshot = *rxq_stats; + } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n); for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { @@ -637,14 +637,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, pos = j; for (i = 0; i < rx_queues_count; i++) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i]; struct stmmac_rxq_stats snapshot; j = pos; do { - start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp); - snapshot = rx_q->rxq_stats; - } while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->syncp); + snapshot = *rxq_stats; + } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); data[j++] += snapshot.rx_pkt_n; data[j++] += snapshot.rx_normal_irq_n; @@ -654,14 +654,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, pos = j; for (i = 0; i < tx_queues_count; i++) { - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i]; struct stmmac_txq_stats snapshot; j = pos; do { - start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp); - snapshot = tx_q->txq_stats; - } while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->syncp); + snapshot = *txq_stats; + } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); data[j++] += snapshot.tx_pkt_n; data[j++] += snapshot.tx_normal_irq_n; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2206789802bf..83c567a89a46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2426,6 +2426,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) { struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct xsk_buff_pool *pool = tx_q->xsk_pool; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc = NULL; @@ -2505,9 +2506,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); entry = tx_q->cur_tx; } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_set_ic_bit += tx_set_ic_bit; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (tx_desc) { stmmac_flush_tx_descriptors(priv, queue); @@ -2547,6 +2548,7 @@ static void stmmac_bump_dma_threshold(struct stmmac_priv *priv, u32 chan) static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; unsigned int entry, xmits = 0, count = 0; u32 tx_packets = 0, tx_errors = 0; @@ -2706,11 +2708,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (tx_q->dirty_tx != tx_q->cur_tx) stmmac_tx_timer_arm(priv, queue); - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_packets += tx_packets; - tx_q->txq_stats.tx_pkt_n += tx_packets; - tx_q->txq_stats.tx_clean++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_packets += tx_packets; + txq_stats->tx_pkt_n += tx_packets; + txq_stats->tx_clean++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); priv->xstats.tx_errors += tx_errors; @@ -4114,6 +4116,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); unsigned int first_entry, tx_packets; + struct stmmac_txq_stats *txq_stats; int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; @@ -4124,6 +4127,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) int i; tx_q = &priv->dma_conf.tx_queue[queue]; + txq_stats = &priv->xstats.txq_stats[queue]; first_tx = tx_q->cur_tx; /* Compute header lengths */ @@ -4282,13 +4286,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_bytes += skb->len; - tx_q->txq_stats.tx_tso_frames++; - tx_q->txq_stats.tx_tso_nfrags += nfrags; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_bytes += skb->len; + txq_stats->tx_tso_frames++; + txq_stats->tx_tso_nfrags += nfrags; if (set_ic) - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4359,6 +4363,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; int gso = skb_shinfo(skb)->gso_type; + struct stmmac_txq_stats *txq_stats; struct dma_edesc *tbs_desc = NULL; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; @@ -4368,6 +4373,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t des; tx_q = &priv->dma_conf.tx_queue[queue]; + txq_stats = &priv->xstats.txq_stats[queue]; first_tx = tx_q->cur_tx; if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) @@ -4519,11 +4525,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_bytes += skb->len; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_bytes += skb->len; if (set_ic) - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4730,6 +4736,7 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv, static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, struct xdp_frame *xdpf, bool dma_map) { + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc; @@ -4789,9 +4796,9 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, unsigned long flags; tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, tx_desc); - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->tx_set_ic_bit++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); } stmmac_enable_dma_transmission(priv, priv->ioaddr); @@ -4936,7 +4943,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, struct dma_desc *p, struct dma_desc *np, struct xdp_buff *xdp) { - struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_channel *ch = &priv->channel[queue]; unsigned int len = xdp->data_end - xdp->data; enum pkt_hash_types hash_type; @@ -4966,10 +4973,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, skb_record_rx_queue(skb, queue); napi_gro_receive(&ch->rxtx_napi, skb); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_pkt_n++; - rx_q->rxq_stats.rx_bytes += len; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_pkt_n++; + rxq_stats->rx_bytes += len; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); } static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) @@ -5042,6 +5049,7 @@ static struct stmmac_xdp_buff *xsk_buff_to_stmmac_ctx(struct xdp_buff *xdp) static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) { + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; unsigned int count = 0, error = 0, len = 0; int dirty = stmmac_rx_dirty(priv, queue); @@ -5205,9 +5213,9 @@ read_again: stmmac_finalize_xdp_rx(priv, xdp_status); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_pkt_n += count; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5235,6 +5243,7 @@ read_again: static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { u32 rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue]; struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; unsigned int count = 0, error = 0, len = 0; @@ -5496,11 +5505,11 @@ drain_data: stmmac_rx_refill(priv, queue); - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.rx_packets += rx_packets; - rx_q->rxq_stats.rx_bytes += rx_bytes; - rx_q->rxq_stats.rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->rx_packets += rx_packets; + rxq_stats->rx_bytes += rx_bytes; + rxq_stats->rx_pkt_n += count; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5513,15 +5522,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, rx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_rx_queue *rx_q; + struct stmmac_rxq_stats *rxq_stats; u32 chan = ch->index; unsigned long flags; int work_done; - rx_q = &priv->dma_conf.rx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + rxq_stats = &priv->xstats.rxq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); work_done = stmmac_rx(priv, budget, chan); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -5540,15 +5549,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_tx_queue *tx_q; + struct stmmac_txq_stats *txq_stats; u32 chan = ch->index; unsigned long flags; int work_done; - tx_q = &priv->dma_conf.tx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats = &priv->xstats.txq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); work_done = stmmac_tx_clean(priv, budget, chan); work_done = min(work_done, budget); @@ -5570,20 +5579,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget) container_of(napi, struct stmmac_channel, rxtx_napi); struct stmmac_priv *priv = ch->priv_data; int rx_done, tx_done, rxtx_done; - struct stmmac_rx_queue *rx_q; - struct stmmac_tx_queue *tx_q; + struct stmmac_rxq_stats *rxq_stats; + struct stmmac_txq_stats *txq_stats; u32 chan = ch->index; unsigned long flags; - rx_q = &priv->dma_conf.rx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp); - rx_q->rxq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags); + rxq_stats = &priv->xstats.rxq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); + rxq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); - tx_q = &priv->dma_conf.tx_queue[chan]; - flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp); - tx_q->txq_stats.napi_poll++; - u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags); + txq_stats = &priv->xstats.txq_stats[chan]; + flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); + txq_stats->napi_poll++; + u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); tx_done = stmmac_tx_clean(priv, budget, chan); tx_done = min(tx_done, budget); @@ -6926,7 +6935,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 int q; for (q = 0; q < tx_cnt; q++) { - struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats; + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; u64 tx_packets; u64 tx_bytes; @@ -6941,7 +6950,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 } for (q = 0; q < rx_cnt; q++) { - struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats; + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; u64 rx_packets; u64 rx_bytes; @@ -7342,9 +7351,9 @@ int stmmac_dvr_probe(struct device *device, priv->dev = ndev; for (i = 0; i < MTL_MAX_RX_QUEUES; i++) - u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp); + u64_stats_init(&priv->xstats.rxq_stats[i].syncp); for (i = 0; i < MTL_MAX_TX_QUEUES; i++) - u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp); + u64_stats_init(&priv->xstats.txq_stats[i].syncp); stmmac_set_ethtool_ops(ndev); priv->pause = pause; -- cgit From cff9b2332ab762b7e0586c793c431a8f2ea4db04 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 15 Sep 2023 13:44:44 -0400 Subject: kernel/sched: Modify initial boot task idle setup Initial booting is setting the task flag to idle (PF_IDLE) by the call path sched_init() -> init_idle(). Having the task idle and calling call_rcu() in kernel/rcu/tiny.c means that TIF_NEED_RESCHED will be set. Subsequent calls to any cond_resched() will enable IRQs, potentially earlier than the IRQ setup has completed. Recent changes have caused just this scenario and IRQs have been enabled early. This causes a warning later in start_kernel() as interrupts are enabled before they are fully set up. Fix this issue by setting the PF_IDLE flag later in the boot sequence. Although the boot task was marked as idle since (at least) d80e4fda576d, I am not sure that it is wrong to do so. The forced context-switch on idle task was introduced in the tiny_rcu update, so I'm going to claim this fixes 5f6130fa52ee. Fixes: 5f6130fa52ee ("tiny_rcu: Directly force QS when call_rcu_[bh|sched]() on idle_task") Signed-off-by: Liam R. Howlett Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-mm/CAMuHMdWpvpWoDa=Ox-do92czYRvkok6_x6pYUH+ZouMcJbXy+Q@mail.gmail.com/ --- kernel/sched/core.c | 2 +- kernel/sched/idle.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..802551e0009b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9269,7 +9269,7 @@ void __init init_idle(struct task_struct *idle, int cpu) * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. */ - idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + idle->flags |= PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); #ifdef CONFIG_SMP diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 342f58a329f5..5007b25c5bc6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -373,6 +373,7 @@ EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { + current->flags |= PF_IDLE; arch_cpu_idle_prepare(); cpuhp_online_idle(state); while (1) -- cgit From a8cf700c17d9ca6cb8ee7dc5c9330dbac3948237 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:45 -0700 Subject: x86/srso: Fix srso_show_state() side effect Reading the 'spec_rstack_overflow' sysfs file can trigger an unnecessary MSR write, and possibly even a (handled) exception if the microcode hasn't been updated. Avoid all that by just checking X86_FEATURE_IBPB_BRTYPE instead, which gets set by srso_select_mitigation() if the updated microcode exists. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/27d128899cb8aee9eb2b57ddc996742b0c1d776b.1693889988.git.jpoimboe@kernel.org --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f081d26616ac..bdd3e296f72b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2717,7 +2717,7 @@ static ssize_t srso_show_state(char *buf) return sysfs_emit(buf, "%s%s\n", srso_strings[srso_mitigation], - (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode")); + boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode"); } static ssize_t gds_show_state(char *buf) -- cgit From 91857ae20303cc98ed36720d9868fcd604a2ee75 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:46 -0700 Subject: x86/srso: Set CPUID feature bits independently of bug or mitigation status Booting with mitigations=off incorrectly prevents the X86_FEATURE_{IBPB_BRTYPE,SBPB} CPUID bits from getting set. Also, future CPUs without X86_BUG_SRSO might still have IBPB with branch type prediction flushing, in which case SBPB should be used instead of IBPB. The current code doesn't allow for that. Also, cpu_has_ibpb_brtype_microcode() has some surprising side effects and the setting of these feature bits really doesn't belong in the mitigation code anyway. Move it to earlier. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikolay Borisov Reviewed-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/869a1709abfe13b673bdd10c2f4332ca253a40bc.1693889988.git.jpoimboe@kernel.org --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/amd.c | 28 +++++++++------------------- arch/x86/kernel/cpu/bugs.c | 13 +------------ 3 files changed, 10 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0086920cda06..a3669a7774ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -683,13 +683,11 @@ extern u16 get_llc_id(unsigned int cpu); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); -extern bool cpu_has_ibpb_brtype_microcode(void); extern void amd_clear_divider(void); extern void amd_check_microcode(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } -static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd8379d84445..afacc48e07da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -766,6 +766,15 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TOPOEXT)) smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; + + if (!cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { + if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + else if (c->x86 >= 0x19 && !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + setup_force_cpu_cap(X86_FEATURE_SBPB); + } + } } static void init_amd_k8(struct cpuinfo_x86 *c) @@ -1301,25 +1310,6 @@ void amd_check_microcode(void) on_each_cpu(zenbleed_check_cpu, NULL, 1); } -bool cpu_has_ibpb_brtype_microcode(void) -{ - switch (boot_cpu_data.x86) { - /* Zen1/2 IBPB flushes branch type predictions too. */ - case 0x17: - return boot_cpu_has(X86_FEATURE_AMD_IBPB); - case 0x19: - /* Poke the MSR bit on Zen3/4 to check its presence. */ - if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { - setup_force_cpu_cap(X86_FEATURE_SBPB); - return true; - } else { - return false; - } - default: - return false; - } -} - /* * Issue a DIV 0/1 insn to clear any division data from previous DIV * operations. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bdd3e296f72b..b0ae985aa6a4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2404,26 +2404,15 @@ early_param("spec_rstack_overflow", srso_parse_cmdline); static void __init srso_select_mitigation(void) { - bool has_microcode; + bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE); if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) goto pred_cmd; - /* - * The first check is for the kernel running as a guest in order - * for guests to verify whether IBPB is a viable mitigation. - */ - has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode(); if (!has_microcode) { pr_warn("IBPB-extending microcode not applied!\n"); pr_warn(SRSO_NOTICE); } else { - /* - * Enable the synthetic (even if in a real CPUID leaf) - * flags for guests. - */ - setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); - /* * Zen1/2 with SMT off aren't vulnerable after the right * IBPB microcode has been applied. -- cgit From 02428d0366a27c2f33bc4361eb10467777804f29 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:47 -0700 Subject: x86/srso: Don't probe microcode in a guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support live migration, the hypervisor sets the "lowest common denominator" of features. Probing the microcode isn't allowed because any detected features might go away after a migration. As Andy Cooper states: "Linux must not probe microcode when virtualised.  What it may see instantaneously on boot (owing to MSR_PRED_CMD being fully passed through) is not accurate for the lifetime of the VM." Rely on the hypervisor to set the needed IBPB_BRTYPE and SBPB bits. Fixes: 1b5277c0ea0b ("x86/srso: Add SRSO_NO support") Suggested-by: Andrew Cooper Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Andrew Cooper Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/3938a7209606c045a3f50305d201d840e8c834c7.1693889988.git.jpoimboe@kernel.org --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index afacc48e07da..03ef962a6992 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -767,7 +767,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TOPOEXT)) smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; - if (!cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); else if (c->x86 >= 0x19 && !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { -- cgit From 01b057b2f4cc2d905a0bd92195657dbd9a7005ab Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:48 -0700 Subject: x86/srso: Fix SBPB enablement for spec_rstack_overflow=off If the user has requested no SRSO mitigation, other mitigations can use the lighter-weight SBPB instead of IBPB. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/b20820c3cfd1003171135ec8d762a0b957348497.1693889988.git.jpoimboe@kernel.org --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b0ae985aa6a4..10499bcd4e39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2433,7 +2433,7 @@ static void __init srso_select_mitigation(void) switch (srso_cmd) { case SRSO_CMD_OFF: - return; + goto pred_cmd; case SRSO_CMD_MICROCODE: if (has_microcode) { -- cgit From 3914784553f68c931fc666dbe7e86fe881aada38 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 14 Sep 2023 23:08:44 +0200 Subject: i2c: i801: unregister tco_pdev in i801_probe() error path We have to unregister tco_pdev also if i2c_add_adapter() fails. Fixes: 9424693035a5 ("i2c: i801: Create iTCO device on newer Intel PCHs") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 73ae06432133..1d855258a45d 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1754,6 +1754,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) "SMBus I801 adapter at %04lx", priv->smba); err = i2c_add_adapter(&priv->adapter); if (err) { + platform_device_unregister(priv->tco_pdev); i801_acpi_remove(priv); return err; } -- cgit From cf094baa3e0f19f1f80ceaf205c80402b024386c Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 18 Sep 2023 23:02:57 -0700 Subject: s390/bpf: Let arch_prepare_bpf_trampoline return program size arch_prepare_bpf_trampoline() for s390 currently returns 0 on success. This is not a problem for regular trampoline. However, struct_ops relies on the return value to advance "image" pointer: bpf_struct_ops_map_update_elem() { ... for_each_member(i, t, member) { ... err = bpf_struct_ops_prepare_trampoline(); ... image += err; } } When arch_prepare_bpf_trampoline returns 0 on success, all members of the struct_ops will point to the same trampoline (the last one). Fix this by returning the program size in arch_prepare_bpf_trampoline (on success). This is the same behavior as other architectures. Signed-off-by: Song Liu Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()") Reviewed-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230919060258.3237176-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index de2fb12120d2..2861e3360aff 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2513,7 +2513,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, return -E2BIG; } - return ret; + return tjit.common.prg; } bool bpf_jit_supports_subprog_tailcalls(void) -- cgit From 48f5e7d3f7300ff679dc50bfb7a7451de6f29e4c Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 18 Sep 2023 23:02:58 -0700 Subject: selftests/bpf: Check bpf_cubic_acked() is called via struct_ops Test bpf_tcp_ca (in test_progs) checks multiple tcp_congestion_ops. However, there isn't a test that verifies functions in the tcp_congestion_ops is actually called. Add a check to verify that bpf_cubic_acked is actually called during the test. Suggested-by: Martin KaFai Lau Signed-off-by: Song Liu Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230919060258.3237176-3-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 2 ++ tools/testing/selftests/bpf/progs/bpf_cubic.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index a53c254c6058..4aabeaa525d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -185,6 +185,8 @@ static void test_cubic(void) do_test("bpf_cubic", NULL); + ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index d9660e7200e2..c997e3e3d3fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -490,6 +490,8 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) } } +int bpf_cubic_acked_called = 0; + void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) { @@ -497,6 +499,7 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, struct bictcp *ca = inet_csk_ca(sk); __u32 delay; + bpf_cubic_acked_called = 1; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; -- cgit From bd3caddf299a640efb66c6022efed7fe744db626 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:36 +0800 Subject: net: hns3: add cmdq check for vf periodic service task When the vf cmdq is disabled, there is no need to keep these task running. So this patch skip these task when the cmdq is disabled. Fixes: ff200099d271 ("net: hns3: remove unnecessary work in hclgevf_main") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7a2f9233d695..a4d68fb216fb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1855,7 +1855,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) unsigned long delta = round_jiffies_relative(HZ); struct hnae3_handle *handle = &hdev->nic; - if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state) || + test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) return; if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { -- cgit From f9f651261130cdcb7adc9a3e365b356bc2749ab3 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:37 +0800 Subject: net: hns3: fix GRE checksum offload issue The device_version V3 hardware can't offload the checksum for IP in GRE packets, but can do it for NvGRE. So default to disable the checksum and GSO offload for GRE, but keep the ability to enable it when only using NvGRE. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b4895c7b3efd..cf50368441b7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3353,6 +3353,15 @@ static void hns3_set_default_feature(struct net_device *netdev) NETIF_F_HW_TC); netdev->hw_enc_features |= netdev->vlan_features | NETIF_F_TSO_MANGLEID; + + /* The device_version V3 hardware can't offload the checksum for IP in + * GRE packets, but can do it for NvGRE. So default to disable the + * checksum and GSO offload for GRE. + */ + if (ae_dev->dev_version > HNAE3_DEVICE_VERSION_V2) { + netdev->features &= ~NETIF_F_GSO_GRE; + netdev->features &= ~NETIF_F_GSO_GRE_CSUM; + } } static int hns3_alloc_buffer(struct hns3_enet_ring *ring, -- cgit From f2ed304922a55690529bcca59678dd92d7466ce8 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 18 Sep 2023 15:48:38 +0800 Subject: net: hns3: only enable unicast promisc when mac table full Currently, the driver will enable unicast promisc for the function once configure mac address fail. It's unreasonable when the failure is caused by using same mac address with other functions. So only enable unicast promisc when mac table full. Fixes: c631c696823c ("net: hns3: refactor the promisc mode setting") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8ca368424436..c0d03283775f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8824,7 +8824,7 @@ static void hclge_update_overflow_flags(struct hclge_vport *vport, if (mac_type == HCLGE_MAC_ADDR_UC) { if (is_all_added) vport->overflow_promisc_flags &= ~HNAE3_OVERFLOW_UPE; - else + else if (hclge_is_umv_space_full(vport, true)) vport->overflow_promisc_flags |= HNAE3_OVERFLOW_UPE; } else { if (is_all_added) -- cgit From 1a7be66e4685b8541546222c305cce9710718a88 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 18 Sep 2023 15:48:39 +0800 Subject: net: hns3: fix fail to delete tc flower rules during reset issue Firmware does not respond driver commands during reset Therefore, rule will fail to delete while the firmware is resetting So, if failed to delete rule, set rule state to TO_DEL, and the rule will be deleted when periodic task being scheduled. Fixes: 0205ec041ec6 ("net: hns3: add support for hw tc offload of tc flower") Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c0d03283775f..2bd77871f3bf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7348,6 +7348,12 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle, ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, NULL, false); if (ret) { + /* if tcam config fail, set rule state to TO_DEL, + * so the rule will be deleted when periodic + * task being scheduled. + */ + hclge_update_fd_list(hdev, HCLGE_FD_TO_DEL, rule->location, NULL); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); spin_unlock_bh(&hdev->fd_rule_lock); return ret; } -- cgit From 0770063096d5da4a8e467b6e73c1646a75589628 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:40 +0800 Subject: net: hns3: add 5ms delay before clear firmware reset irq source Currently the reset process in hns3 and firmware watchdog init process is asynchronous. we think firmware watchdog initialization is completed before hns3 clear the firmware interrupt source. However, firmware initialization may not complete early. so we add delay before hns3 clear firmware interrupt source and 5 ms delay is enough to avoid second firmware reset interrupt. Fixes: c1a81619d73a ("net: hns3: Add mailbox interrupt handling to PF driver") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2bd77871f3bf..c42574e29747 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3564,9 +3564,14 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type, u32 regclr) { +#define HCLGE_IMP_RESET_DELAY 5 + switch (event_type) { case HCLGE_VECTOR0_EVENT_PTP: case HCLGE_VECTOR0_EVENT_RST: + if (regclr == BIT(HCLGE_VECTOR0_IMPRESET_INT_B)) + mdelay(HCLGE_IMP_RESET_DELAY); + hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr); break; case HCLGE_VECTOR0_EVENT_MBX: -- cgit From 8d533cac92181cc1b1e451f6b22311ad1881618b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 11 Sep 2023 21:09:27 +0200 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 14 ++++++++++---- arch/s390/configs/defconfig | 13 +++++++++---- arch/s390/configs/zfcpdump_defconfig | 4 ++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index af2fbe48e16c..438cd92e6080 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,23 +40,25 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_SIG=y +CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y +CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m @@ -434,6 +436,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +# CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m @@ -577,6 +580,7 @@ CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m # CONFIG_DRM_DEBUG_MODESET_LOCK is not set CONFIG_FB=y +# CONFIG_FB_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set @@ -647,6 +651,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y +CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m @@ -703,6 +708,7 @@ CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_INIT_STACK_NONE=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m @@ -825,9 +831,9 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y CONFIG_DEBUG_IRQFLAGS=y +CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 3f263b767a4c..1b8150e50f6a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,23 +38,25 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_SIG=y +CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y +CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m @@ -424,6 +426,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +# CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m @@ -566,6 +569,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m CONFIG_FB=y +# CONFIG_FB_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set @@ -632,6 +636,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y +CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -687,6 +692,7 @@ CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_INIT_STACK_NONE=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -781,7 +787,6 @@ CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y CONFIG_TEST_LOCKUP=m -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index e62fb2015102..b831083b4edd 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_CRASH_DUMP=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set @@ -15,9 +16,8 @@ CONFIG_NR_CPUS=2 CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set -CONFIG_CRASH_DUMP=y # CONFIG_PFAULT is not set -# CONFIG_S390_HYPFS_FS is not set +# CONFIG_S390_HYPFS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set -- cgit From 5c95bf274665cc9f5126e4a48a9da51114f7afd2 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 12 Sep 2023 16:47:32 +0200 Subject: s390/cert_store: fix string length handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building cert_store.o with W=1 reveals this bug: CC arch/s390/kernel/cert_store.o arch/s390/kernel/cert_store.c:443:45: warning: ‘sprintf’ may write a terminating nul past the end of the destination [-Wformat-overflow=] 443 | sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); | ^ arch/s390/kernel/cert_store.c:443:9: note: ‘sprintf’ output between 15 and 18 bytes into a destination of size 15 443 | sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); Fix this by using the correct maximum width for each integer component in both buffer length calculation and format string. Also switch to using snprintf() to guard against potential future changes to the integer range of each component. Fixes: 8cf57d7217c3 ("s390: add support for user-defined certificates") Reported-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/cert_store.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index 3986a044eb36..554447768bdd 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -432,15 +432,16 @@ static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) char *desc; cs_token = vcssb->cs_token; - /* Description string contains "%64s:%04u:%08u\0". */ + /* Description string contains "%64s:%05u:%010u\0". */ name_len = sizeof(vce->vce_hdr.vc_name); - len = name_len + 1 + 4 + 1 + 8 + 1; + len = name_len + 1 + 5 + 1 + 10 + 1; desc = kmalloc(len, GFP_KERNEL); if (!desc) return NULL; memcpy(desc, vce->vce_hdr.vc_name, name_len); - sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); + snprintf(desc + name_len, len - name_len, ":%05u:%010u", + vce->vce_hdr.vc_index, cs_token); return desc; } -- cgit From 44bdb313da57322c9b3c108eb66981c6ec6509f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 09:13:51 +0000 Subject: net: bridge: use DEV_STATS_INC() syzbot/KCSAN reported data-races in br_handle_frame_finish() [1] This function can run from multiple cpus without mutual exclusion. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. Handles updates to dev->stats.tx_dropped while we are at it. [1] BUG: KCSAN: data-race in br_handle_frame_finish / br_handle_frame_finish read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 1: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 run_ksoftirqd+0x17/0x20 kernel/softirq.c:921 smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 0: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 do_softirq+0x5e/0x90 kernel/softirq.c:454 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] batadv_tt_local_purge+0x1a8/0x1f0 net/batman-adv/translation-table.c:1356 batadv_tt_purge+0x2b/0x630 net/batman-adv/translation-table.c:3560 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000000d7190 -> 0x00000000000d7191 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 14848 Comm: kworker/u4:11 Not tainted 6.6.0-rc1-syzkaller-00236-gad8a69f361b9 #0 Fixes: 1c29fc4989bc ("[BRIDGE]: keep track of received multicast packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: bridge@lists.linux-foundation.org Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20230918091351.1356153-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/bridge/br_forward.c | 4 ++-- net/bridge/br_input.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 9d7bc8b96b53..7431f89e897b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev, skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } @@ -268,7 +268,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index c34a0b0901b0..c729528b5e85 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -181,12 +181,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst && mdst->host_joined) || br_multicast_is_router(brmctx, skb)) { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } mcast_hit = true; } else { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } break; case BR_PKT_UNICAST: -- cgit From 3dc0bab23dba53f315c9a7b4a679e0a6d46f7c6e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 19 Sep 2023 14:08:29 +0200 Subject: power: supply: core: fix use after free in uevent power_supply_uevent() which is called to emit a udev event on device deletion attempts to use the power_supply_battery_info structure, which is device-managed and has been freed before this point. Fix this by not generating all battery/charger properties when the device is about to be removed. This also avoids generating errors when trying to access the hardware in hot-unplug scenarios. ================================================================== BUG: KASAN: slab-use-after-free in power_supply_battery_info_has_prop (power_supply_core.c:872) Read of size 4 at addr 0000000062e59028 by task python3/27 Call Trace: power_supply_battery_info_has_prop (power_supply_core.c:872) power_supply_uevent (power_supply_sysfs.c:504) dev_uevent (drivers/base/core.c:2590) kobject_uevent_env (lib/kobject_uevent.c:558) kobject_uevent (lib/kobject_uevent.c:643) device_del (drivers/base/core.c:3266 drivers/base/core.c:3831) device_unregister (drivers/base/core.c:3730 drivers/base/core.c:3854) power_supply_unregister (power_supply_core.c:1608) devm_power_supply_release (power_supply_core.c:1515) release_nodes (drivers/base/devres.c:506) devres_release_group (drivers/base/devres.c:669) i2c_device_remove (drivers/i2c/i2c-core-base.c:629) device_remove (drivers/base/dd.c:570) device_release_driver_internal (drivers/base/dd.c:1274 drivers/base/dd.c:1295) device_driver_detach (drivers/base/dd.c:1332) unbind_store (drivers/base/bus.c:247) ... Allocated by task 27: devm_kmalloc (drivers/base/devres.c:119 drivers/base/devres.c:829) power_supply_get_battery_info (include/linux/device.h:316 power_supply_core.c:626) __power_supply_register (power_supply_core.c:1408) devm_power_supply_register (power_supply_core.c:1544) bq256xx_probe (bq256xx_charger.c:1539 bq256xx_charger.c:1727) bq256xx_charger i2c_device_probe (drivers/i2c/i2c-core-base.c:584) really_probe (drivers/base/dd.c:579 drivers/base/dd.c:658) __driver_probe_device (drivers/base/dd.c:800) device_driver_attach (drivers/base/dd.c:1128) bind_store (drivers/base/bus.c:273) ... Freed by task 27: kfree (mm/slab_common.c:1073) release_nodes (drivers/base/devres.c:503) devres_release_all (drivers/base/devres.c:536) device_del (drivers/base/core.c:3829) device_unregister (drivers/base/core.c:3730 drivers/base/core.c:3854) power_supply_unregister (power_supply_core.c:1608) devm_power_supply_release (power_supply_core.c:1515) release_nodes (drivers/base/devres.c:506) devres_release_group (drivers/base/devres.c:669) i2c_device_remove (drivers/i2c/i2c-core-base.c:629) device_remove (drivers/base/dd.c:570) device_release_driver_internal (drivers/base/dd.c:1274 drivers/base/dd.c:1295) device_driver_detach (drivers/base/dd.c:1332) unbind_store (drivers/base/bus.c:247) ... ================================================================== Reported-by: Vincent Whitchurch Fixes: 27a2195efa8d ("power: supply: core: auto-exposure of simple-battery data") Tested-by: Vincent Whitchurch Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 06e5b6b0e255..d483a81560ab 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -482,6 +482,13 @@ int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env) if (ret) return ret; + /* + * Kernel generates KOBJ_REMOVE uevent in device removal path, after + * resources have been freed. Exit early to avoid use-after-free. + */ + if (psy->removing) + return 0; + prop_buf = (char *)get_zeroed_page(GFP_KERNEL); if (!prop_buf) return -ENOMEM; -- cgit From e527adfb9b7d9d05a4577c116519e59a2bda4b05 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 Jan 2023 19:13:46 +0100 Subject: firmware: imx-dsp: Fix an error handling path in imx_dsp_setup_channels() If mbox_request_channel_byname() fails, the memory allocated a few lines above still need to be freed before going to the error handling path. Fixes: 046326989a18 ("firmware: imx: Save channel name for further use") Signed-off-by: Christophe JAILLET Reviewed-by: Daniel Baluta Signed-off-by: Shawn Guo --- drivers/firmware/imx/imx-dsp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/imx/imx-dsp.c b/drivers/firmware/imx/imx-dsp.c index 3dba590a2a95..508eab346fc6 100644 --- a/drivers/firmware/imx/imx-dsp.c +++ b/drivers/firmware/imx/imx-dsp.c @@ -114,6 +114,7 @@ static int imx_dsp_setup_channels(struct imx_dsp_ipc *dsp_ipc) dsp_chan->idx = i % 2; dsp_chan->ch = mbox_request_channel_byname(cl, chan_name); if (IS_ERR(dsp_chan->ch)) { + kfree(dsp_chan->name); ret = PTR_ERR(dsp_chan->ch); if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to request mbox chan %s ret %d\n", -- cgit From deff8486a40e75813f2841f533c7572489981bae Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 19 Sep 2023 09:11:53 +0100 Subject: ALSA: hda: cs35l56: Use the new RUNTIME_PM_OPS() macro Use RUNTIME_PM_OPS() instead of the old SET_RUNTIME_PM_OPS(). This means we don't need __maybe_unused on the functions. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230919081153.19793-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 87ffe8fbff99..7adc1d373d65 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -105,7 +105,7 @@ static void cs35l56_hda_playback_hook(struct device *dev, int action) } } -static int __maybe_unused cs35l56_hda_runtime_suspend(struct device *dev) +static int cs35l56_hda_runtime_suspend(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); @@ -115,7 +115,7 @@ static int __maybe_unused cs35l56_hda_runtime_suspend(struct device *dev) return cs35l56_runtime_suspend_common(&cs35l56->base); } -static int __maybe_unused cs35l56_hda_runtime_resume(struct device *dev) +static int cs35l56_hda_runtime_resume(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); int ret; @@ -1015,7 +1015,7 @@ void cs35l56_hda_remove(struct device *dev) EXPORT_SYMBOL_NS_GPL(cs35l56_hda_remove, SND_HDA_SCODEC_CS35L56); const struct dev_pm_ops cs35l56_hda_pm_ops = { - SET_RUNTIME_PM_OPS(cs35l56_hda_runtime_suspend, cs35l56_hda_runtime_resume, NULL) + RUNTIME_PM_OPS(cs35l56_hda_runtime_suspend, cs35l56_hda_runtime_resume, NULL) SYSTEM_SLEEP_PM_OPS(cs35l56_hda_system_suspend, cs35l56_hda_system_resume) LATE_SYSTEM_SLEEP_PM_OPS(cs35l56_hda_system_suspend_late, cs35l56_hda_system_resume_early) -- cgit From 381ddcd5875e496f2eae06bb65853271b7150fee Mon Sep 17 00:00:00 2001 From: Balamurugan C Date: Tue, 19 Sep 2023 17:11:35 +0800 Subject: ASoC: Intel: soc-acpi: Add entry for sof_es8336 in MTL match table. Adding support for ES83x6 codec in MTL match table. Signed-off-by: Balamurugan C Reviewed-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230919091136.1922253-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-mtl-match.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 0304246d2922..9cec1a4a6cd8 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -30,6 +30,11 @@ static const struct snd_soc_acpi_codecs mtl_rt5682_rt5682s_hp = { .codecs = {"10EC5682", "RTL5682"}, }; +static const struct snd_soc_acpi_codecs mtl_essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { { .comp_ids = &mtl_rt5682_rt5682s_hp, @@ -52,6 +57,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { .quirk_data = &mtl_rt1019p_amp, .sof_tplg_filename = "sof-mtl-rt1019-rt5682.tplg", }, + { + .comp_ids = &mtl_essx_83x6, + .drv_name = "sof-essx8336", + .sof_tplg_filename = "sof-mtl-es8336", /* the tplg suffix is added at run time */ + .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | + SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | + SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, + }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_mtl_machines); -- cgit From d1f67278d4b2de3bf544ea9bcd9f64d03584df87 Mon Sep 17 00:00:00 2001 From: Balamurugan C Date: Tue, 19 Sep 2023 17:11:36 +0800 Subject: ASoC: Intel: soc-acpi: Add entry for HDMI_In capture support in MTL match table Adding HDMI-In capture via I2S feature support in MTL platform. Signed-off-by: Balamurugan C Reviewed-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230919091136.1922253-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_es8336.c | 10 ++++++++++ sound/soc/intel/common/soc-acpi-intel-mtl-match.c | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index f8a3e8a91761..9904a9e33ccc 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -808,6 +808,16 @@ static const struct platform_device_id board_ids[] = { SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK | SOF_ES8336_JD_INVERTED), }, + { + .name = "mtl_es83x6_c1_h02", + .driver_data = (kernel_ulong_t)(SOF_ES8336_SSP_CODEC(1) | + SOF_NO_OF_HDMI_CAPTURE_SSP(2) | + SOF_HDMI_CAPTURE_1_SSP(0) | + SOF_HDMI_CAPTURE_2_SSP(2) | + SOF_SSP_HDMI_CAPTURE_PRESENT | + SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK | + SOF_ES8336_JD_INVERTED), + }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 9cec1a4a6cd8..92498d1d6c8d 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -35,6 +35,11 @@ static const struct snd_soc_acpi_codecs mtl_essx_83x6 = { .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, }; +static const struct snd_soc_acpi_codecs mtl_lt6911_hdmi = { + .num_codecs = 1, + .codecs = {"INTC10B0"} +}; + struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { { .comp_ids = &mtl_rt5682_rt5682s_hp, @@ -57,6 +62,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { .quirk_data = &mtl_rt1019p_amp, .sof_tplg_filename = "sof-mtl-rt1019-rt5682.tplg", }, + { + .comp_ids = &mtl_essx_83x6, + .drv_name = "mtl_es83x6_c1_h02", + .machine_quirk = snd_soc_acpi_codec_list, + .quirk_data = &mtl_lt6911_hdmi, + .sof_tplg_filename = "sof-mtl-es83x6-ssp1-hdmi-ssp02.tplg", + }, { .comp_ids = &mtl_essx_83x6, .drv_name = "sof-essx8336", -- cgit From b399f9706a1cbae42731cc420a46cfb9c3c6b10f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 19 Sep 2023 16:36:06 +0800 Subject: ASoC: Intel: soc-acpi: fix Dell SKU 0B34 The rule for the SoundWire tables is that the platforms with more devices need to be added first. We broke that rule with the Dell SKU 0B34, and caused the second amplifier for SKU 0AF3 to be ignored. The fix is simple, we need to move the single-amplifier entry after the two-amplifier one. Fixes: b62a1a839b48 ("ASoC: Intel: soc-acpi: add tables for Dell SKU 0B34") Closes: https://github.com/thesofproject/linux/issues/4559 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Chao Song Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230919083606.1920202-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-adl-match.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 8e995edf4c10..5103e75ac830 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -655,18 +655,18 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = { .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt1316-l2-mono-rt714-l3.tplg", }, - { - .link_mask = 0x3, /* rt1316 on link1 & rt714 on link0 */ - .links = adl_sdw_rt1316_link1_rt714_link0, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-adl-rt1316-l1-mono-rt714-l0.tplg", - }, { .link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */ .links = adl_sdw_rt1316_link12_rt714_link0, .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt1316-l12-rt714-l0.tplg", }, + { + .link_mask = 0x3, /* rt1316 on link1 & rt714 on link0 */ + .links = adl_sdw_rt1316_link1_rt714_link0, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-adl-rt1316-l1-mono-rt714-l0.tplg", + }, { .link_mask = 0x5, /* 2 active links required */ .links = adl_sdw_rt1316_link2_rt714_link0, -- cgit From fb0b8d299781be8d46b3612aa96cef28da0d93f4 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 19 Sep 2023 17:21:25 +0800 Subject: ASoC: Intel: sof_sdw: add support for SKU 0B14 One more missing SKU in the list. Closes: https://github.com/thesofproject/linux/issues/4543 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Chao Song Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230919092125.1922468-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 5a1c750e6ae6..842649501e30 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -376,6 +376,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { /* No Jack */ .driver_data = (void *)SOF_SDW_TGL_HDMI, }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B14"), + }, + /* No Jack */ + .driver_data = (void *)SOF_SDW_TGL_HDMI, + }, + { .callback = sof_sdw_quirk_cb, .matches = { -- cgit From 69cf63b6560205a390a736b88d112374655adb28 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 19 Sep 2023 01:22:57 +0000 Subject: ASoC: simple-card-utils: fixup simple_util_startup() error handling It should use "goto" instead of "return" Fixes: 5ca2ab459817 ("ASoC: simple-card-utils: Add new system-clock-fixed flag") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/202309141205.ITZeDJxV-lkp@intel.com/ Closes: https://lore.kernel.org/all/202309151840.au9Aa2W4-lkp@intel.com/ Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87v8c76jnz.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 5b18a4af022f..2588ec735dbd 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -310,7 +310,8 @@ int asoc_simple_startup(struct snd_pcm_substream *substream) if (fixed_sysclk % props->mclk_fs) { dev_err(rtd->dev, "fixed sysclk %u not divisible by mclk_fs %u\n", fixed_sysclk, props->mclk_fs); - return -EINVAL; + ret = -EINVAL; + goto codec_err; } ret = snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, fixed_rate, fixed_rate); -- cgit From 41bae58df411f9accf01ea660730649b2fab1dab Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 19 Sep 2023 05:34:18 +0000 Subject: ASoC: simple-card: fixup asoc_simple_probe() error handling asoc_simple_probe() is used for both "DT probe" (A) and "platform probe" (B). It uses "goto err" when error case, but it is not needed for "platform probe" case (B). Thus it is using "return" directly there. static int asoc_simple_probe(...) { ^ if (...) { | ... (A) if (ret < 0) | goto err; v } else { ^ ... | if (ret < 0) (B) return -Exxx; v } ... ^ if (ret < 0) (C) goto err; v ... err: (D) simple_util_clean_reference(card); return ret; } Both case are using (C) part, and it calls (D) when err case. But (D) will do nothing for (B) case. Because of these behavior, current code itself is not wrong, but is confusable, and more, static analyzing tool will warning on (B) part (should use goto err). To avoid static analyzing tool warning, this patch uses "goto err" on (B) part. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87o7hy7mlh.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 190f11366e84..274417e39e7d 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -759,10 +759,12 @@ static int asoc_simple_probe(struct platform_device *pdev) struct snd_soc_dai_link *dai_link = priv->dai_link; struct simple_dai_props *dai_props = priv->dai_props; + ret = -EINVAL; + cinfo = dev->platform_data; if (!cinfo) { dev_err(dev, "no info for asoc-simple-card\n"); - return -EINVAL; + goto err; } if (!cinfo->name || @@ -771,7 +773,7 @@ static int asoc_simple_probe(struct platform_device *pdev) !cinfo->platform || !cinfo->cpu_dai.name) { dev_err(dev, "insufficient asoc_simple_card_info settings\n"); - return -EINVAL; + goto err; } cpus = dai_link->cpus; -- cgit From 41b07476da38ac2878a14e5b8fe0312c41ea36e3 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 19 Sep 2023 16:27:16 +0800 Subject: ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support New platform SSID:0x231f. 0x17 was only speaker pin, DAC assigned will be 0x03. Headphone assigned to 0x02. Playback via headphone will get EQ filter processing. So, it needs to swap DAC. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 883a7e865bc5..751783f3a15c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10577,6 +10577,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x90170110}, {0x19, 0x03a11030}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, + {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */ + {0x19, 0x04a11040}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110}, -- cgit From 9ef4dc24eb85f9e95ef183d09ae622b8efc34783 Mon Sep 17 00:00:00 2001 From: Woody Zhang Date: Tue, 5 Sep 2023 19:41:00 +0800 Subject: mfd: cs42l43: Fix MFD_CS42L43 dependency on REGMAP_IRQ MFD_CS42L43 depends on REGMAP_IRQ as it uses devm_regmap_add_irq_chip. Otherwise the build may fail with following log. AR drivers/built-in.a AR built-in.a AR vmlinux.a LD vmlinux.o OBJCOPY modules.builtin.modinfo GEN modules.builtin MODPOST Module.symvers ERROR: modpost: "devm_regmap_add_irq_chip" [drivers/mfd/cs42l43.ko] undefined! Fixes: ace6d1448138 ("mfd: cs42l43: Add support for cs42l43 core driver") Signed-off-by: Woody Zhang Acked-by: Charles Keepax Link: https://lore.kernel.org/r/tencent_E54C7156F583E15FFE320E576E3389C39A08@qq.com Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index aea95745c73f..90ce58fd629e 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -241,6 +241,7 @@ config MFD_CS42L43 tristate select MFD_CORE select REGMAP + select REGMAP_IRQ config MFD_CS42L43_I2C tristate "Cirrus Logic CS42L43 (I2C)" -- cgit From 9dc1664fab2246bc2c3e9bf2cf21518a857f9b5b Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 18 Sep 2023 16:07:24 +0200 Subject: leds: Drop BUG_ON check for LED_COLOR_ID_MULTI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c3f853184bed ("leds: Fix BUG_ON check for LED_COLOR_ID_MULTI that is always false") fixed a no-op BUG_ON. This turned out to cause a regression, since some in-tree device-tree files already use LED_COLOR_ID_MULTI. Drop the BUG_ON altogether. Fixes: c3f853184bed ("leds: Fix BUG_ON check for LED_COLOR_ID_MULTI that is always false") Reported-by: Da Xue Closes: https://lore.kernel.org/linux-leds/ZQLelWcNjjp2xndY@duo.ucw.cz/T/ Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20230918140724.18634-1-kabel@kernel.org Signed-off-by: Lee Jones --- drivers/leds/led-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 04f9ea675f2c..214ed81eb0e9 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -479,10 +479,6 @@ int led_compose_name(struct device *dev, struct led_init_data *init_data, led_parse_fwnode_props(dev, fwnode, &props); - /* We want to label LEDs that can produce full range of colors - * as RGB, not multicolor */ - BUG_ON(props.color == LED_COLOR_ID_MULTI); - if (props.label) { /* * If init_data.devicename is NULL, then it indicates that -- cgit From 492032760127251e5540a5716a70996bacf2a3fd Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 18 Sep 2023 20:30:11 +0800 Subject: team: fix null-ptr-deref when team device type is changed Get a null-ptr-deref bug as follows with reproducer [1]. BUG: kernel NULL pointer dereference, address: 0000000000000228 ... RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q] ... Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x150 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? vlan_dev_hard_header+0x35/0x140 [8021q] ? vlan_dev_hard_header+0x8e/0x140 [8021q] neigh_connected_output+0xb2/0x100 ip6_finish_output2+0x1cb/0x520 ? nf_hook_slow+0x43/0xc0 ? ip6_mtu+0x46/0x80 ip6_finish_output+0x2a/0xb0 mld_sendpack+0x18f/0x250 mld_ifc_work+0x39/0x160 process_one_work+0x1e6/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [1] $ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}' $ ip link add name t-dummy type dummy $ ip link add link t-dummy name t-dummy.100 type vlan id 100 $ ip link add name t-nlmon type nlmon $ ip link set t-nlmon master team0 $ ip link set t-nlmon nomaster $ ip link set t-dummy up $ ip link set team0 up $ ip link set t-dummy.100 down $ ip link set t-dummy.100 master team0 When enslave a vlan device to team device and team device type is changed from non-ether to ether, header_ops of team device is changed to vlan_header_ops. That is incorrect and will trigger null-ptr-deref for vlan->real_dev in vlan_dev_hard_header() because team device is not a vlan device. Cache eth_header_ops in team_setup(), then assign cached header_ops to header_ops of team net device when its type is changed from non-ether to ether to fix the bug. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Ziyang Xuan Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/team/team.c | 10 +++++++++- include/linux/if_team.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8b94580194e..508d9a392ab1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2115,7 +2115,12 @@ static const struct ethtool_ops team_ethtool_ops = { static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { - dev->header_ops = port_dev->header_ops; + struct team *team = netdev_priv(dev); + + if (port_dev->type == ARPHRD_ETHER) + dev->header_ops = team->header_ops_cache; + else + dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; @@ -2162,8 +2167,11 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { + struct team *team = netdev_priv(dev); + ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + team->header_ops_cache = dev->header_ops; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* -- cgit From e3603ccf4a35fdf433ee7b60bb7cfb598f19c8fa Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Sep 2023 11:03:24 -0500 Subject: smb3: Add dynamic trace points for RDMA (smbdirect) reconnect smb3_smbd_connect_done and smb3_smbd_connect_err To improve debugging of RDMA issues add those two. We already had dynamic tracepoints for non-RDMA connect done and error cases. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 9 ++++++--- fs/smb/client/trace.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 2a2aec8c6112..94df9eec3d8d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1401,10 +1401,13 @@ create_conn: server->smbd_conn = smbd_get_connection( server, (struct sockaddr *) &server->dstaddr); - if (server->smbd_conn) + if (server->smbd_conn) { cifs_dbg(VFS, "RDMA transport re-established\n"); - - return server->smbd_conn ? 0 : -ENOENT; + trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr); + return 0; + } + trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr); + return -ENOENT; } static void destroy_caches_and_workqueue(struct smbd_connection *info) diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index a7e4755bed0f..de199ec9f726 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -935,6 +935,8 @@ DEFINE_EVENT(smb3_connect_class, smb3_##name, \ TP_ARGS(hostname, conn_id, addr)) DEFINE_SMB3_CONNECT_EVENT(connect_done); +DEFINE_SMB3_CONNECT_EVENT(smbd_connect_done); +DEFINE_SMB3_CONNECT_EVENT(smbd_connect_err); DECLARE_EVENT_CLASS(smb3_connect_err_class, TP_PROTO(char *hostname, __u64 conn_id, -- cgit From a5f31a5028d1e88e97c3b6cdc3e3bf2da085e232 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2023 15:57:40 -0700 Subject: iomap: convert iomap_unshare_iter to use large folios Convert iomap_unshare_iter to create large folios if possible, since the write and zeroing paths already do that. I think this got missed in the conversion of the write paths that landed in 6.6-rc1. Cc: ritesh.list@gmail.com, willy@infradead.org Signed-off-by: Darrick J. Wong Reviewed-by: Ritesh Harjani (IBM) --- fs/iomap/buffered-io.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 0350830fc989..644479ccefbd 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1263,7 +1263,6 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; loff_t length = iomap_length(iter); - long status = 0; loff_t written = 0; /* don't bother with blocks that are not shared to start with */ @@ -1274,28 +1273,33 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) return length; do { - unsigned long offset = offset_in_page(pos); - unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length); struct folio *folio; + int status; + size_t offset; + size_t bytes = min_t(u64, SIZE_MAX, length); status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) return status; - if (iter->iomap.flags & IOMAP_F_STALE) + if (iomap->flags & IOMAP_F_STALE) break; - status = iomap_write_end(iter, pos, bytes, bytes, folio); - if (WARN_ON_ONCE(status == 0)) + offset = offset_in_folio(folio, pos); + if (bytes > folio_size(folio) - offset) + bytes = folio_size(folio) - offset; + + bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + if (WARN_ON_ONCE(bytes == 0)) return -EIO; cond_resched(); - pos += status; - written += status; - length -= status; + pos += bytes; + written += bytes; + length -= bytes; balance_dirty_pages_ratelimited(iter->inode->i_mapping); - } while (length); + } while (length > 0); return written; } -- cgit From 8dbe33956d96c9d066ef15ca933ede30748198b2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 14 Aug 2023 19:12:47 +0300 Subject: efi/unaccepted: Make sure unaccepted table is mapped Unaccepted table is now allocated from EFI_ACPI_RECLAIM_MEMORY. It translates into E820_TYPE_ACPI, which is not added to memblock and therefore not mapped in the direct mapping. This causes a crash on the first touch of the table. Use memblock_add() to make sure that the table is mapped in direct mapping. Align the range to the nearest page borders. Ranges smaller than page size are not mapped. Fixes: e7761d827e99 ("efi/unaccepted: Use ACPI reclaim memory for unaccepted memory table") Reported-by: Hongyu Ning Signed-off-by: Kirill A. Shutemov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1599f1176842..ce20a60676f0 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -623,6 +623,34 @@ static __init int match_config_table(const efi_guid_t *guid, return 0; } +/** + * reserve_unaccepted - Map and reserve unaccepted configuration table + * @unaccepted: Pointer to unaccepted memory table + * + * memblock_add() makes sure that the table is mapped in direct mapping. During + * normal boot it happens automatically because the table is allocated from + * usable memory. But during crashkernel boot only memory specifically reserved + * for crash scenario is mapped. memblock_add() forces the table to be mapped + * in crashkernel case. + * + * Align the range to the nearest page borders. Ranges smaller than page size + * are not going to be mapped. + * + * memblock_reserve() makes sure that future allocations will not touch the + * table. + */ + +static __init void reserve_unaccepted(struct efi_unaccepted_memory *unaccepted) +{ + phys_addr_t start, size; + + start = PAGE_ALIGN_DOWN(efi.unaccepted); + size = PAGE_ALIGN(sizeof(*unaccepted) + unaccepted->size); + + memblock_add(start, size); + memblock_reserve(start, size); +} + int __init efi_config_parse_tables(const efi_config_table_t *config_tables, int count, const efi_config_table_type_t *arch_tables) @@ -751,11 +779,9 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, unaccepted = early_memremap(efi.unaccepted, sizeof(*unaccepted)); if (unaccepted) { - unsigned long size; if (unaccepted->version == 1) { - size = sizeof(*unaccepted) + unaccepted->size; - memblock_reserve(efi.unaccepted, size); + reserve_unaccepted(unaccepted); } else { efi.unaccepted = EFI_INVALID_TABLE_ADDR; } -- cgit From 331955600ddf55a2c6d92a00f95b0865f1c74fc3 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Fri, 8 Sep 2023 13:36:53 -0700 Subject: x86/shstk: Handle vfork clone failure correctly Shadow stacks are allocated automatically and freed on exit, depending on the clone flags. The two cases where new shadow stacks are not allocated are !CLONE_VM (fork()) and CLONE_VFORK (vfork()). For !CLONE_VM, although a new stack is not allocated, it can be freed normally because it will happen in the child's copy of the VM. However, for CLONE_VFORK the parent and the child are actually using the same shadow stack. So the kernel doesn't need to allocate *or* free a shadow stack for a CLONE_VFORK child. CLONE_VFORK children already need special tracking to avoid returning to userspace until the child exits or execs. Shadow stack uses this same tracking to avoid freeing CLONE_VFORK shadow stacks. However, the tracking is not setup until the clone has succeeded (internally). Which means, if a CLONE_VFORK fails, the existing logic will not know it is a CLONE_VFORK and proceed to unmap the parents shadow stack. This error handling cleanup logic runs via exit_thread() in the bad_fork_cleanup_thread label in copy_process(). The issue was seen in the glibc test "posix/tst-spawn3-pidfd" while running with shadow stack using currently out-of-tree glibc patches. Fix it by not unmapping the vfork shadow stack in the error case as well. Since clone is implemented in core code, it is not ideal to pass the clone flags along the error path in order to have shadow stack code have symmetric logic in the freeing half of the thread shadow stack handling. Instead use the existing state for thread shadow stacks to track whether the thread is managing its own shadow stack. For CLONE_VFORK, simply set shstk->base and shstk->size to 0, and have it mean the thread is not managing a shadow stack and so should skip cleanup work. Implement this by breaking up the CLONE_VFORK and !CLONE_VM cases in shstk_alloc_thread_stack() to separate conditionals since, the logic is now different between them. In the case of CLONE_VFORK && !CLONE_VM, the existing behavior is to not clean up the shadow stack in the child (which should go away quickly with either be exit or exec), so maintain that behavior by handling the CLONE_VFORK case first in the allocation path. This new logioc cleanly handles the case of normal, successful CLONE_VFORK's skipping cleaning up their shadow stack's on exit as well. So remove the existing, vfork shadow stack freeing logic. This is in deactivate_mm() where vfork_done is used to tell if it is a vfork child that can skip cleaning up the thread shadow stack. Fixes: b2926a36b97a ("x86/shstk: Handle thread shadow stack") Reported-by: H.J. Lu Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Tested-by: H.J. Lu Link: https://lore.kernel.org/all/20230908203655.543765-2-rick.p.edgecombe%40intel.com --- arch/x86/include/asm/mmu_context.h | 3 +-- arch/x86/kernel/shstk.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 416901d406f8..8dac45a2c7fc 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -186,8 +186,7 @@ do { \ #else #define deactivate_mm(tsk, mm) \ do { \ - if (!tsk->vfork_done) \ - shstk_free(tsk); \ + shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index fd689921a1db..ad63252ebebc 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -205,10 +205,21 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long cl return 0; /* - * For CLONE_VM, except vfork, the child needs a separate shadow + * For CLONE_VFORK the child will share the parents shadow stack. + * Make sure to clear the internal tracking of the thread shadow + * stack so the freeing logic run for child knows to leave it alone. + */ + if (clone_flags & CLONE_VFORK) { + shstk->base = 0; + shstk->size = 0; + return 0; + } + + /* + * For !CLONE_VM the child will use a copy of the parents shadow * stack. */ - if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) + if (!(clone_flags & CLONE_VM)) return 0; size = adjust_shstk_size(stack_size); @@ -408,6 +419,13 @@ void shstk_free(struct task_struct *tsk) if (!tsk->mm || tsk->mm != current->mm) return; + /* + * If shstk->base is NULL, then this task is not managing its + * own shadow stack (CLONE_VFORK). So skip freeing it. + */ + if (!shstk->base) + return; + unmap_shadow_stack(shstk->base, shstk->size); } -- cgit From 748c90c693363d05c6b2f3915edc7999a2f71837 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Fri, 8 Sep 2023 13:36:54 -0700 Subject: x86/shstk: Remove useless clone error handling When clone fails after the shadow stack is allocated, any allocated shadow stack is cleaned up in exit_thread() in copy_process(). So the logic in copy_thread() is unneeded, and also will not handle failures that happen outside of copy_thread(). In addition, since there is a second attempt to unmap the same shadow stack, there is a race where an newly mapped region could get unmapped. So remove the logic in copy_thread() and rely on exit_thread() to handle clone failure. Fixes: b2926a36b97a ("x86/shstk: Handle thread shadow stack") Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Tested-by: H.J. Lu Link: https://lore.kernel.org/all/20230908203655.543765-3-rick.p.edgecombe%40intel.com --- arch/x86/kernel/process.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9f0909142a0a..b6f4e8399fca 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -257,13 +257,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) io_bitmap_share(p); - /* - * If copy_thread() if failing, don't leak the shadow stack possibly - * allocated in shstk_alloc_thread_stack() above. - */ - if (ret) - shstk_free(p); - return ret; } -- cgit From 509ff51ee652c41a277c2b439aea01a8f56a27b9 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Fri, 8 Sep 2023 13:36:55 -0700 Subject: x86/shstk: Add warning for shadow stack double unmap There are several ways a thread's shadow stacks can get unmapped. This can happen on exit or exec, as well as error handling in exec or clone. The task struct already keeps track of the thread's shadow stack. Use the size variable to keep track of if the shadow stack has already been freed. When an attempt to double unmap the thread shadow stack is caught, warn about it and abort the operation. Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Tested-by: H.J. Lu Link: https://lore.kernel.org/all/20230908203655.543765-4-rick.p.edgecombe%40intel.com --- arch/x86/kernel/shstk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index ad63252ebebc..59e15dd8d0f8 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -426,7 +426,18 @@ void shstk_free(struct task_struct *tsk) if (!shstk->base) return; + /* + * shstk->base is NULL for CLONE_VFORK child tasks, and so is + * normal. But size = 0 on a shstk->base is not normal and + * indicated an attempt to free the thread shadow stack twice. + * Warn about it. + */ + if (WARN_ON(!shstk->size)) + return; + unmap_shadow_stack(shstk->base, shstk->size); + + shstk->size = 0; } static int wrss_control(bool enable) -- cgit From 2da338ff752a2789470d733111a5241f30026675 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Sep 2023 11:35:53 -0500 Subject: smb3: do not start laundromat thread when dir leases disabled When no directory lease support, or for IPC shares where directories can not be opened, do not start an unneeded laundromat thread for that mount (it wastes resources). Fixes: d14de8067e3f ("cifs: Add a laundromat thread for cached directories") Reviewed-by: Paulo Alcantara (SUSE) Acked-by: Tom Talpey Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 6 ++++++ fs/smb/client/cifsglob.h | 2 +- fs/smb/client/cifsproto.h | 2 +- fs/smb/client/connect.c | 8 ++++++-- fs/smb/client/misc.c | 14 +++++++++----- fs/smb/client/smb2pdu.c | 2 +- 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index b17f067e4ada..e2be8aedb26e 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -452,6 +452,9 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) struct cached_fid *cfid, *q; LIST_HEAD(entry); + if (cfids == NULL) + return; + spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { list_move(&cfid->entry, &entry); @@ -651,6 +654,9 @@ void free_cached_dirs(struct cached_fids *cfids) struct cached_fid *cfid, *q; LIST_HEAD(entry); + if (cfids == NULL) + return; + if (cfids->laundromat) { kthread_stop(cfids->laundromat); cfids->laundromat = NULL; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 032d8716f671..f594fcc0e889 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1943,7 +1943,7 @@ require use of the stronger protocol */ * cifsInodeInfo->lock_sem cifsInodeInfo->llist cifs_init_once * ->can_cache_brlcks * cifsInodeInfo->deferred_lock cifsInodeInfo->deferred_closes cifsInodeInfo_alloc - * cached_fid->fid_mutex cifs_tcon->crfid tconInfoAlloc + * cached_fid->fid_mutex cifs_tcon->crfid tcon_info_alloc * cifsFileInfo->fh_mutex cifsFileInfo cifs_new_fileinfo * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 7d8035846680..0c37eefa18a5 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -512,7 +512,7 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); extern struct cifs_ses *sesInfoAlloc(void); extern void sesInfoFree(struct cifs_ses *); -extern struct cifs_tcon *tconInfoAlloc(void); +extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled); extern void tconInfoFree(struct cifs_tcon *); extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 687754791bf0..3902e90dca6b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1882,7 +1882,8 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) } } - tcon = tconInfoAlloc(); + /* no need to setup directory caching on IPC share, so pass in false */ + tcon = tcon_info_alloc(false); if (tcon == NULL) return -ENOMEM; @@ -2492,7 +2493,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) goto out_fail; } - tcon = tconInfoAlloc(); + if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) + tcon = tcon_info_alloc(true); + else + tcon = tcon_info_alloc(false); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 366b755ca913..35b176457bbe 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -113,18 +113,22 @@ sesInfoFree(struct cifs_ses *buf_to_free) } struct cifs_tcon * -tconInfoAlloc(void) +tcon_info_alloc(bool dir_leases_enabled) { struct cifs_tcon *ret_buf; ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL); if (!ret_buf) return NULL; - ret_buf->cfids = init_cached_dirs(); - if (!ret_buf->cfids) { - kfree(ret_buf); - return NULL; + + if (dir_leases_enabled == true) { + ret_buf->cfids = init_cached_dirs(); + if (!ret_buf->cfids) { + kfree(ret_buf); + return NULL; + } } + /* else ret_buf->cfids is already set to NULL above */ atomic_inc(&tconInfoAllocCount); ret_buf->status = TID_NEW; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 44d4943e9c56..405ea324f28d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3878,7 +3878,7 @@ void smb2_reconnect_server(struct work_struct *work) goto done; /* allocate a dummy tcon struct used for reconnect */ - tcon = tconInfoAlloc(); + tcon = tcon_info_alloc(false); if (!tcon) { resched = true; list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { -- cgit From 2409205acd3c7c877f3d0080cac6a5feb3358f83 Mon Sep 17 00:00:00 2001 From: Yann Sionneau Date: Mon, 11 Sep 2023 16:07:49 +0200 Subject: i2c: designware: fix __i2c_dw_disable() in case master is holding SCL low The DesignWare IP can be synthesized with the IC_EMPTYFIFO_HOLD_MASTER_EN parameter. In this case, when the TX FIFO gets empty and the last command didn't have the STOP bit (IC_DATA_CMD[9]), the controller will hold SCL low until a new command is pushed into the TX FIFO or the transfer is aborted. When the controller is holding SCL low, it cannot be disabled. The transfer must first be aborted. Also, the bus recovery won't work because SCL is held low by the master. Check if the master is holding SCL low in __i2c_dw_disable() before trying to disable the controller. If SCL is held low, an abort is initiated. When the abort is done, then proceed with disabling the controller. This whole situation can happen for instance during SMBus read data block if the slave just responds with "byte count == 0". This puts the driver in an unrecoverable state, because the controller is holding SCL low and the current __i2c_dw_disable() procedure is not working. In this situation only a SoC reset can fix the i2c bus. Co-developed-by: Jonathan Borne Signed-off-by: Jonathan Borne Signed-off-by: Yann Sionneau Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-common.c | 17 +++++++++++++++++ drivers/i2c/busses/i2c-designware-core.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index cdd8c67d9129..affcfb243f0f 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -441,8 +441,25 @@ err_release_lock: void __i2c_dw_disable(struct dw_i2c_dev *dev) { + unsigned int raw_intr_stats; + unsigned int enable; int timeout = 100; + bool abort_needed; unsigned int status; + int ret; + + regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &raw_intr_stats); + regmap_read(dev->map, DW_IC_ENABLE, &enable); + + abort_needed = raw_intr_stats & DW_IC_INTR_MST_ON_HOLD; + if (abort_needed) { + regmap_write(dev->map, DW_IC_ENABLE, enable | DW_IC_ENABLE_ABORT); + ret = regmap_read_poll_timeout(dev->map, DW_IC_ENABLE, enable, + !(enable & DW_IC_ENABLE_ABORT), 10, + 100); + if (ret) + dev_err(dev->dev, "timeout while trying to abort current transfer\n"); + } do { __i2c_dw_disable_nowait(dev); diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index cf4f684f5356..a7f6f3eafad7 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -98,6 +98,7 @@ #define DW_IC_INTR_START_DET BIT(10) #define DW_IC_INTR_GEN_CALL BIT(11) #define DW_IC_INTR_RESTART_DET BIT(12) +#define DW_IC_INTR_MST_ON_HOLD BIT(13) #define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ DW_IC_INTR_TX_ABRT | \ @@ -108,6 +109,8 @@ DW_IC_INTR_RX_UNDER | \ DW_IC_INTR_RD_REQ) +#define DW_IC_ENABLE_ABORT BIT(1) + #define DW_IC_STATUS_ACTIVITY BIT(0) #define DW_IC_STATUS_TFE BIT(2) #define DW_IC_STATUS_RFNE BIT(3) -- cgit From e72590fa56b73b99885b17c74017b6cd4355bf66 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Sep 2023 11:38:50 +0200 Subject: sh: mm: re-add lost __ref to ioremap_prot() to fix modpost warning When __ioremap_caller() was replaced by ioremap_prot(), the __ref annotation added in commit af1415314a4190b8 ("sh: Flag __ioremap_caller() __init_refok.") was removed, causing a modpost warning: WARNING: modpost: vmlinux: section mismatch in reference: ioremap_prot+0x88 (section: .text) -> ioremap_fixed (section: .init.text) ioremap_prot() calls ioremap_fixed() (which is marked __init), but only before mem_init_done becomes true, so this is safe. Hence fix this by re-adding the lost __ref. Link: https://lkml.kernel.org/r/20230911093850.1517389-1-geert+renesas@glider.be Fixes: 0453c9a78015cb22 ("sh: mm: convert to GENERIC_IOREMAP") Signed-off-by: Geert Uytterhoeven Reviewed-by: Baoquan He Reviewed-by: John Paul Adrian Glaubitz Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/sh/mm/ioremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index c33b3daa4ad1..33d20f34560f 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -72,8 +72,8 @@ __ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) #define __ioremap_29bit(offset, size, prot) NULL #endif /* CONFIG_29BIT */ -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) +void __iomem __ref *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { void __iomem *mapped; pgprot_t pgprot = __pgprot(prot); -- cgit From 7b086755fb8cdbb6b3e45a1bbddc00e7f9b1dc03 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 11 Sep 2023 14:11:08 -0400 Subject: mm: page_alloc: fix CMA and HIGHATOMIC landing on the wrong buddy list Commit 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock") bypasses the pcplist on lock contention and returns the page directly to the buddy list of the page's migratetype. For pages that don't have their own pcplist, such as CMA and HIGHATOMIC, the migratetype is temporarily updated such that the page can hitch a ride on the MOVABLE pcplist. Their true type is later reassessed when flushing in free_pcppages_bulk(). However, when lock contention is detected after the type was already overridden, the bypass will then put the page on the wrong buddy list. Once on the MOVABLE buddy list, the page becomes eligible for fallbacks and even stealing. In the case of HIGHATOMIC, otherwise ineligible allocations can dip into the highatomic reserves. In the case of CMA, the page can be lost from the CMA region permanently. Use a separate pcpmigratetype variable for the pcplist override. Use the original migratetype when going directly to the buddy. This fixes the bug and should make the intentions more obvious in the code. Originally sent here to address the HIGHATOMIC case: https://lore.kernel.org/lkml/20230821183733.106619-4-hannes@cmpxchg.org/ Changelog updated in response to the CMA-specific bug report. [mgorman@techsingularity.net: updated changelog] Link: https://lkml.kernel.org/r/20230911181108.GA104295@cmpxchg.org Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock") Signed-off-by: Johannes Weiner Reported-by: Joe Liu Reviewed-by: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0c5be12f9336..95546f376302 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2400,7 +2400,7 @@ void free_unref_page(struct page *page, unsigned int order) struct per_cpu_pages *pcp; struct zone *zone; unsigned long pfn = page_to_pfn(page); - int migratetype; + int migratetype, pcpmigratetype; if (!free_unref_page_prepare(page, pfn, order)) return; @@ -2408,24 +2408,24 @@ void free_unref_page(struct page *page, unsigned int order) /* * We only track unmovable, reclaimable and movable on pcp lists. * Place ISOLATE pages on the isolated list because they are being - * offlined but treat HIGHATOMIC as movable pages so we can get those - * areas back if necessary. Otherwise, we may have to free + * offlined but treat HIGHATOMIC and CMA as movable pages so we can + * get those areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ - migratetype = get_pcppage_migratetype(page); + migratetype = pcpmigratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); return; } - migratetype = MIGRATE_MOVABLE; + pcpmigratetype = MIGRATE_MOVABLE; } zone = page_zone(page); pcp_trylock_prepare(UP_flags); pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_unref_page_commit(zone, pcp, page, migratetype, order); + free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); pcp_spin_unlock(pcp); } else { free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); -- cgit From 4653e5dd04cb869526477a76b87d0aa1a5c65101 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Sep 2023 14:24:06 -0600 Subject: task_work: add kerneldoc annotation for 'data' argument A previous commit changed the arguments to task_work_cancel_match(), but didn't document all of them. Link: https://lkml.kernel.org/r/93938bff-baa3-4091-85f5-784aae297a07@kernel.dk Fixes: c7aab1a7c52b ("task_work: add helper for more targeted task_work canceling") Signed-off-by: Jens Axboe Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309120307.zis3yQGe-lkp@intel.com/ Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton --- kernel/task_work.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/task_work.c b/kernel/task_work.c index 065e1ef8fc8d..95a7e1b7f1da 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -78,6 +78,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work, * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @match: match function to call + * @data: data to be passed in to match function * * RETURNS: * The found work or NULL if not found. -- cgit From c652df8a4a9d7853fa1100b244024fd6f1a9c18a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 12 Sep 2023 14:50:48 +0100 Subject: selftests: link libasan statically for tests with -fsanitize=address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dynamically linking, Address Sanitizer requires its library to be the first one to be loaded; this is apparently to ensure that every call to malloc is intercepted. If using LD_PRELOAD, those listed libraries will be loaded before the libraries listed in the program's ELF and will therefore violate this requirement, leading to the below failure and output from ASan. commit 58e2847ad2e6 ("selftests: line buffer test program's stdout") modified the kselftest runner to force line buffering by forcing the test programs to run through `stdbuf`. It turns out that stdbuf implements line buffering by injecting a library via LD_PRELOAD. Therefore selftests that use ASan started failing. Fix this by statically linking libasan in the affected test programs, using the `-static-libasan` option. Note this is already the default for Clang, but not got GCC. Test output sample for failing case: TAP version 13 1..3 # timeout set to 300 # selftests: openat2: openat2_test # ==4052==ASan runtime does not come first in initial library list; you should either link runtime to your application or manually preload it with LD_PRELOAD. not ok 1 selftests: openat2: openat2_test # exit=1 # timeout set to 300 # selftests: openat2: resolve_test # ==4070==ASan runtime does not come first in initial library list; you should either link runtime to your application or manually preload it with LD_PRELOAD. not ok 2 selftests: openat2: resolve_test # exit=1 Link: https://lkml.kernel.org/r/20230912135048.1755771-1-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Fixes: 58e2847ad2e6 ("selftests: line buffer test program's stdout") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202309121342.97e2f008-oliver.sang@intel.com Cc: David Hildenbrand Cc: Florent Revest Cc: Jérôme Glisse Cc: John Hubbard Cc: Mark Brown Cc: Peter Xu Cc: Shuah Khan Cc: Tom Rix Signed-off-by: Andrew Morton --- tools/testing/selftests/fchmodat2/Makefile | 2 +- tools/testing/selftests/openat2/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 20839f8e43f2..71ec34bf1501 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 843ba56d8e49..254d676a2689 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk -- cgit From 493d4eecf45d15bb1850832f5f5ece2556308646 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Sep 2023 09:19:10 -0700 Subject: revert "scripts/gdb/symbols: add specific ko module load command" Revert 11f956538c07 ("scripts/gdb/symbols: add specific ko module load command") due to breakage identified by Johannes Berg in [1]. Fixes: 11f956538c07 ("scripts/gdb/symbols: add specific ko module load command") Reported-by: Johannes Berg Closes: https://lkml.kernel.org/r/c44b748307a074d0c250002cdcfe209b8cce93c9.camel@sipsolutions.net [1] Cc: AngeloGioacchino Del Regno Cc: Chinwen Chang Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kuan-Ying Lee Cc: Matthias Brugger Cc: Qun-Wei Lin Signed-off-by: Andrew Morton --- scripts/gdb/linux/symbols.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 5179edd1b627..c8047f4441e6 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -111,12 +111,11 @@ lx-symbols command.""" return "{textaddr} {sections}".format( textaddr=textaddr, sections="".join(args)) - def load_module_symbols(self, module, module_file=None): + def load_module_symbols(self, module): module_name = module['name'].string() module_addr = str(module['mem'][constants.LX_MOD_TEXT]['base']).split()[0] - if not module_file: - module_file = self._get_module_file(module_name) + module_file = self._get_module_file(module_name) if not module_file and not self.module_files_updated: self._update_module_files() module_file = self._get_module_file(module_name) @@ -139,19 +138,6 @@ lx-symbols command.""" else: gdb.write("no module object found for '{0}'\n".format(module_name)) - def load_ko_symbols(self, mod_path): - self.loaded_modules = [] - module_list = modules.module_list() - - for module in module_list: - module_name = module['name'].string() - module_pattern = ".*/{0}\.ko(?:.debug)?$".format( - module_name.replace("_", r"[_\-]")) - if re.match(module_pattern, mod_path) and os.path.exists(mod_path): - self.load_module_symbols(module, mod_path) - return - raise gdb.GdbError("%s is not a valid .ko\n" % mod_path) - def load_all_symbols(self): gdb.write("loading vmlinux\n") @@ -190,11 +176,6 @@ lx-symbols command.""" self.module_files = [] self.module_files_updated = False - argv = gdb.string_to_argv(arg) - if len(argv) == 1: - self.load_ko_symbols(argv[0]) - return - self.load_all_symbols() if hasattr(gdb, 'Breakpoint'): -- cgit From 9d1be94df5acd486b157e85ffa53d344e5b17e22 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Sep 2023 14:10:12 +0300 Subject: selftests/proc: fixup proc-empty-vm test after KSM changes /proc/${pid}/smaps_rollup is not empty file even if process's address space is empty, update the test. Link: https://lkml.kernel.org/r/725e041f-e9df-4f3d-b267-d4cd2774a78d@p183 Signed-off-by: Alexey Dobriyan Cc: David Hildenbrand Cc: Stefan Roesch Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-empty-vm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index b16c13688b88..ee71ce52cb6a 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -267,6 +267,7 @@ static const char g_smaps_rollup[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" -- cgit From c80da1fb85bf50decf0cc9803fbf9b0b926268f8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2023 23:08:48 -0700 Subject: scatterlist: add missing function params to kernel-doc Describe missing function parameters to prevent kernel-doc warnings: lib/scatterlist.c:288: warning: Function parameter or member 'first_chunk' not described in '__sg_alloc_table' lib/scatterlist.c:800: warning: Function parameter or member 'flags' not described in 'sg_miter_start' Link: https://lkml.kernel.org/r/20230912060848.4673-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton --- lib/scatterlist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index c65566b4dc66..68b45c82c37a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -265,7 +265,8 @@ EXPORT_SYMBOL(sg_free_table); * @table: The sg table header to use * @nents: Number of entries in sg list * @max_ents: The maximum number of entries the allocator returns per call - * @nents_first_chunk: Number of entries int the (preallocated) first + * @first_chunk: first SGL if preallocated (may be %NULL) + * @nents_first_chunk: Number of entries in the (preallocated) first * scatterlist chunk, 0 means no such preallocated chunk provided by user * @gfp_mask: GFP allocation mask * @alloc_fn: Allocator to use @@ -788,6 +789,7 @@ EXPORT_SYMBOL(__sg_page_iter_dma_next); * @miter: sg mapping iter to be started * @sgl: sg list to iterate over * @nents: number of sg entries + * @flags: sg iterator flags * * Description: * Starts mapping iterator @miter. -- cgit From 36ee98b555c00c5b360d9cd63dce490f4dac2290 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2023 23:08:38 -0700 Subject: argv_split: fix kernel-doc warnings Use proper kernel-doc notation to prevent build warnings: lib/argv_split.c:36: warning: Function parameter or member 'argv' not described in 'argv_free' lib/argv_split.c:61: warning: No description found for return value of 'argv_split' Link: https://lkml.kernel.org/r/20230912060838.3794-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton --- lib/argv_split.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/argv_split.c b/lib/argv_split.c index 1a19a0a93dc1..e28db8e3b58c 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -28,7 +28,7 @@ static int count_argc(const char *str) /** * argv_free - free an argv - * @argv - the argument vector to be freed + * @argv: the argument vector to be freed * * Frees an argv and the strings it points to. */ @@ -46,7 +46,7 @@ EXPORT_SYMBOL(argv_free); * @str: the string to be split * @argcp: returned argument count * - * Returns an array of pointers to strings which are split out from + * Returns: an array of pointers to strings which are split out from * @str. This is performed by strictly splitting on white-space; no * quote processing is performed. Multiple whitespace characters are * considered to be a single argument separator. The returned array -- cgit From 0c7752d5b1ac62c8b926c907f34073ef7e9ad42b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2023 23:08:22 -0700 Subject: pidfd: prevent a kernel-doc warning Change the comment to match the function name that the SYSCALL_DEFINE() macros generate to prevent a kernel-doc warning. kernel/pid.c:628: warning: expecting prototype for pidfd_open(). Prototype was for sys_pidfd_open() instead Link: https://lkml.kernel.org/r/20230912060822.2500-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Christian Brauner Signed-off-by: Andrew Morton --- kernel/pid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/pid.c b/kernel/pid.c index fee14a4486a3..6500ef956f2f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -609,7 +609,7 @@ int pidfd_create(struct pid *pid, unsigned int flags) } /** - * pidfd_open() - Open new pid file descriptor. + * sys_pidfd_open() - Open new pid file descriptor. * * @pid: pid for which to retrieve a pidfd * @flags: flags to pass -- cgit From 9ea9cb00a82b53ec39630eac718776d37e41b35a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Sep 2023 11:21:39 -0400 Subject: mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement Breno and Josef report a deadlock scenario from cgroup reclaim re-entering the filesystem: [ 361.546690] ====================================================== [ 361.559210] WARNING: possible circular locking dependency detected [ 361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S E [ 361.589704] ------------------------------------------------------ [ 361.602277] find/9315 is trying to acquire lock: [ 361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0 [ 361.631437] [ 361.631437] but task is already holding lock: [ 361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40 [ 362.904457] mutex_lock_nested+0x1c/0x30 [ 362.912414] __btrfs_release_delayed_node+0x68/0x4f0 [ 362.922460] btrfs_evict_inode+0x301/0x770 [ 362.982726] evict+0x17c/0x380 [ 362.988944] prune_icache_sb+0x100/0x1d0 [ 363.005559] super_cache_scan+0x1f8/0x260 [ 363.013695] do_shrink_slab+0x2a2/0x540 [ 363.021489] shrink_slab_memcg+0x237/0x3d0 [ 363.050606] shrink_slab+0xa7/0x240 [ 363.083382] shrink_node_memcgs+0x262/0x3b0 [ 363.091870] shrink_node+0x1a4/0x720 [ 363.099150] shrink_zones+0x1f6/0x5d0 [ 363.148798] do_try_to_free_pages+0x19b/0x5e0 [ 363.157633] try_to_free_mem_cgroup_pages+0x266/0x370 [ 363.190575] reclaim_high+0x16f/0x1f0 [ 363.208409] mem_cgroup_handle_over_high+0x10b/0x270 [ 363.246678] try_charge_memcg+0xaf2/0xc70 [ 363.304151] charge_memcg+0xf0/0x350 [ 363.320070] __mem_cgroup_charge+0x28/0x40 [ 363.328371] __filemap_add_folio+0x870/0xd50 [ 363.371303] filemap_add_folio+0xdd/0x310 [ 363.399696] __filemap_get_folio+0x2fc/0x7d0 [ 363.419086] pagecache_get_page+0xe/0x30 [ 363.427048] alloc_extent_buffer+0x1cd/0x6a0 [ 363.435704] read_tree_block+0x43/0xc0 [ 363.443316] read_block_for_search+0x361/0x510 [ 363.466690] btrfs_search_slot+0xc8c/0x1520 This is caused by the mem_cgroup_handle_over_high() not respecting the gfp_mask of the allocation context. We used to only call this function on resume to userspace, where no locks were held. But c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added a call from the allocation context without considering the gfp. Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Breno Leitao Reported-by: Josef Bacik Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: [5.17+] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- include/linux/resume_user_mode.h | 2 +- mm/memcontrol.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ab94ad4597d0..e4e24da16d2c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -920,7 +920,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1458,7 +1458,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index 285189454449..f8f3e958e9cf 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) } #endif - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(GFP_KERNEL); blkcg_maybe_throttle_current(); rseq_handle_notify_resume(NULL, regs); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a4d3282493b6..d13dde2f8b56 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2555,7 +2555,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, * Scheduled by try_charge() to be executed from the userland return path * and reclaims memory over the high limit. */ -void mem_cgroup_handle_over_high(void) +void mem_cgroup_handle_over_high(gfp_t gfp_mask) { unsigned long penalty_jiffies; unsigned long pflags; @@ -2583,7 +2583,7 @@ retry_reclaim: */ nr_reclaimed = reclaim_high(memcg, in_retry ? SWAP_CLUSTER_MAX : nr_pages, - GFP_KERNEL); + gfp_mask); /* * memory.high is breached and reclaim is unable to keep up. Throttle @@ -2819,7 +2819,7 @@ done_restock: if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && gfpflags_allow_blocking(gfp_mask)) { - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(gfp_mask); } return 0; } -- cgit From 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Thu, 14 Sep 2023 12:30:20 -0400 Subject: proc: nommu: /proc//maps: release mmap read lock The no-MMU implementation of /proc//map doesn't normally release the mmap read lock, because it uses !IS_ERR_OR_NULL(_vml) to determine whether to release the lock. Since _vml is NULL when the end of the mappings is reached, the lock is not released. Reading /proc/1/maps twice doesn't cause a hang because it only takes the read lock, which can be taken multiple times and therefore doesn't show any problem if the lock isn't released. Instead, you need to perform some operation that attempts to take the write lock after reading /proc//maps. To actually reproduce the bug, compile the following code as 'proc_maps_bug': #include #include #include int main(int argc, char *argv[]) { void *buf; sleep(1); buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); puts("mmap returned"); return 0; } Then, run: ./proc_maps_bug &; cat /proc/$!/maps; fg Without this patch, mmap() will hang and the command will never complete. This code was incorrectly adapted from the MMU implementation, which at the time released the lock in m_next() before returning the last entry. The MMU implementation has diverged further from the no-MMU version since then, so this patch brings their locking and error handling into sync, fixing the bug and hopefully avoiding similar issues in the future. Link: https://lkml.kernel.org/r/20230914163019.4050530-2-ben.wolsieffer@hefring.com Fixes: 47fecca15c09 ("fs/proc/task_nommu.c: don't use priv->task->mm") Signed-off-by: Ben Wolsieffer Acked-by: Oleg Nesterov Cc: Giulio Benetti Cc: Greg Ungerer Cc: Signed-off-by: Andrew Morton --- fs/proc/task_nommu.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index a8ac0dd8041e..bc2e843f4810 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -192,11 +192,16 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !mmget_not_zero(mm)) + if (!mm || !mmget_not_zero(mm)) { + put_task_struct(priv->task); + priv->task = NULL; return NULL; + } if (mmap_read_lock_killable(mm)) { mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; return ERR_PTR(-EINTR); } @@ -205,23 +210,21 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (vma) return vma; - mmap_read_unlock(mm); - mmput(mm); return NULL; } -static void m_stop(struct seq_file *m, void *_vml) +static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; + struct mm_struct *mm = priv->mm; - if (!IS_ERR_OR_NULL(_vml)) { - mmap_read_unlock(priv->mm); - mmput(priv->mm); - } - if (priv->task) { - put_task_struct(priv->task); - priv->task = NULL; - } + if (!priv->task) + return; + + mmap_read_unlock(mm); + mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; } static void *m_next(struct seq_file *m, void *_p, loff_t *pos) -- cgit From c8be03806738c86521dbf1e0503bc90855fb99a3 Mon Sep 17 00:00:00 2001 From: Yin Fengwei Date: Thu, 14 Sep 2023 21:47:41 +0800 Subject: filemap: add filemap_map_order0_folio() to handle order0 folio Kernel test robot reported regressions for several benchmarks [1]. The regression are related with commit: de74976eb65151a2f568e477fc2e0032df5b22b4 ("filemap: add filemap_map_folio_range()") It turned out that function filemap_map_folio_range() brings these regressions when handle folio with order0. Add filemap_map_order0_folio() to handle order0 folio. The benefit come from two perspectives: - the code size is smaller (around 126 bytes) - no loop Testing showed the regressions reported by 0day [1] all are fixed: commit 9f1f5b60e76d44fa: parent commit of de74976eb65151a2 commit fbdf9263a3d7fdbd: latest mm-unstable commit commit 7fbfe2003f84686d: this fixing patch 9f1f5b60e76d44fa fbdf9263a3d7fdbd 7fbfe2003f84686d ---------------- --------------------------- --------------------------- 3843810 -21.4% 3020268 +4.6% 4018708 stress-ng.bad-altstack.ops 64061 -21.4% 50336 +4.6% 66977 stress-ng.bad-altstack.ops_per_sec 1709026 -14.4% 1462102 +2.4% 1750757 stress-ng.fork.ops 28483 -14.4% 24368 +2.4% 29179 stress-ng.fork.ops_per_sec 3685088 -53.6% 1710976 +0.5% 3702454 stress-ng.zombie.ops 56732 -65.3% 19667 +0.7% 57107 stress-ng.zombie.ops_per_sec 61874 -12.1% 54416 +0.4% 62136 vm-scalability.median 13527663 -11.7% 11942117 -0.1% 13513946 vm-scalability.throughput 4.066e+09 -11.7% 3.59e+09 -0.1% 4.061e+09 vm-scalability.workload [1]: https://lore.kernel.org/oe-lkp/72e017b9-deb6-44fa-91d6-716ee2c39cbc@intel.com/T/#m7d2bba30f75a9cee8eab07e5809abd9b3b206c84 Link: https://lkml.kernel.org/r/20230914134741.1937654-1-fengwei.yin@intel.com Fixes: de74976eb65151a2f568e477fc2e0032df5b22b4 ("filemap: add filemap_map_folio_range()") Signed-off-by: Yin Fengwei Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202309111556.b2aa3d7a-oliver.sang@intel.com Cc: Feng Tang Cc: Huang Ying Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/filemap.c | 69 ++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 582f5317ff71..4ea4387053e8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3475,13 +3475,11 @@ skip: */ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, - unsigned long addr, unsigned int nr_pages) + unsigned long addr, unsigned int nr_pages, + unsigned int *mmap_miss) { vm_fault_t ret = 0; - struct vm_area_struct *vma = vmf->vma; - struct file *file = vma->vm_file; struct page *page = folio_page(folio, start); - unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); unsigned int count = 0; pte_t *old_ptep = vmf->pte; @@ -3489,8 +3487,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, if (PageHWPoison(page + count)) goto skip; - if (mmap_miss > 0) - mmap_miss--; + (*mmap_miss)++; /* * NOTE: If there're PTE markers, we'll leave them to be @@ -3525,7 +3522,35 @@ skip: } vmf->pte = old_ptep; - WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); + + return ret; +} + +static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, + struct folio *folio, unsigned long addr, + unsigned int *mmap_miss) +{ + vm_fault_t ret = 0; + struct page *page = &folio->page; + + if (PageHWPoison(page)) + return ret; + + (*mmap_miss)++; + + /* + * NOTE: If there're PTE markers, we'll leave them to be + * handled in the specific fault path, and it'll prohibit + * the fault-around logic. + */ + if (!pte_none(ptep_get(vmf->pte))) + return ret; + + if (vmf->address == addr) + ret = VM_FAULT_NOPAGE; + + set_pte_range(vmf, folio, page, 1, addr); + folio_ref_inc(folio); return ret; } @@ -3541,7 +3566,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, XA_STATE(xas, &mapping->i_pages, start_pgoff); struct folio *folio; vm_fault_t ret = 0; - int nr_pages = 0; + unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); @@ -3569,25 +3594,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, end = folio->index + folio_nr_pages(folio) - 1; nr_pages = min(end, end_pgoff) - xas.xa_index + 1; - /* - * NOTE: If there're PTE markers, we'll leave them to be - * handled in the specific fault path, and it'll prohibit the - * fault-around logic. - */ - if (!pte_none(ptep_get(vmf->pte))) - goto unlock; - - ret |= filemap_map_folio_range(vmf, folio, - xas.xa_index - folio->index, addr, nr_pages); + if (!folio_test_large(folio)) + ret |= filemap_map_order0_folio(vmf, + folio, addr, &mmap_miss); + else + ret |= filemap_map_folio_range(vmf, folio, + xas.xa_index - folio->index, addr, + nr_pages, &mmap_miss); -unlock: folio_unlock(folio); folio_put(folio); - folio = next_uptodate_folio(&xas, mapping, end_pgoff); - } while (folio); + } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); + + mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss); + if (mmap_miss >= mmap_miss_saved) + WRITE_ONCE(file->f_ra.mmap_miss, 0); + else + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss); + return ret; } EXPORT_SYMBOL(filemap_map_pages); -- cgit From fe4419801617514765974f3e796269bc512ad146 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Fri, 15 Sep 2023 12:00:56 -0400 Subject: proc: nommu: fix empty /proc//maps On no-MMU, /proc//maps reads as an empty file. This happens because find_vma(mm, 0) always returns NULL (assuming no vma actually contains the zero address, which is normally the case). To fix this bug and improve the maintainability in the future, this patch makes the no-MMU implementation as similar as possible to the MMU implementation. The only remaining differences are the lack of hold/release_task_mempolicy and the extra code to shoehorn the gate vma into the iterator. This has been tested on top of 6.5.3 on an STM32F746. Link: https://lkml.kernel.org/r/20230915160055.971059-2-ben.wolsieffer@hefring.com Fixes: 0c563f148043 ("proc: remove VMA rbtree use from nommu") Signed-off-by: Ben Wolsieffer Cc: Davidlohr Bueso Cc: Giulio Benetti Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Oleg Nesterov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- fs/proc/internal.h | 2 -- fs/proc/task_nommu.c | 37 ++++++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 9dda7e54b2d0..9a8f32f21ff5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -289,9 +289,7 @@ struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; -#ifdef CONFIG_MMU struct vma_iterator iter; -#endif #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index bc2e843f4810..7cebd397cc26 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -175,15 +175,28 @@ static int show_map(struct seq_file *m, void *_p) return nommu_vma_show(m, _p); } -static void *m_start(struct seq_file *m, loff_t *pos) +static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, + loff_t *ppos) +{ + struct vm_area_struct *vma = vma_next(&priv->iter); + + if (vma) { + *ppos = vma->vm_start; + } else { + *ppos = -1UL; + } + + return vma; +} + +static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; + unsigned long last_addr = *ppos; struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned long addr = *pos; - /* See m_next(). Zero at the start or after lseek. */ - if (addr == -1UL) + /* See proc_get_vma(). Zero at the start or after lseek. */ + if (last_addr == -1UL) return NULL; /* pin the task and mm whilst we play with them */ @@ -205,12 +218,9 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-EINTR); } - /* start the next element from addr */ - vma = find_vma(mm, addr); - if (vma) - return vma; + vma_iter_init(&priv->iter, mm, last_addr); - return NULL; + return proc_get_vma(priv, ppos); } static void m_stop(struct seq_file *m, void *v) @@ -227,12 +237,9 @@ static void m_stop(struct seq_file *m, void *v) priv->task = NULL; } -static void *m_next(struct seq_file *m, void *_p, loff_t *pos) +static void *m_next(struct seq_file *m, void *_p, loff_t *ppos) { - struct vm_area_struct *vma = _p; - - *pos = vma->vm_end; - return find_vma(vma->vm_mm, vma->vm_end); + return proc_get_vma(m->private, ppos); } static const struct seq_operations proc_pid_maps_ops = { -- cgit From 7ece3fc9b76b2d4596607fd8751f36c4e5f1f072 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 16 Sep 2023 03:14:58 +0200 Subject: drm/nouveau: fence: fix type cast warning in nouveau_fence_emit() Fix the following warning. drivers/gpu/drm/nouveau/nouveau_fence.c:210:45: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct nouveau_channel *chan @@ got struct nouveau_channel [noderef] __rcu *channel We're just about to emit the fence, there is nothing to protect against yet, hence it is safe to just cast __rcu away. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309140340.BwKXzaDx-lkp@intel.com/ Fixes: 978474dc8278 ("drm/nouveau: fence: fix undefined fence state after emit") Signed-off-by: Danilo Krummrich Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230916011501.15813-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 61d9e70da9fd..ca762ea55413 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -207,7 +207,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha int nouveau_fence_emit(struct nouveau_fence *fence) { - struct nouveau_channel *chan = fence->channel; + struct nouveau_channel *chan = unrcu_pointer(fence->channel); struct nouveau_fence_chan *fctx = chan->fence; struct nouveau_fence_priv *priv = (void*)chan->drm->fence; int ret; -- cgit From 31499b0192cea06bbfe2782f288ac5cfe3dc9167 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 16 Sep 2023 18:28:31 +0200 Subject: drm/nouveau: sched: fix leaking memory of timedout job Always stop and re-start the scheduler in order to let the scheduler free up the timedout job in case it got signaled. In case of exec jobs the job type specific callback will take care to signal all fences and tear down the channel. Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI") Signed-off-by: Danilo Krummrich Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230916162835.5719-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 19024ce21fbb..5dda94e1318c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -213,7 +213,7 @@ nouveau_exec_job_timeout(struct nouveau_job *job) nouveau_sched_entity_fini(job->entity); - return DRM_GPU_SCHED_STAT_ENODEV; + return DRM_GPU_SCHED_STAT_NOMINAL; } static struct nouveau_job_ops nouveau_exec_job_ops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 88217185e0f3..3b7ea5221226 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -375,14 +375,20 @@ nouveau_sched_run_job(struct drm_sched_job *sched_job) static enum drm_gpu_sched_stat nouveau_sched_timedout_job(struct drm_sched_job *sched_job) { + struct drm_gpu_scheduler *sched = sched_job->sched; struct nouveau_job *job = to_nouveau_job(sched_job); + enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL; - NV_PRINTK(warn, job->cli, "Job timed out.\n"); + drm_sched_stop(sched, sched_job); if (job->ops->timeout) - return job->ops->timeout(job); + stat = job->ops->timeout(job); + else + NV_PRINTK(warn, job->cli, "Generic job timeout.\n"); + + drm_sched_start(sched, true); - return DRM_GPU_SCHED_STAT_ENODEV; + return stat; } static void -- cgit From e3885f71213437e7fa3e347d16b2bf59d03ae05d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Aug 2023 04:50:20 +1000 Subject: nouveau/u_memcpya: use vmemdup_user I think there are limit checks in place for most things but the new uAPI wants to not have them. Add a limit check and use the vmemdup_user helper instead. Signed-off-by: Dave Airlie Reviewed-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20230810185020.231135-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_drv.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 1fe17ff95f5e..3666a7403e47 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -189,21 +189,12 @@ u_free(void *addr) static inline void * u_memcpya(uint64_t user, unsigned int nmemb, unsigned int size) { - void *mem; - void __user *userptr = (void __force __user *)(uintptr_t)user; + void __user *userptr = u64_to_user_ptr(user); + size_t bytes; - size *= nmemb; - - mem = kvmalloc(size, GFP_KERNEL); - if (!mem) - return ERR_PTR(-ENOMEM); - - if (copy_from_user(mem, userptr, size)) { - u_free(mem); - return ERR_PTR(-EFAULT); - } - - return mem; + if (unlikely(check_mul_overflow(nmemb, size, &bytes))) + return NULL; + return vmemdup_user(userptr, bytes); } #include -- cgit From c5f9362307c685fe6a90d344bf81579578fd25d8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Sep 2023 15:59:21 +0300 Subject: nouveau/u_memcpya: fix NULL vs error pointer bug The u_memcpya() function is supposed to return error pointers on error. Returning NULL will lead to an Oops. Fixes: e3885f712134 ("nouveau/u_memcpya: use vmemdup_user") Reviewed-by: Lyude Paul Reviewed-by: Danilo Krummrich Signed-off-by: Dan Carpenter Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/10fd258b-466f-4c5b-9d48-fe61a3f21424@moroto.mountain --- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 3666a7403e47..e73a233c6572 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -193,7 +193,7 @@ u_memcpya(uint64_t user, unsigned int nmemb, unsigned int size) size_t bytes; if (unlikely(check_mul_overflow(nmemb, size, &bytes))) - return NULL; + return ERR_PTR(-EOVERFLOW); return vmemdup_user(userptr, bytes); } -- cgit From b724a6418f1f853bcb39c8923bf14a50c7bdbd07 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 17 Sep 2023 23:38:46 +0800 Subject: bpf: Fix tr dereferencing Fix 'tr' dereferencing bug when CONFIG_BPF_JIT is turned off. When CONFIG_BPF_JIT is turned off, 'bpf_trampoline_get()' returns NULL, which is same as the cases when CONFIG_BPF_JIT is turned on. Closes: https://lore.kernel.org/r/202309131936.5Nc8eUD0-lkp@intel.com/ Fixes: f7b12b6fea00 ("bpf: verifier: refactor check_attach_btf_id()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Leon Hwang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230917153846.88732-1-hffilwlqm@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..49f8b691496c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) -- cgit From d527f51331cace562393a8038d870b3e9916686f Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 19 Sep 2023 13:38:04 -0500 Subject: cifs: Fix UAF in cifs_demultiplex_thread() There is a UAF when xfstests on cifs: BUG: KASAN: use-after-free in smb2_is_network_name_deleted+0x27/0x160 Read of size 4 at addr ffff88810103fc08 by task cifsd/923 CPU: 1 PID: 923 Comm: cifsd Not tainted 6.1.0-rc4+ #45 ... Call Trace: dump_stack_lvl+0x34/0x44 print_report+0x171/0x472 kasan_report+0xad/0x130 kasan_check_range+0x145/0x1a0 smb2_is_network_name_deleted+0x27/0x160 cifs_demultiplex_thread.cold+0x172/0x5a4 kthread+0x165/0x1a0 ret_from_fork+0x1f/0x30 Allocated by task 923: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_slab_alloc+0x54/0x60 kmem_cache_alloc+0x147/0x320 mempool_alloc+0xe1/0x260 cifs_small_buf_get+0x24/0x60 allocate_buffers+0xa1/0x1c0 cifs_demultiplex_thread+0x199/0x10d0 kthread+0x165/0x1a0 ret_from_fork+0x1f/0x30 Freed by task 921: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 ____kasan_slab_free+0x143/0x1b0 kmem_cache_free+0xe3/0x4d0 cifs_small_buf_release+0x29/0x90 SMB2_negotiate+0x8b7/0x1c60 smb2_negotiate+0x51/0x70 cifs_negotiate_protocol+0xf0/0x160 cifs_get_smb_ses+0x5fa/0x13c0 mount_get_conns+0x7a/0x750 cifs_mount+0x103/0xd00 cifs_smb3_do_mount+0x1dd/0xcb0 smb3_get_tree+0x1d5/0x300 vfs_get_tree+0x41/0xf0 path_mount+0x9b3/0xdd0 __x64_sys_mount+0x190/0x1d0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The UAF is because: mount(pid: 921) | cifsd(pid: 923) -------------------------------|------------------------------- | cifs_demultiplex_thread SMB2_negotiate | cifs_send_recv | compound_send_recv | smb_send_rqst | wait_for_response | wait_event_state [1] | | standard_receive3 | cifs_handle_standard | handle_mid | mid->resp_buf = buf; [2] | dequeue_mid [3] KILL the process [4] | resp_iov[i].iov_base = buf | free_rsp_buf [5] | | is_network_name_deleted [6] | callback 1. After send request to server, wait the response until mid->mid_state != SUBMITTED; 2. Receive response from server, and set it to mid; 3. Set the mid state to RECEIVED; 4. Kill the process, the mid state already RECEIVED, get 0; 5. Handle and release the negotiate response; 6. UAF. It can be easily reproduce with add some delay in [3] - [6]. Only sync call has the problem since async call's callback is executed in cifsd process. Add an extra state to mark the mid state to READY before wakeup the waitter, then it can get the resp safely. Fixes: ec637e3ffb6b ("[CIFS] Avoid extra large buffer allocation (and memcpy) in cifs_readpages") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/transport.c | 34 +++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index f594fcc0e889..02082621d8e0 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1807,6 +1807,7 @@ static inline bool is_retryable_error(int error) #define MID_RETRY_NEEDED 8 /* session closed while this request out */ #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 +#define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ /* Flags */ #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index d52057a511ee..14710afdc2a3 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -35,6 +35,8 @@ void cifs_wake_up_task(struct mid_q_entry *mid) { + if (mid->mid_state == MID_RESPONSE_RECEIVED) + mid->mid_state = MID_RESPONSE_READY; wake_up_process(mid->callback_data); } @@ -87,7 +89,8 @@ static void __release_mid(struct kref *refcount) struct TCP_Server_Info *server = midEntry->server; if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && - midEntry->mid_state == MID_RESPONSE_RECEIVED && + (midEntry->mid_state == MID_RESPONSE_RECEIVED || + midEntry->mid_state == MID_RESPONSE_READY) && server->ops->handle_cancelled_mid) server->ops->handle_cancelled_mid(midEntry, server); @@ -737,7 +740,8 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) int error; error = wait_event_state(server->response_q, - midQ->mid_state != MID_REQUEST_SUBMITTED, + midQ->mid_state != MID_REQUEST_SUBMITTED && + midQ->mid_state != MID_RESPONSE_RECEIVED, (TASK_KILLABLE|TASK_FREEZABLE_UNSAFE)); if (error < 0) return -ERESTARTSYS; @@ -890,7 +894,7 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) spin_lock(&server->mid_lock); switch (mid->mid_state) { - case MID_RESPONSE_RECEIVED: + case MID_RESPONSE_READY: spin_unlock(&server->mid_lock); return rc; case MID_RETRY_NEEDED: @@ -989,6 +993,9 @@ cifs_compound_callback(struct mid_q_entry *mid) credits.instance = server->reconnect_instance; add_credits(server, &credits, mid->optype); + + if (mid->mid_state == MID_RESPONSE_RECEIVED) + mid->mid_state = MID_RESPONSE_READY; } static void @@ -1209,7 +1216,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, send_cancel(server, &rqst[i], midQ[i]); spin_lock(&server->mid_lock); midQ[i]->mid_flags |= MID_WAIT_CANCELLED; - if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { + if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || + midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { midQ[i]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; credits[i].value = 0; @@ -1230,7 +1238,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } if (!midQ[i]->resp_buf || - midQ[i]->mid_state != MID_RESPONSE_RECEIVED) { + midQ[i]->mid_state != MID_RESPONSE_READY) { rc = -EIO; cifs_dbg(FYI, "Bad MID state?\n"); goto out; @@ -1417,7 +1425,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (rc != 0) { send_cancel(server, &rqst, midQ); spin_lock(&server->mid_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED) { + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; spin_unlock(&server->mid_lock); @@ -1434,7 +1443,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, } if (!midQ->resp_buf || !out_buf || - midQ->mid_state != MID_RESPONSE_RECEIVED) { + midQ->mid_state != MID_RESPONSE_READY) { rc = -EIO; cifs_server_dbg(VFS, "Bad MID state?\n"); goto out; @@ -1558,14 +1567,16 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, /* Wait for a reply - allow signals to interrupt. */ rc = wait_event_interruptible(server->response_q, - (!(midQ->mid_state == MID_REQUEST_SUBMITTED)) || + (!(midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED)) || ((server->tcpStatus != CifsGood) && (server->tcpStatus != CifsNew))); /* Were we interrupted by a signal ? */ spin_lock(&server->srv_lock); if ((rc == -ERESTARTSYS) && - (midQ->mid_state == MID_REQUEST_SUBMITTED) && + (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) && ((server->tcpStatus == CifsGood) || (server->tcpStatus == CifsNew))) { spin_unlock(&server->srv_lock); @@ -1596,7 +1607,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, if (rc) { send_cancel(server, &rqst, midQ); spin_lock(&server->mid_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED) { + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; spin_unlock(&server->mid_lock); @@ -1616,7 +1628,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, return rc; /* rcvd frame is ok */ - if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_RECEIVED) { + if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { rc = -EIO; cifs_tcon_dbg(VFS, "Bad MID state?\n"); goto out; -- cgit From 21155620fbf2edbb071144894ff9d67ba9a1faa0 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 18 Sep 2023 16:38:50 +0800 Subject: crypto: sm2 - Fix crash caused by uninitialized context In sm2_compute_z_digest() function, the newly allocated structure mpi_ec_ctx is used, but forget to initialize it, which will cause a crash when performing subsequent operations. Fixes: e5221fa6a355 ("KEYS: asymmetric: Move sm2 code into x509_public_key") Cc: stable@vger.kernel.org # v6.5 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/sm2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/sm2.c b/crypto/sm2.c index 285b3cb7c0bc..5ab120d74c59 100644 --- a/crypto/sm2.c +++ b/crypto/sm2.c @@ -278,10 +278,14 @@ int sm2_compute_z_digest(struct shash_desc *desc, if (!ec) return -ENOMEM; - err = __sm2_set_pub_key(ec, key, keylen); + err = sm2_ec_ctx_init(ec); if (err) goto out_free_ec; + err = __sm2_set_pub_key(ec, key, keylen); + if (err) + goto out_deinit_ec; + bits_len = SM2_DEFAULT_USERID_LEN * 8; entl[0] = bits_len >> 8; entl[1] = bits_len & 0xff; -- cgit From 68ffa230daa0d35b7cce476098433d763d5fd42f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 20 Sep 2023 14:26:28 +0800 Subject: LoongArch: Fix lockdep static memory detection Since commit 0a6b58c5cd0d ("lockdep: fix static memory detection even more") the lockdep code uses is_kernel_core_data(), is_kernel_rodata() and init_section_contains() to verify if a lock is located inside a kernel static data section. This change triggers a failure on LoongArch, for which the vmlinux.lds.S script misses to put the locks (as part of in the .data.rel symbols) into the Linux data section. This patch fixes the lockdep problem by moving *(.data.rel*) symbols into the kernel data section (from _sdata to _edata). Additionally, move other wrongly assigned symbols too: - altinstructions into the _initdata section, - PLT symbols behind the read-only section, and - *(.la_abs) into the data section. Cc: stable # v6.4+ Fixes: 0a6b58c5cd0d ("lockdep: fix static memory detection even more") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Huacai Chen --- arch/loongarch/kernel/vmlinux.lds.S | 55 +++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index b1686afcf876..bb2ec86f37a8 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -53,33 +53,6 @@ SECTIONS . = ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; - /* - * struct alt_inst entries. From the header (alternative.h): - * "Alternative instructions for different CPU types or capabilities" - * Think locking instructions on spinlocks. - */ - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - -#ifdef CONFIG_RELOCATABLE - . = ALIGN(8); - .la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) { - __la_abs_begin = .; - *(.la_abs) - __la_abs_end = .; - } -#endif - - .got : ALIGN(16) { *(.got) } - .plt : ALIGN(16) { *(.plt) } - .got.plt : ALIGN(16) { *(.got.plt) } - - .data.rel : { *(.data.rel*) } - . = ALIGN(PECOFF_SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -94,6 +67,18 @@ SECTIONS __initdata_begin = .; + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . = ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + INIT_DATA_SECTION(16) .exit.data : { EXIT_DATA @@ -113,6 +98,11 @@ SECTIONS _sdata = .; RO_DATA(4096) + + .got : ALIGN(16) { *(.got) } + .plt : ALIGN(16) { *(.plt) } + .got.plt : ALIGN(16) { *(.got.plt) } + RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) .rela.dyn : ALIGN(8) { @@ -121,6 +111,17 @@ SECTIONS __rela_dyn_end = .; } + .data.rel : { *(.data.rel*) } + +#ifdef CONFIG_RELOCATABLE + . = ALIGN(8); + .la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) { + __la_abs_begin = .; + *(.la_abs) + __la_abs_end = .; + } +#endif + .sdata : { *(.sdata) } -- cgit From c718a0bad75ccef117000223b00fd6a14f849135 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Sep 2023 14:26:28 +0800 Subject: LoongArch: Fix some build warnings with W=1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some building warnings when building LoongArch kernel with W=1 as following, this patch fixes them. arch/loongarch/kernel/acpi.c:284:13: warning: no previous prototype for ‘acpi_numa_arch_fixup’ [-Wmissing-prototypes] 284 | void __init acpi_numa_arch_fixup(void) {} | ^~~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/time.c:32:13: warning: no previous prototype for ‘constant_timer_interrupt’ [-Wmissing-prototypes] 32 | irqreturn_t constant_timer_interrupt(int irq, void *data) | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/traps.c:496:25: warning: no previous prototype for 'do_fpe' [-Wmissing-prototypes] 496 | asmlinkage void noinstr do_fpe(struct pt_regs *regs | ^~~~~~ arch/loongarch/kernel/traps.c:813:22: warning: variable ‘opcode’ set but not used [-Wunused-but-set-variable] 813 | unsigned int opcode; | ^~~~~~ arch/loongarch/kernel/signal.c:895:14: warning: no previous prototype for ‘get_sigframe’ [-Wmissing-prototypes] 895 | void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, | ^~~~~~~~~~~~ arch/loongarch/kernel/syscall.c:21:40: warning: initialized field overwritten [-Woverride-init] 21 | #define __SYSCALL(nr, call) [nr] = (call), | ^ arch/loongarch/kernel/syscall.c:40:14: warning: no previous prototype for ‘do_syscall’ [-Wmissing-prototypes] 40 | void noinstr do_syscall(struct pt_regs *regs) | ^~~~~~~~~~ arch/loongarch/kernel/smp.c:502:17: warning: no previous prototype for ‘start_secondary’ [-Wmissing-prototypes] 502 | asmlinkage void start_secondary(void) | ^~~~~~~~~~~~~~~ arch/loongarch/kernel/process.c:309:15: warning: no previous prototype for ‘arch_align_stack’ [-Wmissing-prototypes] 309 | unsigned long arch_align_stack(unsigned long sp) | ^~~~~~~~~~~~~~~~ arch/loongarch/kernel/topology.c:13:5: warning: no previous prototype for ‘arch_register_cpu’ [-Wmissing-prototypes] 13 | int arch_register_cpu(int cpu) | ^~~~~~~~~~~~~~~~~ arch/loongarch/kernel/topology.c:27:6: warning: no previous prototype for ‘arch_unregister_cpu’ [-Wmissing-prototypes] 27 | void arch_unregister_cpu(int cpu) | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/module-sections.c:103:5: warning: no previous prototype for ‘module_frob_arch_sections’ [-Wmissing-prototypes] 103 | int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/loongarch/mm/hugetlbpage.c:56:5: warning: no previous prototype for ‘is_aligned_hugepage_range’ [-Wmissing-prototypes] 56 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ^~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/exception.h | 45 +++++++++++++++++++++++++++++++++ arch/loongarch/include/asm/smp.h | 1 + arch/loongarch/kernel/Makefile | 4 +++ arch/loongarch/kernel/acpi.c | 1 - arch/loongarch/kernel/module-sections.c | 1 + arch/loongarch/kernel/process.c | 1 + arch/loongarch/kernel/signal.c | 7 ++--- arch/loongarch/kernel/smp.c | 3 +++ arch/loongarch/kernel/syscall.c | 1 + arch/loongarch/kernel/time.c | 2 +- arch/loongarch/kernel/topology.c | 3 +++ arch/loongarch/kernel/traps.c | 25 ++++-------------- arch/loongarch/mm/fault.c | 2 +- arch/loongarch/mm/hugetlbpage.c | 12 --------- arch/loongarch/mm/ioremap.c | 1 + arch/loongarch/mm/tlb.c | 2 +- 16 files changed, 72 insertions(+), 39 deletions(-) create mode 100644 arch/loongarch/include/asm/exception.h diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h new file mode 100644 index 000000000000..af74a3fdcad1 --- /dev/null +++ b/arch/loongarch/include/asm/exception.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_EXCEPTION_H +#define __ASM_EXCEPTION_H + +#include +#include + +void show_registers(struct pt_regs *regs); + +asmlinkage void cache_parity_error(void); +asmlinkage void noinstr do_ade(struct pt_regs *regs); +asmlinkage void noinstr do_ale(struct pt_regs *regs); +asmlinkage void noinstr do_bce(struct pt_regs *regs); +asmlinkage void noinstr do_bp(struct pt_regs *regs); +asmlinkage void noinstr do_ri(struct pt_regs *regs); +asmlinkage void noinstr do_fpu(struct pt_regs *regs); +asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr); +asmlinkage void noinstr do_lsx(struct pt_regs *regs); +asmlinkage void noinstr do_lasx(struct pt_regs *regs); +asmlinkage void noinstr do_lbt(struct pt_regs *regs); +asmlinkage void noinstr do_watch(struct pt_regs *regs); +asmlinkage void noinstr do_syscall(struct pt_regs *regs); +asmlinkage void noinstr do_reserved(struct pt_regs *regs); +asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp); +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address); + +asmlinkage void handle_ade(void); +asmlinkage void handle_ale(void); +asmlinkage void handle_bce(void); +asmlinkage void handle_sys(void); +asmlinkage void handle_bp(void); +asmlinkage void handle_ri(void); +asmlinkage void handle_fpu(void); +asmlinkage void handle_fpe(void); +asmlinkage void handle_lsx(void); +asmlinkage void handle_lasx(void); +asmlinkage void handle_lbt(void); +asmlinkage void handle_watch(void); +asmlinkage void handle_reserved(void); +asmlinkage void handle_vint(void); +asmlinkage void noinstr handle_loongarch_irq(struct pt_regs *regs); + +#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 66ecb480c894..f81e5f01d619 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -70,6 +70,7 @@ struct secondary_data { extern struct secondary_data cpuboot_data; extern asmlinkage void smpboot_entry(void); +extern asmlinkage void start_secondary(void); extern void calculate_cpu_foreign_map(void); diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index c56ea0b75448..4fcc168f0732 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -19,6 +19,10 @@ obj-$(CONFIG_CPU_HAS_LBT) += lbt.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o +CFLAGS_module.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) +CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) + ifdef CONFIG_FUNCTION_TRACER ifndef CONFIG_DYNAMIC_FTRACE obj-y += mcount.o ftrace.o diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 9450e09073eb..8e00a754e548 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -281,7 +281,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node); } -void __init acpi_numa_arch_fixup(void) {} #endif void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index d4dbcda1c4b0..e2f30ff9afde 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -6,6 +6,7 @@ #include #include #include +#include #include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 3cb082e0c992..767d94cce0de 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 504fdfe85203..4a3686d13349 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -891,8 +892,8 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon return new_sp; } -void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, - struct extctx_layout *extctx) +static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + struct extctx_layout *extctx) { unsigned long sp; @@ -922,7 +923,7 @@ void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { int sig; sigset_t set; diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 6667b0a90f81..ef35c871244f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -556,10 +557,12 @@ void smp_send_stop(void) smp_call_function(stop_this_cpu, NULL, 0); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif static void flush_tlb_all_ipi(void *info) { diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 3fc4211db989..b4c5acd7aa3b 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index c189e03cd5da..3064af94db9c 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -29,7 +29,7 @@ static void constant_event_handler(struct clock_event_device *dev) { } -irqreturn_t constant_timer_interrupt(int irq, void *data) +static irqreturn_t constant_timer_interrupt(int irq, void *data) { int cpu = smp_processor_id(); struct clock_event_device *cd; diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index caa7cd859078..3fd166006698 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -7,6 +8,8 @@ #include #include +#include + static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 65214774ef7c..aebfc3733a76 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -53,21 +53,6 @@ #include "access-helper.h" -extern asmlinkage void handle_ade(void); -extern asmlinkage void handle_ale(void); -extern asmlinkage void handle_bce(void); -extern asmlinkage void handle_sys(void); -extern asmlinkage void handle_bp(void); -extern asmlinkage void handle_ri(void); -extern asmlinkage void handle_fpu(void); -extern asmlinkage void handle_fpe(void); -extern asmlinkage void handle_lbt(void); -extern asmlinkage void handle_lsx(void); -extern asmlinkage void handle_lasx(void); -extern asmlinkage void handle_reserved(void); -extern asmlinkage void handle_watch(void); -extern asmlinkage void handle_vint(void); - static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -439,8 +424,8 @@ static inline void setup_vint_size(unsigned int size) * happen together with Overflow or Underflow, and `ptrace' can set * any bits. */ -void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, - struct task_struct *tsk) +static void force_fcsr_sig(unsigned long fcsr, + void __user *fault_addr, struct task_struct *tsk) { int si_code = FPE_FLTUNK; @@ -458,7 +443,7 @@ void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, force_sig_fault(SIGFPE, si_code, fault_addr); } -int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) +static int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) { int si_code; @@ -824,7 +809,7 @@ out: asmlinkage void noinstr do_ri(struct pt_regs *regs) { int status = SIGILL; - unsigned int opcode = 0; + unsigned int __maybe_unused opcode; unsigned int __user *era = (unsigned int __user *)exception_era(regs); irqentry_state_t state = irqentry_enter(regs); diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index e6376e3dce86..1fc2f6813ea0 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -20,12 +20,12 @@ #include #include #include -#include #include #include #include #include +#include #include #include diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index ba138117b124..1e76fcb83093 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -50,18 +50,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return (pte_t *) pmd; } -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 73b0980ab6f5..70ca73019811 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -4,6 +4,7 @@ */ #include +#include void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) { diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index eb8572e201ea..2c0a411f23aa 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -261,7 +261,7 @@ unsigned long pcpu_handlers[NR_CPUS]; #endif extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; -void setup_tlb_handler(int cpu) +static void setup_tlb_handler(int cpu) { setup_ptwalker(); local_flush_tlb_all(); -- cgit From 3563b477ddfe057ff1ef63636cacf198130276cb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: LoongArch: Use _UL() and _ULL() Use _UL() and _ULL() that are provided by const.h. Signed-off-by: Andy Shevchenko Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/addrspace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 5c9c03bdf915..b24437e28c6e 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -19,7 +19,7 @@ */ #ifndef __ASSEMBLY__ #ifndef PHYS_OFFSET -#define PHYS_OFFSET _AC(0, UL) +#define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; #endif /* __ASSEMBLY__ */ @@ -43,7 +43,7 @@ extern unsigned long vm_map_base; * Memory above this physical address will be considered highmem. */ #ifndef HIGHMEM_START -#define HIGHMEM_START (_AC(1, UL) << _AC(DMW_PABITS, UL)) +#define HIGHMEM_START (_UL(1) << _UL(DMW_PABITS)) #endif #define TO_PHYS(x) ( ((x) & TO_PHYS_MASK)) @@ -65,16 +65,16 @@ extern unsigned long vm_map_base; #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ -#define _CONST64_(x) x #else #define _ATYPE_ __PTRDIFF_TYPE__ #define _ATYPE32_ int #define _ATYPE64_ __s64 +#endif + #ifdef CONFIG_64BIT -#define _CONST64_(x) x ## UL +#define _CONST64_(x) _UL(x) #else -#define _CONST64_(x) x ## ULL -#endif +#define _CONST64_(x) _ULL(x) #endif /* -- cgit From d0b933ae7a0e1b86b7af1462028ef0ca78144ef0 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: LoongArch: Remove dead code in relocate_new_kernel The initial aim is to silence the following objtool warning: arch/loongarch/kernel/relocate_kernel.o: warning: objtool: relocate_new_kernel+0x74: unreachable instruction There are two adjacent "b" instructions, the second one is unreachable, it is dead code, just remove it. Co-developed-by: Jinyang He Signed-off-by: Jinyang He Co-developed-by: Youling Tang Signed-off-by: Youling Tang Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate_kernel.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index d13252553a7c..f49f6b053763 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -72,7 +72,6 @@ copy_word: LONG_ADDI s5, s5, -1 beqz s5, process_entry b copy_word - b process_entry done: ibar 0 -- cgit From b795fb9f5861ee256070d59e33130980a01fadd7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: LoongArch: Set all reserved memblocks on Node#0 at initialization After commit 61167ad5fecdea ("mm: pass nid to reserve_bootmem_region()") we get a panic if DEFERRED_STRUCT_PAGE_INIT is enabled: [ 0.000000] CPU 0 Unable to handle kernel paging request at virtual address 0000000000002b82, era == 90000000040e3f28, ra == 90000000040e3f18 [ 0.000000] Oops[#1]: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.5.0+ #733 [ 0.000000] pc 90000000040e3f28 ra 90000000040e3f18 tp 90000000046f4000 sp 90000000046f7c90 [ 0.000000] a0 0000000000000001 a1 0000000000200000 a2 0000000000000040 a3 90000000046f7ca0 [ 0.000000] a4 90000000046f7ca4 a5 0000000000000000 a6 90000000046f7c38 a7 0000000000000000 [ 0.000000] t0 0000000000000002 t1 9000000004b00ac8 t2 90000000040e3f18 t3 90000000040f0800 [ 0.000000] t4 00000000000f0000 t5 80000000ffffe07e t6 0000000000000003 t7 900000047fff5e20 [ 0.000000] t8 aaaaaaaaaaaaaaab u0 0000000000000018 s9 0000000000000000 s0 fffffefffe000000 [ 0.000000] s1 0000000000000000 s2 0000000000000080 s3 0000000000000040 s4 0000000000000000 [ 0.000000] s5 0000000000000000 s6 fffffefffe000000 s7 900000000470b740 s8 9000000004ad4000 [ 0.000000] ra: 90000000040e3f18 reserve_bootmem_region+0xec/0x21c [ 0.000000] ERA: 90000000040e3f28 reserve_bootmem_region+0xfc/0x21c [ 0.000000] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 0.000000] PRMD: 00000000 (PPLV0 -PIE -PWE) [ 0.000000] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) [ 0.000000] ECFG: 00070800 (LIE=11 VS=7) [ 0.000000] ESTAT: 00010800 [PIL] (IS=11 ECode=1 EsubCode=0) [ 0.000000] BADV: 0000000000002b82 [ 0.000000] PRID: 0014d000 (Loongson-64bit, Loongson-3A6000) [ 0.000000] Modules linked in: [ 0.000000] Process swapper (pid: 0, threadinfo=(____ptrval____), task=(____ptrval____)) [ 0.000000] Stack : 0000000000000000 9000000002eb5430 0000003a00000020 90000000045ccd00 [ 0.000000] 900000000470e000 90000000002c1918 0000000000000000 9000000004110780 [ 0.000000] 00000000fe6c0000 0000000480000000 9000000004b4e368 9000000004110748 [ 0.000000] 0000000000000000 900000000421ca84 9000000004620000 9000000004564970 [ 0.000000] 90000000046f7d78 9000000002cc9f70 90000000002c1918 900000000470e000 [ 0.000000] 9000000004564970 90000000040bc0e0 90000000046f7d78 0000000000000000 [ 0.000000] 0000000000004000 90000000045ccd00 0000000000000000 90000000002c1918 [ 0.000000] 90000000002c1900 900000000470b700 9000000004b4df78 9000000004620000 [ 0.000000] 90000000046200a8 90000000046200a8 0000000000000000 9000000004218b2c [ 0.000000] 9000000004270008 0000000000000001 0000000000000000 90000000045ccd00 [ 0.000000] ... [ 0.000000] Call Trace: [ 0.000000] [<90000000040e3f28>] reserve_bootmem_region+0xfc/0x21c [ 0.000000] [<900000000421ca84>] memblock_free_all+0x114/0x350 [ 0.000000] [<9000000004218b2c>] mm_core_init+0x138/0x3cc [ 0.000000] [<9000000004200e38>] start_kernel+0x488/0x7a4 [ 0.000000] [<90000000040df0d8>] kernel_entry+0xd8/0xdc [ 0.000000] [ 0.000000] Code: 02eb21ad 00410f4c 380c31ac <262b818d> 6800b70d 02c1c196 0015001c 57fe4bb1 260002cd The reason is early memblock_reserve() in memblock_init() set node id to MAX_NUMNODES, making NODE_DATA(nid) a NULL dereference in the call chain reserve_bootmem_region() -> init_reserved_page(). After memblock_init(), those late calls of memblock_reserve() operate on subregions of memblock .memory regions. As a result, these reserved regions will be set to the correct node at the first iteration of memmap_init_reserved_pages(). So set all reserved memblocks on Node#0 at initialization can avoid this panic. Reported-by: WANG Xuerui Tested-by: WANG Xuerui Reviewed-by: WANG Xuerui # with nits addressed Signed-off-by: Huacai Chen --- arch/loongarch/kernel/mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index 4a4107a6a965..aed901c57fb4 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -50,7 +50,6 @@ void __init memblock_init(void) } memblock_set_current_limit(PFN_PHYS(max_low_pfn)); - memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); /* Reserve the first 2MB */ memblock_reserve(PHYS_OFFSET, 0x200000); @@ -58,4 +57,7 @@ void __init memblock_init(void) /* Reserve the kernel text/data/bss */ memblock_reserve(__pa_symbol(&_text), __pa_symbol(&_end) - __pa_symbol(&_text)); + + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.reserved, 0); } -- cgit From 2a86f1b56a30e242caf7ee1268af68f4f49ce847 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: kasan: Cleanup the __HAVE_ARCH_SHADOW_MAP usage As Linus suggested, __HAVE_ARCH_XYZ is "stupid" and "having historical uses of it doesn't make it good". So migrate __HAVE_ARCH_SHADOW_MAP to separate macros named after the respective functions. Suggested-by: Linus Torvalds Reviewed-by: WANG Xuerui Reviewed-by: Andrey Konovalov Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kasan.h | 10 ++++++++-- include/linux/kasan.h | 2 +- mm/kasan/kasan.h | 8 +++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index deeff8158f45..a12ecab37da7 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -10,8 +10,6 @@ #include #include -#define __HAVE_ARCH_SHADOW_MAP - #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) @@ -68,6 +66,7 @@ static __always_inline bool kasan_arch_is_ready(void) return !kasan_early_stage; } +#define kasan_mem_to_shadow kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { if (!kasan_arch_is_ready()) { @@ -97,6 +96,7 @@ static inline void *kasan_mem_to_shadow(const void *addr) } } +#define kasan_shadow_to_mem kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { unsigned long addr = (unsigned long)shadow_addr; @@ -119,6 +119,12 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } } +#define addr_has_metadata addr_has_metadata +static __always_inline bool addr_has_metadata(const void *addr) +{ + return (kasan_mem_to_shadow((void *)addr) != NULL); +} + void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3df5499f7936..842623d708c2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,7 +54,7 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index f70e3d7a602e..d37831b8511c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -291,7 +291,7 @@ struct kasan_stack_ring { #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) @@ -299,15 +299,13 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } #endif +#ifndef addr_has_metadata static __always_inline bool addr_has_metadata(const void *addr) { -#ifdef __HAVE_ARCH_SHADOW_MAP - return (kasan_mem_to_shadow((void *)addr) != NULL); -#else return (kasan_reset_tag(addr) >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); -#endif } +#endif /** * kasan_check_range - Check memory region, and report if invalid access. -- cgit From 99e5a2472a506d9dc6fe54863bf6c5b43bc25a97 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: LoongArch: Don't inline kasan_mem_to_shadow()/kasan_shadow_to_mem() As Linus suggested, kasan_mem_to_shadow()/kasan_shadow_to_mem() are not performance-critical and too big to inline. This is simply wrong so just define them out-of-line. If they really need to be inlined in future, such as the objtool / SMAP issue for X86, we should mark them __always_inline. Suggested-by: Linus Torvalds Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kasan.h | 59 ++++---------------------------------- arch/loongarch/mm/kasan_init.c | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index a12ecab37da7..cd6084f4e153 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -60,63 +60,16 @@ extern bool kasan_early_stage; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -#define kasan_arch_is_ready kasan_arch_is_ready -static __always_inline bool kasan_arch_is_ready(void) -{ - return !kasan_early_stage; -} - #define kasan_mem_to_shadow kasan_mem_to_shadow -static inline void *kasan_mem_to_shadow(const void *addr) -{ - if (!kasan_arch_is_ready()) { - return (void *)(kasan_early_shadow_page); - } else { - unsigned long maddr = (unsigned long)addr; - unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; - unsigned long offset = 0; - - maddr &= XRANGE_SHADOW_MASK; - switch (xrange) { - case XKPRANGE_CC_SEG: - offset = XKPRANGE_CC_SHADOW_OFFSET; - break; - case XKPRANGE_UC_SEG: - offset = XKPRANGE_UC_SHADOW_OFFSET; - break; - case XKVRANGE_VC_SEG: - offset = XKVRANGE_VC_SHADOW_OFFSET; - break; - default: - WARN_ON(1); - return NULL; - } - - return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); - } -} +void *kasan_mem_to_shadow(const void *addr); #define kasan_shadow_to_mem kasan_shadow_to_mem -static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +const void *kasan_shadow_to_mem(const void *shadow_addr); + +#define kasan_arch_is_ready kasan_arch_is_ready +static __always_inline bool kasan_arch_is_ready(void) { - unsigned long addr = (unsigned long)shadow_addr; - - if (unlikely(addr > KASAN_SHADOW_END) || - unlikely(addr < KASAN_SHADOW_START)) { - WARN_ON(1); - return NULL; - } - - if (addr >= XKVRANGE_VC_SHADOW_OFFSET) - return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); - else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); - else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); - else { - WARN_ON(1); - return NULL; - } + return !kasan_early_stage; } #define addr_has_metadata addr_has_metadata diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index da68bc1a4643..cc3e81fe0186 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -35,6 +35,57 @@ static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); bool kasan_early_stage = true; +void *kasan_mem_to_shadow(const void *addr) +{ + if (!kasan_arch_is_ready()) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + /* * Alloc memory for shadow memory page table. */ -- cgit From 84fafe9810350be1583d57cd6b2f44841ad7f336 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: docs/LoongArch: Update the links of ABI The current links of ABI can not be found for some time, let us fix the broken links. By the way, the latest and official ABI documentation releases are available at https://github.com/loongson/la-abi-specs, but there are no Chinese and pdf versions for now, so just do the minimal changes to update the links so that they can be found, hope there are stable links in the future. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- Documentation/arch/loongarch/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst index 49135d451ced..8c568cfc2107 100644 --- a/Documentation/arch/loongarch/introduction.rst +++ b/Documentation/arch/loongarch/introduction.rst @@ -381,9 +381,9 @@ Documentation of LoongArch ISA: Documentation of LoongArch ELF psABI: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.00-CN.pdf (in Chinese) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-CN.pdf (in Chinese) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.00-EN.pdf (in English) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-EN.pdf (in English) Linux kernel repository of Loongson and LoongArch: -- cgit From e74a6b7f3744d122ff4544f19393dfab167166ec Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Sep 2023 14:26:38 +0800 Subject: docs/zh_CN/LoongArch: Update the links of ABI The current links of ABI can not be found for some time, let us fix the broken links. By the way, the latest and official ABI documentation releases are available at https://github.com/loongson/la-abi-specs, but there are no Chinese and pdf versions for now, so just do the minimal changes to update the links so that they can be found, hope there are stable links in the future. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- Documentation/translations/zh_CN/arch/loongarch/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst index cba04befc950..59d6bf33050c 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst @@ -344,9 +344,9 @@ LoongArch指令集架构的文档: LoongArch的ELF psABI文档: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.00-CN.pdf (中文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-CN.pdf (中文版) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.00-EN.pdf (英文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-EN.pdf (英文版) Loongson与LoongArch的Linux内核源码仓库: -- cgit From 6d2779ecaeb56f92d7105c56772346c71c88c278 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Sep 2023 18:14:29 +0100 Subject: locking/atomic: scripts: fix fallback ifdeffery Since commit: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") The ordering fallbacks for atomic*_read_acquire() and atomic*_set_release() erroneously fall back to the implictly relaxed atomic*_read() and atomic*_set() variants respectively, without any additional barriers. This loses the ACQUIRE and RELEASE ordering semantics, which can result in a wide variety of problems, even on strongly-ordered architectures where the implementation of atomic*_read() and/or atomic*_set() allows the compiler to reorder those relative to other accesses. In practice this has been observed to break bit spinlocks on arm64, resulting in dentry cache corruption. The fallback logic was intended to allow ACQUIRE/RELEASE/RELAXED ops to be defined in terms of FULL ops, but where an op had RELAXED ordering by default, this unintentionally permitted the ACQUIRE/RELEASE ops to be defined in terms of the implicitly RELAXED default. This patch corrects the logic to avoid falling back to implicitly RELAXED ops, resulting in the same behaviour as prior to commit 9257959a6e5b4fca. I've verified the resulting assembly on arm64 by generating outlined wrappers of the atomics. Prior to this patch the compiler generates sequences using relaxed load (LDR) and store (STR) instructions, e.g. | : | ldr x0, [x0] | ret | | : | str x1, [x0] | ret With this patch applied the compiler generates sequences using the intended load-acquire (LDAR) and store-release (STLR) instructions, e.g. | : | ldar x0, [x0] | ret | | : | stlr x1, [x0] | ret To make sure that there were no other victims of the ifdeffery rewrite, I generated outlined copies of all of the {atomic,atomic64,atomic_long} atomic operations before and after commit 9257959a6e5b4fca. A diff of the generated assembly on arm64 shows that only the read_acquire() and set_release() operations were changed, and only lost their intended ordering: | [mark@lakrids:~/src/linux]% diff -u \ | <(aarch64-linux-gnu-objdump -d before-9257959a6e5b4fca.o) | <(aarch64-linux-gnu-objdump -d after-9257959a6e5b4fca.o) | --- /proc/self/fd/11 2023-09-19 16:51:51.114779415 +0100 | +++ /proc/self/fd/16 2023-09-19 16:51:51.114779415 +0100 | @@ -1,5 +1,5 @@ | | -before-9257959a6e5b4fca.o: file format elf64-littleaarch64 | +after-9257959a6e5b4fca.o: file format elf64-littleaarch64 | | | Disassembly of section .text: | @@ -9,7 +9,7 @@ | 4: d65f03c0 ret | | 0000000000000008 : | - 8: 88dffc00 ldar w0, [x0] | + 8: b9400000 ldr w0, [x0] | c: d65f03c0 ret | | 0000000000000010 : | @@ -17,7 +17,7 @@ | 14: d65f03c0 ret | | 0000000000000018 : | - 18: 889ffc01 stlr w1, [x0] | + 18: b9000001 str w1, [x0] | 1c: d65f03c0 ret | | 0000000000000020 : | @@ -1230,7 +1230,7 @@ | 1070: d65f03c0 ret | | 0000000000001074 : | - 1074: c8dffc00 ldar x0, [x0] | + 1074: f9400000 ldr x0, [x0] | 1078: d65f03c0 ret | | 000000000000107c : | @@ -1238,7 +1238,7 @@ | 1080: d65f03c0 ret | | 0000000000001084 : | - 1084: c89ffc01 stlr x1, [x0] | + 1084: f9000001 str x1, [x0] | 1088: d65f03c0 ret | | 000000000000108c : | @@ -2427,7 +2427,7 @@ | 207c: d65f03c0 ret | | 0000000000002080 : | - 2080: c8dffc00 ldar x0, [x0] | + 2080: f9400000 ldr x0, [x0] | 2084: d65f03c0 ret | | 0000000000002088 : | @@ -2435,7 +2435,7 @@ | 208c: d65f03c0 ret | | 0000000000002090 : | - 2090: c89ffc01 stlr x1, [x0] | + 2090: f9000001 str x1, [x0] | 2094: d65f03c0 ret | | 0000000000002098 : I've build tested this with a variety of configs for alpha, arm, arm64, csky, i386, m68k, microblaze, mips, nios2, openrisc, powerpc, riscv, s390, sh, sparc, x86_64, and xtensa, for which I've seen no issues. I was unable to build test for ia64 and parisc due to existing build breakage in v6.6-rc2. Fixes: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") Reported-by: Ming Lei Reported-by: Darrick J. Wong Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Tested-by: Baokun Li Link: https://lkml.kernel.org/r/20230919171430.2697727-1-mark.rutland@arm.com --- include/linux/atomic/atomic-arch-fallback.h | 10 +--------- scripts/atomic/gen-atomic-fallback.sh | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 18f5744dfb5d..b83ef19da13d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -459,8 +459,6 @@ raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); -#elif defined(arch_atomic_read) - return arch_atomic_read(v); #else int ret; @@ -508,8 +506,6 @@ raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); -#elif defined(arch_atomic_set) - arch_atomic_set(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); @@ -2575,8 +2571,6 @@ raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); -#elif defined(arch_atomic64_read) - return arch_atomic64_read(v); #else s64 ret; @@ -2624,8 +2618,6 @@ raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); -#elif defined(arch_atomic64_set) - arch_atomic64_set(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); @@ -4657,4 +4649,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d +// 2fdd6702823fa842f9cea57a002e6e4476ae780c diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index c0c8a85d7c81..a45154cefa48 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -102,7 +102,7 @@ gen_proto_order_variant() fi # Allow ACQUIRE/RELEASE/RELAXED ops to be defined in terms of FULL ops - if [ ! -z "${order}" ]; then + if [ ! -z "${order}" ] && ! meta_is_implicitly_relaxed "${meta}"; then printf "#elif defined(arch_${basename})\n" printf "\t${retstmt}arch_${basename}(${args});\n" fi -- cgit From f1d95df0f31048f1c59092648997686e3f7d9478 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Mon, 18 Sep 2023 16:56:23 +0300 Subject: net: rds: Fix possible NULL-pointer dereference In rds_rdma_cm_event_handler_cmn() check, if conn pointer exists before dereferencing it as rdma_set_service_type() argument Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: fd261ce6a30e ("rds: rdma: update rdma transport for tos") Signed-off-by: Artem Chernyshev Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index d36f3f6b4351..b15cf316b23a 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_RESOLVED: - rdma_set_service_type(cm_id, conn->c_tos); - rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); - /* XXX do we need to clean up if this fails? */ - ret = rdma_resolve_route(cm_id, - RDS_RDMA_RESOLVE_TIMEOUT_MS); + if (conn) { + rdma_set_service_type(cm_id, conn->c_tos); + rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); + /* XXX do we need to clean up if this fails? */ + ret = rdma_resolve_route(cm_id, + RDS_RDMA_RESOLVE_TIMEOUT_MS); + } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: -- cgit From 4e4b1798cc90e376b8b61d0098b4093898a32227 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 18 Sep 2023 11:40:15 -0400 Subject: vxlan: Add missing entries to vxlan_get_size() There are some attributes added by vxlan_fill_info() which are not accounted for in vxlan_get_size(). Add them. I didn't find a way to trigger an actual problem from this miscalculation since there is usually extra space in netlink size calculations like if_nlmsg_size(); but maybe I just didn't search long enough. Fixes: 3511494ce2f3 ("vxlan: Group Policy extension") Fixes: e1e5314de08b ("vxlan: implement GPE") Fixes: 0ace2ca89cbd ("vxlan: Use checksum partial with remote checksum offload") Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Benjamin Poirier Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e463f59e95c2..5b5597073b00 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -4331,6 +4331,10 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */ + nla_total_size(0) + /* IFLA_VXLAN_GBP */ + nla_total_size(0) + /* IFLA_VXLAN_GPE */ + nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */ 0; } -- cgit From c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 15 Sep 2023 15:18:11 +0200 Subject: netfilter: nf_tables: disable toggling dormant table state more than once nft -f -< Cc: Bing-Jhong Billy Jheng Cc: info@starlabs.sg Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d819b4d42962..a3680638ec60 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1219,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx) flags & NFT_TABLE_F_OWNER)) return -EOPNOTSUPP; + /* No dormant off/on/off/on games in single transaction */ + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) -- cgit From cf5000a7787cbc10341091d37245a42c119d26c5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 Sep 2023 15:36:13 +0200 Subject: netfilter: nf_tables: fix memleak when more than 255 elements expired When more than 255 elements expired we're supposed to switch to a new gc container structure. This never happens: u8 type will wrap before reaching the boundary and nft_trans_gc_space() always returns true. This means we recycle the initial gc container structure and lose track of the elements that came before. While at it, don't deref 'gc' after we've passed it to call_rcu. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a4455f4995ab..7c816359d5a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1682,7 +1682,7 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; + u16 count; void *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a3680638ec60..4356189360fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9579,12 +9579,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { + struct nft_set *set; + if (nft_trans_gc_space(gc)) return gc; + set = gc->set; nft_trans_gc_queue_work(gc); - return nft_trans_gc_alloc(gc->set, gc_seq, gfp); + return nft_trans_gc_alloc(set, gc_seq, gfp); } void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) @@ -9599,15 +9602,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { + struct nft_set *set; + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) return NULL; if (nft_trans_gc_space(gc)) return gc; + set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); - return nft_trans_gc_alloc(gc->set, 0, gfp); + return nft_trans_gc_alloc(set, 0, gfp); } void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) -- cgit From 7433b6d2afd512d04398c73aa984d1e285be125b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 19 Sep 2023 20:04:45 +0200 Subject: netfilter: ipset: Fix race between IPSET_CMD_CREATE and IPSET_CMD_SWAP Kyle Zeng reported that there is a race between IPSET_CMD_ADD and IPSET_CMD_SWAP in netfilter/ip_set, which can lead to the invocation of `__ip_set_put` on a wrong `set`, triggering the `BUG_ON(set->ref == 0);` check in it. The race is caused by using the wrong reference counter, i.e. the ref counter instead of ref_netlink. Fixes: 24e227896bbf ("netfilter: ipset: Add schedule point in call_ad().") Reported-by: Kyle Zeng Closes: https://lore.kernel.org/netfilter-devel/ZPZqetxOmH+w%2Fmyc@westworld/#r Tested-by: Kyle Zeng Signed-off-by: Jozsef Kadlecsik Signed-off-by: Florian Westphal --- net/netfilter/ipset/ip_set_core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e564b5174261..35d2f9c9ada0 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -682,6 +682,14 @@ __ip_set_put(struct ip_set *set) /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ +static void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + static void __ip_set_put_netlink(struct ip_set *set) { @@ -1693,11 +1701,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, do { if (retried) { - __ip_set_get(set); + __ip_set_get_netlink(set); nfnl_unlock(NFNL_SUBSYS_IPSET); cond_resched(); nfnl_lock(NFNL_SUBSYS_IPSET); - __ip_set_put(set); + __ip_set_put_netlink(set); } ip_set_lock(set); -- cgit From b06fab003ae181c6690fe6d1f806636f816f4e50 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 15 Sep 2023 12:01:13 +0200 Subject: riscv: kselftests: Fix mm build by removing testcases subdirectory kselftests fails to build because the mm/testcases subdirectory is not created and then the compiler fails to output the binary there. So fix this by simply removing this subdirectory which is not very useful. Signed-off-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20230915100113.13131-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/mm/Makefile | 6 +- tools/testing/selftests/riscv/mm/mmap_bottomup.c | 35 ++++++++++++ tools/testing/selftests/riscv/mm/mmap_default.c | 35 ++++++++++++ tools/testing/selftests/riscv/mm/mmap_test.h | 64 ++++++++++++++++++++++ tools/testing/selftests/riscv/mm/run_mmap.sh | 12 ++++ .../selftests/riscv/mm/testcases/mmap_bottomup.c | 35 ------------ .../selftests/riscv/mm/testcases/mmap_default.c | 35 ------------ .../selftests/riscv/mm/testcases/mmap_test.h | 64 ---------------------- .../selftests/riscv/mm/testcases/run_mmap.sh | 12 ---- 9 files changed, 149 insertions(+), 149 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/mmap_bottomup.c create mode 100644 tools/testing/selftests/riscv/mm/mmap_default.c create mode 100644 tools/testing/selftests/riscv/mm/mmap_test.h create mode 100755 tools/testing/selftests/riscv/mm/run_mmap.sh delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_default.c delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_test.h delete mode 100755 tools/testing/selftests/riscv/mm/testcases/run_mmap.sh diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index 11e0f0568923..c333263f2b27 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -5,11 +5,11 @@ # Additional include paths needed by kselftest.h and local headers CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. -TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup +TEST_GEN_FILES := mmap_default mmap_bottomup -TEST_PROGS := testcases/run_mmap.sh +TEST_PROGS := run_mmap.sh include ../../lib.mk -$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h +$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c new file mode 100644 index 000000000000..1757d19ca89b --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include "../../kselftest_harness.h" + +TEST(infinite_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(BOTTOM_UP, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c new file mode 100644 index 000000000000..c63c60b9397e --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include "../../kselftest_harness.h" + +TEST(default_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(TOP_DOWN, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h new file mode 100644 index 000000000000..9b8434f62f57 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _TESTCASES_MMAP_TEST_H +#define _TESTCASES_MMAP_TEST_H +#include +#include +#include + +#define TOP_DOWN 0 +#define BOTTOM_UP 1 + +struct addresses { + int *no_hint; + int *on_37_addr; + int *on_38_addr; + int *on_46_addr; + int *on_47_addr; + int *on_55_addr; + int *on_56_addr; +}; + +static inline void do_mmaps(struct addresses *mmap_addresses) +{ + /* + * Place all of the hint addresses on the boundaries of mmap + * sv39, sv48, sv57 + * User addresses end at 1<<38, 1<<47, 1<<56 respectively + */ + void *on_37_bits = (void *)(1UL << 37); + void *on_38_bits = (void *)(1UL << 38); + void *on_46_bits = (void *)(1UL << 46); + void *on_47_bits = (void *)(1UL << 47); + void *on_55_bits = (void *)(1UL << 55); + void *on_56_bits = (void *)(1UL << 56); + + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + mmap_addresses->no_hint = + mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_37_addr = + mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_38_addr = + mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_46_addr = + mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_47_addr = + mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_55_addr = + mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_56_addr = + mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); +} + +static inline int memory_layout(void) +{ + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + + return value2 > value1; +} +#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh new file mode 100755 index 000000000000..ca5ad7c48bad --- /dev/null +++ b/tools/testing/selftests/riscv/mm/run_mmap.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +original_stack_limit=$(ulimit -s) + +./mmap_default + +# Force mmap_bottomup to be ran with bottomup memory due to +# the unlimited stack +ulimit -s unlimited +./mmap_bottomup +ulimit -s $original_stack_limit diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c deleted file mode 100644 index b29379f7e478..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include - -#include "../../kselftest_harness.h" - -TEST(infinite_rlimit) -{ -// Only works on 64 bit -#if __riscv_xlen == 64 - struct addresses mmap_addresses; - - EXPECT_EQ(BOTTOM_UP, memory_layout()); - - do_mmaps(&mmap_addresses); - - EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); - - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); - EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); -#endif -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c deleted file mode 100644 index d1accb91b726..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include - -#include "../../kselftest_harness.h" - -TEST(default_rlimit) -{ -// Only works on 64 bit -#if __riscv_xlen == 64 - struct addresses mmap_addresses; - - EXPECT_EQ(TOP_DOWN, memory_layout()); - - do_mmaps(&mmap_addresses); - - EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); - - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); - EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); -#endif -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h deleted file mode 100644 index 9b8434f62f57..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _TESTCASES_MMAP_TEST_H -#define _TESTCASES_MMAP_TEST_H -#include -#include -#include - -#define TOP_DOWN 0 -#define BOTTOM_UP 1 - -struct addresses { - int *no_hint; - int *on_37_addr; - int *on_38_addr; - int *on_46_addr; - int *on_47_addr; - int *on_55_addr; - int *on_56_addr; -}; - -static inline void do_mmaps(struct addresses *mmap_addresses) -{ - /* - * Place all of the hint addresses on the boundaries of mmap - * sv39, sv48, sv57 - * User addresses end at 1<<38, 1<<47, 1<<56 respectively - */ - void *on_37_bits = (void *)(1UL << 37); - void *on_38_bits = (void *)(1UL << 38); - void *on_46_bits = (void *)(1UL << 46); - void *on_47_bits = (void *)(1UL << 47); - void *on_55_bits = (void *)(1UL << 55); - void *on_56_bits = (void *)(1UL << 56); - - int prot = PROT_READ | PROT_WRITE; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - - mmap_addresses->no_hint = - mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_37_addr = - mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_38_addr = - mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_46_addr = - mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_47_addr = - mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_55_addr = - mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_56_addr = - mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); -} - -static inline int memory_layout(void) -{ - int prot = PROT_READ | PROT_WRITE; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - - void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); - void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); - - return value2 > value1; -} -#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh deleted file mode 100755 index ca5ad7c48bad..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -original_stack_limit=$(ulimit -s) - -./mmap_default - -# Force mmap_bottomup to be ran with bottomup memory due to -# the unlimited stack -ulimit -s unlimited -./mmap_bottomup -ulimit -s $original_stack_limit -- cgit From e5028011885a85032aa3c1b7e3e493bcdacb4a0a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 17 Aug 2023 17:19:51 +0100 Subject: coresight: tmc-etr: Disable warnings for allocation failures Running the following command on Juno triggers the warning: $ perf record -e cs_etm// -m ,128M ... ------------[ cut here ]------------ WARNING: CPU: 1 PID: 412 at mm/page_alloc.c:4453 __alloc_pages+0x334/0x1420 CPU: 1 PID: 412 Comm: perf Not tainted 6.5.0-rc3+ #181 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __alloc_pages+0x334/0x1420 lr : dma_common_alloc_pages+0x108/0x138 sp : ffffffc087fb7440 x29: ffffffc087fb7440 x28: 0000000000000000 x27: ffffffc07e48fba0 x26: 0000000000000001 x25: 000000000000000f x24: ffffffc081f24880 x23: 0000000000000cc0 x22: ffffff88012b6f08 x21: 0000000008000000 x20: ffffff8801433000 x19: 0000000000000000 x18: 0000000000000000 x17: ffffffc080316e5c x16: ffffffc07e46406c x15: ffffffc0803af580 x14: ffffffc08036b460 x13: ffffffc080025cbc x12: ffffffb8108c3fc4 x11: 1ffffff8108c3fc3 x10: 1ffffff810ff6eac x9 : 00000000f204f204 x8 : 000000000000f204 x7 : 00000000f2f2f2f2 x6 : 00000000f3f3f3f3 x5 : 0000000000000001 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000cc0 x1 : 0000000000000000 x0 : ffffffc085333000 Call trace: __alloc_pages+0x334/0x1420 dma_common_alloc_pages+0x108/0x138 __dma_alloc_pages+0xf4/0x108 dma_alloc_pages+0x18/0x30 tmc_etr_alloc_flat_buf+0xa0/0x190 [coresight_tmc] tmc_alloc_etr_buf.constprop.0+0x124/0x298 [coresight_tmc] alloc_etr_buf.constprop.0.isra.0+0x88/0xc8 [coresight_tmc] tmc_alloc_etr_buffer+0x164/0x2f0 [coresight_tmc] etm_setup_aux+0x32c/0x520 [coresight] rb_alloc_aux+0x29c/0x3f8 perf_mmap+0x59c/0xce0 mmap_region+0x340/0x10e0 do_mmap+0x48c/0x580 vm_mmap_pgoff+0x160/0x248 ksys_mmap_pgoff+0x1e8/0x278 __arm64_sys_mmap+0x8c/0xb8 With the flat mode, we only attempt to allocate large memory if there is an IOMMU connected to the ETR. If the allocation fails, we always have a fallback path and return an error if nothing else worked. So, suppress the warning for flat mode allocations. Cc: Mike Leach Cc: James Clark Cc: Anshuman Khandual Signed-off-by: Suzuki K Poulose Reviewed-by: James Clark Link: https://lore.kernel.org/r/20230817161951.658534-1-suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 6132c5b3db9c..8311e1028ddb 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -610,7 +610,8 @@ static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, flat_buf->vaddr = dma_alloc_noncoherent(real_dev, etr_buf->size, &flat_buf->daddr, - DMA_FROM_DEVICE, GFP_KERNEL); + DMA_FROM_DEVICE, + GFP_KERNEL | __GFP_NOWARN); if (!flat_buf->vaddr) { kfree(flat_buf); return -ENOMEM; -- cgit From b547b5e52a0587e6b25ea520bf2f9e03d00cbcb6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Sep 2023 08:13:21 +0200 Subject: gpio: tb10x: Fix an error handling path in tb10x_gpio_probe() If an error occurs after a successful irq_domain_add_linear() call, it should be undone by a corresponding irq_domain_remove(), as already done in the remove function. Fixes: c6ce2b6bffe5 ("gpio: add TB10x GPIO driver") Signed-off-by: Christophe JAILLET Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tb10x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c index 78f8790168ae..f96d260a4a19 100644 --- a/drivers/gpio/gpio-tb10x.c +++ b/drivers/gpio/gpio-tb10x.c @@ -195,7 +195,7 @@ static int tb10x_gpio_probe(struct platform_device *pdev) handle_edge_irq, IRQ_NOREQUEST, IRQ_NOPROBE, IRQ_GC_INIT_MASK_CACHE); if (ret) - return ret; + goto err_remove_domain; gc = tb10x_gpio->domain->gc->gc[0]; gc->reg_base = tb10x_gpio->base; @@ -209,6 +209,10 @@ static int tb10x_gpio_probe(struct platform_device *pdev) } return 0; + +err_remove_domain: + irq_domain_remove(tb10x_gpio->domain); + return ret; } static int tb10x_gpio_remove(struct platform_device *pdev) -- cgit From c7f5bd9f3aa04a5d0ced8c8f7835bead62380fa6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 5 Sep 2023 09:55:58 +0200 Subject: dt-bindings: mfd: Revert "dt-bindings: mfd: maxim,max77693: Add USB connector" This reverts commit da7ee30ae6662f016f28a9ef090b2132b3c0fb48. Commit da7ee30ae666 ("dt-bindings: mfd: maxim,max77693: Add USB connector") was an earlier version of my patch adding the connector, later superseded by commit 789c9ce9b46f ("dt-bindings: mfd: maxim,max77693: Add USB connector"). Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20230905075558.21219-1-krzysztof.kozlowski@linaro.org Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/maxim,max77693.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml index 9804d13de648..6a6f222b868f 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml @@ -31,10 +31,6 @@ properties: charger: $ref: /schemas/power/supply/maxim,max77693.yaml - connector: - $ref: /schemas/connector/usb-connector.yaml# - unevaluatedProperties: false - led: $ref: /schemas/leds/maxim,max77693.yaml -- cgit From 22b6e7f3d6d51ff2716480f3d8f3098d90d69165 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 19 Sep 2023 10:27:15 +0800 Subject: net: hinic: Fix warning-hinic_set_vlan_fliter() warn: variable dereferenced before check 'hwdev' 'hwdev' is checked too late and hwdev will not be NULL, so remove the check Fixes: 2acf960e3be6 ("net: hinic: Add support for configuration of rx-vlan-filter by ethtool") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309112354.pikZCmyk-lkp@intel.com/ Signed-off-by: Cai Huoqing Reviewed-by: Vadim Fedorenko Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_port.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 9406237c461e..f81a43d2cdfc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -456,9 +456,6 @@ int hinic_set_vlan_fliter(struct hinic_dev *nic_dev, u32 en) u16 out_size = sizeof(vlan_filter); int err; - if (!hwdev) - return -EINVAL; - vlan_filter.func_idx = HINIC_HWIF_FUNC_IDX(hwif); vlan_filter.enable = en; -- cgit From 81335f90e8a88b81932df011105c46e708744f44 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Sep 2023 14:01:10 -0700 Subject: bpf: unconditionally reset backtrack_state masks on global func exit In mark_chain_precision() logic, when we reach the entry to a global func, it is expected that R1-R5 might be still requested to be marked precise. This would correspond to some integer input arguments being tracked as precise. This is all expected and handled as a special case. What's not expected is that we'll leave backtrack_state structure with some register bits set. This is because for subsequent precision propagations backtrack_state is reused without clearing masks, as all code paths are carefully written in a way to leave empty backtrack_state with zeroed out masks, for speed. The fix is trivial, we always clear register bit in the register mask, and then, optionally, set reg->precise if register is SCALAR_VALUE type. Reported-by: Chris Mason Fixes: be2ef8161572 ("bpf: allow precision tracking for programs with subprogs") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230918210110.2241458-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb78212fa5b2..c0c7d137066a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4047,11 +4047,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bitmap_from_u64(mask, bt_reg_mask(bt)); for_each_set_bit(i, mask, 32) { reg = &st->frame[0]->regs[i]; - if (reg->type != SCALAR_VALUE) { - bt_clear_reg(bt, i); - continue; - } - reg->precise = true; + bt_clear_reg(bt, i); + if (reg->type == SCALAR_VALUE) + reg->precise = true; } return 0; } -- cgit From 4a0f07d71b0483cc08c03cefa7c85749e187c214 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 19 Sep 2023 18:44:06 +0800 Subject: net/handshake: Fix memory leak in __sock_create() and sock_alloc_file() When making CONFIG_DEBUG_KMEMLEAK=y and CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y, modprobe handshake-test and then rmmmod handshake-test, the below memory leak is detected. The struct socket_alloc which is allocated by alloc_inode_sb() in __sock_create() is not freed. And the struct dentry which is allocated by __d_alloc() in sock_alloc_file() is not freed. Since fput() will call file->f_op->release() which is sock_close() here and it will call __sock_release(). and fput() will call dput(dentry) to free the struct dentry. So replace sock_release() with fput() to fix the below memory leak. After applying this patch, the following memory leak is never detected. unreferenced object 0xffff888109165840 (size 768): comm "kunit_try_catch", pid 1852, jiffies 4294685807 (age 976.262s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0209ba2 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810f472008 (size 192): comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 08 20 47 0f 81 88 ff ff ......... G..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0209bbb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810958e580 (size 224): comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0209bbb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926dc88 (size 192): comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 88 dc 26 09 81 88 ff ff ..........&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208fdc [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a241380 (size 224): comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208fdc [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109165040 (size 768): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0208860 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926d568 (size 192): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 68 d5 26 09 81 88 ff ff ........h.&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208879 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240580 (size 224): comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.347s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208879 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164c40 (size 768): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa0208541 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926cd18 (size 192): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 18 cd 26 09 81 88 ff ff ..........&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa020855a [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240200 (size 224): comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa020855a [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164840 (size 768): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa02093e2 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926cab8 (size 192): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 b8 ca 26 09 81 88 ff ff ..........&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02093fb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810a240040 (size 224): comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02093fb [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109166440 (size 768): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa02097c1 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810926c398 (size 192): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 98 c3 26 09 81 88 ff ff ..........&..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02097da [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107e0b8c0 (size 224): comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa02097da [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888109164440 (size 768): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.487s) hex dump (first 32 bytes): 01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00 ......ZZ ....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] sock_alloc_inode+0x1f/0x1b0 [] alloc_inode+0x5b/0x1a0 [] new_inode_pseudo+0xd/0x70 [] sock_alloc+0x3c/0x260 [] __sock_create+0x66/0x3d0 [] 0xffffffffa020824e [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810f4cf698 (size 192): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s) hex dump (first 32 bytes): 00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00 ..P@............ 00 00 00 00 00 00 00 00 98 f6 4c 0f 81 88 ff ff ..........L..... backtrace: [] __d_alloc+0x31/0x8a0 [] d_alloc_pseudo+0xe/0x50 [] alloc_file_pseudo+0xce/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208267 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107e0b000 (size 224): comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00 ................ backtrace: [] alloc_empty_file+0x50/0x160 [] alloc_file+0x59/0x730 [] alloc_file_pseudo+0x154/0x210 [] sock_alloc_file+0x42/0x1b0 [] 0xffffffffa0208267 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 Fixes: 88232ec1ec5e ("net/handshake: Add Kunit tests for the handshake consumer API") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- net/handshake/handshake-test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 6d37bab35c8f..16ed7bfd29e4 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, req, result); handshake_req_cancel(sock->sk); - sock_release(sock); + fput(filp); } static void handshake_req_submit_test5(struct kunit *test) @@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test) /* Assert */ KUNIT_EXPECT_EQ(test, err, -EAGAIN); - sock_release(sock); + fput(filp); hn->hn_pending = saved; } @@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test) KUNIT_EXPECT_EQ(test, err, -EBUSY); handshake_req_cancel(sock->sk); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test1(struct kunit *test) @@ -340,7 +340,7 @@ static void handshake_req_cancel_test1(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test2(struct kunit *test) @@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); - sock_release(sock); + fput(filp); } static void handshake_req_cancel_test3(struct kunit *test) @@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Assert */ KUNIT_EXPECT_FALSE(test, result); - sock_release(sock); + fput(filp); } static struct handshake_req *handshake_req_destroy_test; @@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test) handshake_req_cancel(sock->sk); /* Act */ - sock_release(sock); + fput(filp); /* Assert */ KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req); -- cgit From e2ee60ad9aba41afb68f4387574610ee390029f1 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Tue, 22 Aug 2023 11:52:38 +0200 Subject: accel/ivpu/40xx: Fix buttress interrupt handling Buttress spec requires that the interrupt status is cleared at the source first (before clearing MTL_BUTTRESS_INTERRUPT_STAT), that implies that we have to mask out the global interrupt while handling buttress interrupts. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Signed-off-by: Karol Wachowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20230822095238.3722815-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 34626d66fa10..00c5dbbe6847 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -1046,7 +1046,8 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) if (status == 0) return IRQ_NONE; - REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); + /* Disable global interrupt before handling local buttress interrupts */ + REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) ivpu_dbg(vdev, IRQ, "FREQ_CHANGE"); @@ -1092,6 +1093,12 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) schedule_recovery = true; } + /* This must be done after interrupts are cleared at the source. */ + REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); + + /* Re-enable global interrupt */ + REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); + if (schedule_recovery) ivpu_pm_schedule_recovery(vdev); -- cgit From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 15 Sep 2023 19:20:41 +0800 Subject: xfrm6: fix inet6_dev refcount underflow problem There are race conditions that may lead to inet6_dev refcount underflow in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev(). One of the refcount underflow bugs is shown below: (cpu 1) | (cpu 2) xfrm6_dst_destroy() | ... | in6_dev_put() | | rt6_uncached_list_flush_dev() ... | ... | in6_dev_put() rt6_uncached_list_del() | ... ... | xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(), so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put() again for the same inet6_dev. Fix it by moving in6_dev_put() after rt6_uncached_list_del() in xfrm6_dst_destroy(). Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts") Signed-off-by: Zhang Changzhong Reviewed-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 188224a76685..45d0f9a8b28c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (likely(xdst->u.rt6.rt6i_idev)) - in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); rt6_uncached_list_del(&xdst->u.rt6); + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } -- cgit From 494e87ffa0159b3f879694a9231089707792a44d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 20 Sep 2023 04:15:22 -0700 Subject: xtensa: add default definition for XCHAL_HAVE_DIV32 When variant FSF is set, XCHAL_HAVE_DIV32 is not defined. Add default definition for that macro to prevent build warnings: arch/xtensa/lib/divsi3.S:9:5: warning: "XCHAL_HAVE_DIV32" is not defined, evaluates to 0 [-Wundef] 9 | #if XCHAL_HAVE_DIV32 arch/xtensa/lib/modsi3.S:9:5: warning: "XCHAL_HAVE_DIV32" is not defined, evaluates to 0 [-Wundef] 9 | #if XCHAL_HAVE_DIV32 Fixes: 173d6681380a ("xtensa: remove extra header files") Suggested-by: Randy Dunlap Signed-off-by: Max Filippov Reported-by: kernel test robot Closes: lore.kernel.org/r/202309150556.t0yCdv3g-lkp@intel.com --- arch/xtensa/include/asm/core.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 3f5ffae89b58..6f02f6f21890 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -6,6 +6,10 @@ #include +#ifndef XCHAL_HAVE_DIV32 +#define XCHAL_HAVE_DIV32 0 +#endif + #ifndef XCHAL_HAVE_EXCLUSIVE #define XCHAL_HAVE_EXCLUSIVE 0 #endif -- cgit From 84e34a99fd403ba3c131584fa023a0a5ce217feb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:25 -0700 Subject: xtensa: fault: include Use to provide the function prototype for do_page_fault() to prevent a build warning: arch/xtensa/mm/fault.c:87:6: warning: no previous prototype for 'do_page_fault' [-Wmissing-prototypes] 87 | void do_page_fault(struct pt_regs *regs) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-3-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index d1eb8d6c5b82..16e11b6f6f78 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -20,6 +20,7 @@ #include #include #include +#include void bad_page_fault(struct pt_regs*, unsigned long, int); -- cgit From 4052a37aa84abac7d0f1a7f28378edc78c9d6dd3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:26 -0700 Subject: xtensa: irq: include Use to provide the function prototype for do_IRQ() to prevent a build warning: arch/xtensa/kernel/irq.c:34:17: warning: no previous prototype for 'do_IRQ' [-Wmissing-prototypes] 34 | asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-4-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/kernel/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 42f106004400..b1e410f6b5ab 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -28,6 +28,7 @@ #include #include #include +#include DECLARE_PER_CPU(unsigned long, nmi_count); -- cgit From 8cf543c0a074b32b618bea44645abd0e525ef93f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:27 -0700 Subject: xtensa: ptrace: add prototypes to Add prototype for do_syscall_trace_enter() to asm/ptrace.h. Move prototype for do_syscall_trace_leave() there to be consistent. Fixes a build warning: arch/xtensa/kernel/ptrace.c:545:5: warning: no previous prototype for 'do_syscall_trace_enter' [-Wmissing-prototypes] 545 | int do_syscall_trace_enter(struct pt_regs *regs) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-5-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/ptrace.h | 3 +++ arch/xtensa/kernel/ptrace.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 308f209a4740..a270467556dc 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -106,6 +106,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->areg[2]; } +int do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + #else /* __ASSEMBLY__ */ # include diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index f29477162ede..9056cd1a8302 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -541,7 +541,6 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -void do_syscall_trace_leave(struct pt_regs *regs); int do_syscall_trace_enter(struct pt_regs *regs) { if (regs->syscall == NO_SYSCALL) -- cgit From ccf9d278e5bba465fe7990aa2dde2825697db79c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:28 -0700 Subject: xtensa: processor.h: add init_arch() prototype Add the prototype for init_arch() to asm/processor.h to prevent a build warning: arch/xtensa/kernel/setup.c:244:13: warning: no previous prototype for 'init_arch' [-Wmissing-prototypes] 244 | void __init init_arch(bp_tag_t *bp_start) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-6-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index a6d09fe04831..5e4f4a474131 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -14,6 +14,8 @@ #include #include + +#include #include #include #include @@ -217,6 +219,8 @@ struct mm_struct; extern unsigned long __get_wchan(struct task_struct *p); +void init_arch(bp_tag_t *bp_start); + #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) -- cgit From 4ec4b8b1ec313af90a0488fe3c6a47dba2b8c198 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:29 -0700 Subject: xtensa: signal: include headers for function prototypes Add to satisfy the xtensa_rt_sigreturn() prototype warning. Add to satisfy the do_notify_resume() prototype warning. arch/xtensa/kernel/signal.c:246:17: warning: no previous prototype for 'xtensa_rt_sigreturn' [-Wmissing-prototypes] arch/xtensa/kernel/signal.c:525:6: warning: no previous prototype for 'do_notify_resume' [-Wmissing-prototypes] 525 | void do_notify_resume(struct pt_regs *regs) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-7-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 1 + arch/xtensa/kernel/signal.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 5e4f4a474131..d008a153a2b9 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -220,6 +220,7 @@ struct mm_struct; extern unsigned long __get_wchan(struct task_struct *p); void init_arch(bp_tag_t *bp_start); +void do_notify_resume(struct pt_regs *regs); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 5c01d7e70d90..81f0b106cfc1 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include extern struct task_struct *coproc_owners[]; -- cgit From 1b6ceeb99ee05eb2c62a9e5512623e63cf8490ba Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:30 -0700 Subject: xtensa: stacktrace: include for prototype Use to prevent a build warning: arch/xtensa/kernel/stacktrace.c:263:15: warning: no previous prototype for 'return_address' [-Wmissing-prototypes] 263 | unsigned long return_address(unsigned level) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-8-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index f643ea5e36da..831ffb648bda 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include -- cgit From 1c4087e97eb53b45709d12d2c96f03e26bcaee12 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:31 -0700 Subject: xtensa: traps: add for function prototype Use to provide the prototype for trap_init(), to prevent a build warning: arch/xtensa/kernel/traps.c:484:13: warning: no previous prototype for 'trap_init' [-Wmissing-prototypes] 484 | void __init trap_init(void) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-9-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 427c125a137a..38092d21acf8 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -23,6 +23,7 @@ * for more details. */ +#include #include #include #include -- cgit From 373e41633c35992df4e8c1bde8f0a3a29d4ade08 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:32 -0700 Subject: irqchip: irq-xtensa-mx: include header for missing prototype Add to provide the function prototype to prevent a build warning: drivers/irqchip/irq-xtensa-mx.c:166:12: warning: no previous prototype for 'xtensa_mx_init_legacy' [-Wmissing-prototypes] 166 | int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent) Signed-off-by: Randy Dunlap Acked-by: Marc Zyngier Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Gleixner Cc: Marc Zyngier Message-Id: <20230920052139.10570-10-rdunlap@infradead.org> Signed-off-by: Max Filippov --- drivers/irqchip/irq-xtensa-mx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 8c581c985aa7..7f314e58f3ce 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include -- cgit From 0f95df6246fe9d870cb9753c9376d72af84211a0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:33 -0700 Subject: xtensa: smp: add headers for missing function prototypes Use to provide the prototype for secondary_start_kernel(). Use to provide the prototype for setup_profiling_timer(). arch/xtensa/kernel/smp.c:119:6: warning: no previous prototype for 'secondary_start_kernel' [-Wmissing-prototypes] 119 | void secondary_start_kernel(void) arch/xtensa/kernel/smp.c:461:5: warning: no previous prototype for 'setup_profiling_timer' [-Wmissing-prototypes] 461 | int setup_profiling_timer(unsigned int multiplier) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-11-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/smp.h | 1 + arch/xtensa/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/xtensa/include/asm/smp.h b/arch/xtensa/include/asm/smp.h index 5dc5bf8cdd77..e446e6fc4557 100644 --- a/arch/xtensa/include/asm/smp.h +++ b/arch/xtensa/include/asm/smp.h @@ -23,6 +23,7 @@ struct cpumask; void arch_send_call_function_ipi_mask(const struct cpumask *mask); void arch_send_call_function_single_ipi(int cpu); +void secondary_start_kernel(void); void smp_init_cpus(void); void secondary_init_irq(void); void ipi_init(void); diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 07dd6baf18cf..94a23f100726 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include -- cgit From 2e413b1ebc30937882ed894897bee226896f262e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:34 -0700 Subject: xtensa: hw_breakpoint: include header for missing prototype Add the prototype for restore_dbreak() to and use that header in hw_breakpoint.c to prevent a build warning: arch/xtensa/kernel/hw_breakpoint.c:263:6: warning: no previous prototype for 'restore_dbreak' [-Wmissing-prototypes] 263 | void restore_dbreak(void) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-12-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/hw_breakpoint.h | 1 + arch/xtensa/kernel/hw_breakpoint.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/xtensa/include/asm/hw_breakpoint.h b/arch/xtensa/include/asm/hw_breakpoint.h index 9f119c1ca0b5..9ec86f440a48 100644 --- a/arch/xtensa/include/asm/hw_breakpoint.h +++ b/arch/xtensa/include/asm/hw_breakpoint.h @@ -48,6 +48,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); int check_hw_breakpoint(struct pt_regs *regs); void clear_ptrace_hw_breakpoint(struct task_struct *tsk); +void restore_dbreak(void); #else diff --git a/arch/xtensa/kernel/hw_breakpoint.c b/arch/xtensa/kernel/hw_breakpoint.c index 285fb2942b06..1eeecd58eb0c 100644 --- a/arch/xtensa/kernel/hw_breakpoint.c +++ b/arch/xtensa/kernel/hw_breakpoint.c @@ -13,6 +13,7 @@ #include #include #include +#include /* Breakpoint currently in use for each IBREAKA. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[XCHAL_NUM_IBREAK]); -- cgit From 25b9a3caf886b12eec3bc2608e852d8471db124e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:35 -0700 Subject: xtensa: tlb: include for missing prototype Add the prototype for check_tlb_sanity() to and use that header to prevent a build warning: arch/xtensa/mm/tlb.c:273:6: warning: no previous prototype for 'check_tlb_sanity' [-Wmissing-prototypes] 273 | void check_tlb_sanity(void) Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-13-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/tlb.h | 2 ++ arch/xtensa/mm/tlb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h index 50889935138a..8c3ceb427018 100644 --- a/arch/xtensa/include/asm/tlb.h +++ b/arch/xtensa/include/asm/tlb.h @@ -18,4 +18,6 @@ #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) +void check_tlb_sanity(void); + #endif /* _XTENSA_TLB_H */ diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 0a11fc5f185b..4f974b74883c 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include -- cgit From 1b59efeb59851277266318f4e0132aa61ce3455e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:36 -0700 Subject: xtensa: iss/network: make functions static Make 2 functions static to prevent build warnings: arch/xtensa/platforms/iss/network.c:204:16: warning: no previous prototype for 'tuntap_protocol' [-Wmissing-prototypes] 204 | unsigned short tuntap_protocol(struct sk_buff *skb) arch/xtensa/platforms/iss/network.c:444:6: warning: no previous prototype for 'iss_net_user_timer_expire' [-Wmissing-prototypes] 444 | void iss_net_user_timer_expire(struct timer_list *unused) Fixes: 7282bee78798 ("xtensa: Architecture support for Tensilica Xtensa Part 8") Fixes: d8479a21a98b ("xtensa: Convert timers to use timer_setup()") Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-14-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/network.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 85c82cd42188..e89f27f2bb18 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -201,7 +201,7 @@ static int tuntap_write(struct iss_net_private *lp, struct sk_buff **skb) return simc_write(lp->tp.info.tuntap.fd, (*skb)->data, (*skb)->len); } -unsigned short tuntap_protocol(struct sk_buff *skb) +static unsigned short tuntap_protocol(struct sk_buff *skb) { return eth_type_trans(skb, skb->dev); } @@ -441,7 +441,7 @@ static int iss_net_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } -void iss_net_user_timer_expire(struct timer_list *unused) +static void iss_net_user_timer_expire(struct timer_list *unused) { } -- cgit From 54d3d7d363823782c3444ddc41bb8cf1edc80514 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:37 -0700 Subject: xtensa: boot: don't add include-dirs Drop the -I options to prevent build warnings since there is not boot/include directory: cc1: warning: arch/xtensa/boot/include: No such file or directory [-Wmissing-include-dirs] Fixes: 437374e9a950 ("restore arch/{ppc/xtensa}/boot cflags") Fixes: 4bedea945451 ("xtensa: Architecture support for Tensilica Xtensa Part 2") Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-15-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/boot/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index a65b7a9ebff2..d8b0fadf429a 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -9,8 +9,7 @@ # KBUILD_CFLAGS used when building rest of boot (takes effect recursively) -KBUILD_CFLAGS += -fno-builtin -Iarch/$(ARCH)/boot/include -HOSTFLAGS += -Iarch/$(ARCH)/boot/include +KBUILD_CFLAGS += -fno-builtin subdir-y := lib targets += vmlinux.bin vmlinux.bin.gz -- cgit From 9aecda97ec3deecbfa7670877c8ddfd3d0fc87c4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Sep 2023 22:21:38 -0700 Subject: xtensa: umulsidi3: fix conditional expression Even when a variant has one or more of these defines set to 1, the multiplier code paths are not used. Change the expression so that the correct code paths are used. arch/xtensa/lib/umulsidi3.S:44:38: warning: "XCHAL_NO_MUL" is not defined, evaluates to 0 [-Wundef] 44 | #if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL arch/xtensa/lib/umulsidi3.S:145:38: warning: "XCHAL_NO_MUL" is not defined, evaluates to 0 [-Wundef] 145 | #if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL arch/xtensa/lib/umulsidi3.S:159:5: warning: "XCHAL_NO_MUL" is not defined, evaluates to 0 [-Wundef] 159 | #if XCHAL_NO_MUL Fixes: 8939c58d68f9 ("xtensa: add __umulsidi3 helper") Signed-off-by: Randy Dunlap Cc: Chris Zankel Cc: Max Filippov Message-Id: <20230920052139.10570-16-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/lib/umulsidi3.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/lib/umulsidi3.S b/arch/xtensa/lib/umulsidi3.S index 8c7a94a0c5d0..5da501b57813 100644 --- a/arch/xtensa/lib/umulsidi3.S +++ b/arch/xtensa/lib/umulsidi3.S @@ -3,7 +3,9 @@ #include #include -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 0 +#else #define XCHAL_NO_MUL 1 #endif -- cgit From f54d02c8f2cc4b46ba2a3bd8252a6750453b6f2b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 20 Sep 2023 04:41:09 -0700 Subject: xtensa: boot/lib: fix function prototypes Add function prototype for gunzip() to the boot library code and make exit() and zalloc() static. arch/xtensa/boot/lib/zmem.c:8:6: warning: no previous prototype for 'exit' [-Wmissing-prototypes] 8 | void exit (void) arch/xtensa/boot/lib/zmem.c:13:7: warning: no previous prototype for 'zalloc' [-Wmissing-prototypes] 13 | void *zalloc(unsigned size) arch/xtensa/boot/lib/zmem.c:35:6: warning: no previous prototype for 'gunzip' [-Wmissing-prototypes] 35 | void gunzip (void *dst, int dstlen, unsigned char *src, int *lenp) Fixes: 4bedea945451 ("xtensa: Architecture support for Tensilica Xtensa Part 2") Fixes: e7d163f76665 ("xtensa: Removed local copy of zlib and fixed O= support") Suggested-by: Randy Dunlap Signed-off-by: Max Filippov --- arch/xtensa/boot/lib/zmem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/boot/lib/zmem.c b/arch/xtensa/boot/lib/zmem.c index e3ecd743c515..b89189355122 100644 --- a/arch/xtensa/boot/lib/zmem.c +++ b/arch/xtensa/boot/lib/zmem.c @@ -4,13 +4,14 @@ /* bits taken from ppc */ extern void *avail_ram, *end_avail; +void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp); -void exit (void) +static void exit(void) { for (;;); } -void *zalloc(unsigned size) +static void *zalloc(unsigned int size) { void *p = avail_ram; -- cgit From 8287474aa5ffb41df52552c4ae4748e791d2faf2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 13 Sep 2023 18:28:15 +0200 Subject: direct_write_fallback(): on error revert the ->ki_pos update from buffered write If we fail filemap_write_and_wait_range() on the range the buffered write went into, we only report the "number of bytes which we direct-written", to quote the comment in there. Which is fine, but buffered write has already advanced iocb->ki_pos, so we need to roll that back. Otherwise we end up with e.g. write(2) advancing position by more than the amount it reports having written. Fixes: 182c25e9c157 "filemap: update ki_pos in generic_perform_write" Signed-off-by: Al Viro Message-Id: <20230827214518.GU3390869@ZenIV> Signed-off-by: Christian Brauner --- fs/libfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/libfs.c b/fs/libfs.c index a4eb12757886..37f2d34ee090 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1903,6 +1903,7 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, * We don't know how much we wrote, so just return the number of * bytes which were direct-written */ + iocb->ki_pos -= buffered_written; if (direct_written) return direct_written; return err; -- cgit From db7fcc884d8a1e8265a87306e728c3d3239b0ca2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:14:14 -0700 Subject: aio: Annotate struct kioctx_table with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct kioctx_table. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Benjamin LaHaise Cc: Alexander Viro Cc: Christian Brauner Cc: linux-aio@kvack.org Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: "Gustavo A. R. Silva" Message-Id: <20230915201413.never.881-kees@kernel.org> Signed-off-by: Christian Brauner --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index a4c2a6bac72c..f8589caef9c1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -80,7 +80,7 @@ struct aio_ring { struct kioctx_table { struct rcu_head rcu; unsigned nr; - struct kioctx __rcu *table[]; + struct kioctx __rcu *table[] __counted_by(nr); }; struct kioctx_cpu { -- cgit From be049c3a088d512187407b7fd036cecfab46d565 Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Fri, 15 Sep 2023 22:51:31 -0600 Subject: fs-writeback: do not requeue a clean inode having skipped pages When writing back an inode and performing an fsync on it concurrently, a deadlock issue may arise as shown below. In each writeback iteration, a clean inode is requeued to the wb->b_dirty queue due to non-zero pages_skipped, without anything actually being written. This causes an infinite loop and prevents the plug from being flushed, resulting in a deadlock. We now avoid requeuing the clean inode to prevent this issue. wb_writeback fsync (inode-Y) blk_start_plug(&plug) for (;;) { iter i-1: some reqs with page-X added into plug->mq_list // f2fs node page-X with PG_writeback filemap_fdatawrite __filemap_fdatawrite_range // write inode-Y with sync_mode WB_SYNC_ALL do_writepages f2fs_write_data_pages __f2fs_write_data_pages // wb_sync_req[DATA]++ for WB_SYNC_ALL f2fs_write_cache_pages f2fs_write_single_data_page f2fs_do_write_data_page f2fs_outplace_write_data f2fs_update_data_blkaddr f2fs_wait_on_page_writeback wait_on_page_writeback // wait for f2fs node page-X iter i: progress = __writeback_inodes_wb(wb, work) . writeback_sb_inodes . __writeback_single_inode // write inode-Y with sync_mode WB_SYNC_NONE . . do_writepages . . f2fs_write_data_pages . . . __f2fs_write_data_pages // skip writepages due to (wb_sync_req[DATA]>0) . . . wbc->pages_skipped += get_dirty_pages(inode) // wbc->pages_skipped = 1 . if (!(inode->i_state & I_DIRTY_ALL)) // i_state = I_SYNC | I_SYNC_QUEUED . total_wrote++; // total_wrote = 1 . requeue_inode // requeue inode-Y to wb->b_dirty queue due to non-zero pages_skipped if (progress) // progress = 1 continue; iter i+1: queue_io // similar process with iter i, infinite for-loop ! } blk_finish_plug(&plug) // flush plug won't be called Signed-off-by: Chunhai Guo Reviewed-by: Jan Kara Message-Id: <20230916045131.957929-1-guochunhai@vivo.com> Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 969ce991b0b0..c1af01b2c42d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1535,10 +1535,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, if (wbc->pages_skipped) { /* - * writeback is not making progress due to locked - * buffers. Skip this inode for now. + * Writeback is not making progress due to locked buffers. + * Skip this inode for now. Although having skipped pages + * is odd for clean inodes, it can happen for some + * filesystems so handle that gracefully. */ - redirty_tail_locked(inode, wb); + if (inode->i_state & I_DIRTY_ALL) + redirty_tail_locked(inode, wb); + else + inode_cgwb_move_to_attached(inode, wb); return; } -- cgit From ae81711c1edd769b7d9952dde40a579dceca4815 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 19 Sep 2023 09:40:44 +0200 Subject: fs/pipe: remove duplicate "offset" initializer This code duplication was introduced by commit a194dfe6e6f6 ("pipe: Rearrange sequence in pipe_write() to preallocate slot"), but since the pipe's mutex is locked, nobody else can modify the value meanwhile. Signed-off-by: Max Kellermann Message-Id: <20230919074045.1066796-1-max.kellermann@ionos.com> Signed-off-by: Christian Brauner --- fs/pipe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/pipe.c b/fs/pipe.c index 6c1a9b1db907..139190165a1c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -537,7 +537,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) break; } ret += copied; - buf->offset = 0; buf->len = copied; if (!iov_iter_count(from)) -- cgit From 2ba0dd6562f2c42ef1ae61145bdfc882fc7a6f79 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 15 Sep 2023 16:01:02 +0200 Subject: porting: document new block device opening order We've changed the order of opening block devices and superblock handling. Let's document this so filesystem and vfs developers have a proper digital paper trail. Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index deac4e973ddc..787d10b9e8b5 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -949,3 +949,29 @@ mmap_lock held. All in-tree users have been audited and do not seem to depend on the mmap_lock being held, but out of tree users should verify for themselves. If they do need it, they can return VM_FAULT_RETRY to be called with the mmap_lock held. + +--- + +**mandatory** + +The order of opening block devices and matching or creating superblocks has +changed. + +The old logic opened block devices first and then tried to find a +suitable superblock to reuse based on the block device pointer. + +The new logic tries to find a suitable superblock first based on the device +number, and opening the block device afterwards. + +Since opening block devices cannot happen under s_umount because of lock +ordering requirements s_umount is now dropped while opening block devices and +reacquired before calling fill_super(). + +In the old logic concurrent mounters would find the superblock on the list of +superblocks for the filesystem type. Since the first opener of the block device +would hold s_umount they would wait until the superblock became either born or +was discarded due to initialization failure. + +Since the new logic drops s_umount concurrent mounters could grab s_umount and +would spin. Instead they are now made to wait using an explicit wait-wake +mechanism without having to hold s_umount. -- cgit From 060e6c7d179ed2f2088a23ceedf60d63320e9311 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 15 Sep 2023 16:01:40 +0200 Subject: porting: document superblock as block device holder We've changed the holder of the block device which has consequences. Document this clearly and in detail so filesystem and vfs developers have a proper digital paper trail. Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 70 +++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 787d10b9e8b5..4d05b9862451 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -975,3 +975,73 @@ was discarded due to initialization failure. Since the new logic drops s_umount concurrent mounters could grab s_umount and would spin. Instead they are now made to wait using an explicit wait-wake mechanism without having to hold s_umount. + +--- + +**mandatory** + +The holder of a block device is now the superblock. + +The holder of a block device used to be the file_system_type which wasn't +particularly useful. It wasn't possible to go from block device to owning +superblock without matching on the device pointer stored in the superblock. +This mechanism would only work for a single device so the block layer couldn't +find the owning superblock of any additional devices. + +In the old mechanism reusing or creating a superblock for a racing mount(2) and +umount(2) relied on the file_system_type as the holder. This was severly +underdocumented however: + +(1) Any concurrent mounter that managed to grab an active reference on an + existing superblock was made to wait until the superblock either became + ready or until the superblock was removed from the list of superblocks of + the filesystem type. If the superblock is ready the caller would simple + reuse it. + +(2) If the mounter came after deactivate_locked_super() but before + the superblock had been removed from the list of superblocks of the + filesystem type the mounter would wait until the superblock was shutdown, + reuse the block device and allocate a new superblock. + +(3) If the mounter came after deactivate_locked_super() and after + the superblock had been removed from the list of superblocks of the + filesystem type the mounter would reuse the block device and allocate a new + superblock (the bd_holder point may still be set to the filesystem type). + +Because the holder of the block device was the file_system_type any concurrent +mounter could open the block devices of any superblock of the same +file_system_type without risking seeing EBUSY because the block device was +still in use by another superblock. + +Making the superblock the owner of the block device changes this as the holder +is now a unique superblock and thus block devices associated with it cannot be +reused by concurrent mounters. So a concurrent mounter in (2) could suddenly +see EBUSY when trying to open a block device whose holder was a different +superblock. + +The new logic thus waits until the superblock and the devices are shutdown in +->kill_sb(). Removal of the superblock from the list of superblocks of the +filesystem type is now moved to a later point when the devices are closed: + +(1) Any concurrent mounter managing to grab an active reference on an existing + superblock is made to wait until the superblock is either ready or until + the superblock and all devices are shutdown in ->kill_sb(). If the + superblock is ready the caller will simply reuse it. + +(2) If the mounter comes after deactivate_locked_super() but before + the superblock has been removed from the list of superblocks of the + filesystem type the mounter is made to wait until the superblock and the + devices are shut down in ->kill_sb() and the superblock is removed from the + list of superblocks of the filesystem type. The mounter will allocate a new + superblock and grab ownership of the block device (the bd_holder pointer of + the block device will be set to the newly allocated superblock). + +(3) This case is now collapsed into (2) as the superblock is left on the list + of superblocks of the filesystem type until all devices are shutdown in + ->kill_sb(). In other words, if the superblock isn't on the list of + superblock of the filesystem type anymore then it has given up ownership of + all associated block devices (the bd_holder pointer is NULL). + +As this is a VFS level change it has no practical consequences for filesystems +other than that all of them must use one of the provided kill_litter_super(), +kill_anon_super(), or kill_block_super() helpers. -- cgit From 8446a4deb6b6bc998f1d8d2a85d1a0c64b9e3a71 Mon Sep 17 00:00:00 2001 From: David Laight Date: Thu, 7 Sep 2023 12:42:20 +0000 Subject: slab: kmalloc_size_roundup() must not return 0 for non-zero size The typical use of kmalloc_size_roundup() is: ptr = kmalloc(sz = kmalloc_size_roundup(size), ...); if (!ptr) return -ENOMEM. This means it is vitally important that the returned value isn't less than the argument even if the argument is insane. In particular if kmalloc_slab() fails or the value is above (MAX_ULONG - PAGE_SIZE) zero is returned and kmalloc() will return its single zero-length buffer ZERO_SIZE_PTR. Fix this by returning the input size if the size exceeds KMALLOC_MAX_SIZE. kmalloc() will then return NULL as the size really is too big. kmalloc_slab() should not normally return NULL, unless called too early. Again, returning zero is not the correct action as it can be in some usage scenarios stored to a variable and only later cause kmalloc() return ZERO_SIZE_PTR and subsequent crashes on access. Instead we can simply stop checking the kmalloc_slab() result completely, as calling kmalloc_size_roundup() too early would then result in an immediate crash during boot and the developer noticing an issue in their code. [vbabka@suse.cz: remove kmalloc_slab() result check, tweak comments and commit log] Fixes: 05a940656e1e ("slab: Introduce kmalloc_size_roundup()") Signed-off-by: David Laight Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index e99e821065c3..306e6f0074ff 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -745,24 +745,24 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) size_t kmalloc_size_roundup(size_t size) { - struct kmem_cache *c; + if (size && size <= KMALLOC_MAX_CACHE_SIZE) { + /* + * The flags don't matter since size_index is common to all. + * Neither does the caller for just getting ->object_size. + */ + return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; + } - /* Short-circuit the 0 size case. */ - if (unlikely(size == 0)) - return 0; - /* Short-circuit saturated "too-large" case. */ - if (unlikely(size == SIZE_MAX)) - return SIZE_MAX; /* Above the smaller buckets, size is a multiple of page size. */ - if (size > KMALLOC_MAX_CACHE_SIZE) + if (size && size <= KMALLOC_MAX_SIZE) return PAGE_SIZE << get_order(size); /* - * The flags don't matter since size_index is common to all. - * Neither does the caller for just getting ->object_size. + * Return 'size' for 0 - kmalloc() returns ZERO_SIZE_PTR + * and very large size - kmalloc() may fail. */ - c = kmalloc_slab(size, GFP_KERNEL, 0); - return c ? c->object_size : 0; + return size; + } EXPORT_SYMBOL(kmalloc_size_roundup); -- cgit From 7e37c851374eca2d1f6128de03195c9f7b4baaf2 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 20 Sep 2023 16:53:34 +0800 Subject: regulator: mt6358: split ops for buck and linear range LDO regulators The buck and linear range LDO (VSRAM_*) regulators share one set of ops. This set includes support for get/set mode. However this only makes sense for buck regulators, not LDOs. The callbacks were not checking whether the register offset and/or mask for mode setting was valid or not. This ends up making the kernel report "normal" mode operation for the LDOs. Create a new set of ops without the get/set mode callbacks for the linear range LDO regulators. Fixes: f67ff1bd58f0 ("regulator: mt6358: Add support for MT6358 regulator") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20230920085336.136238-1-wenst@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/mt6358-regulator.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c index b9cda2210c33..65fbd95f1dbb 100644 --- a/drivers/regulator/mt6358-regulator.c +++ b/drivers/regulator/mt6358-regulator.c @@ -43,7 +43,7 @@ struct mt6358_regulator_info { .desc = { \ .name = #vreg, \ .of_match = of_match_ptr(match), \ - .ops = &mt6358_volt_range_ops, \ + .ops = &mt6358_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .id = MT6358_ID_##vreg, \ .owner = THIS_MODULE, \ @@ -139,7 +139,7 @@ struct mt6358_regulator_info { .desc = { \ .name = #vreg, \ .of_match = of_match_ptr(match), \ - .ops = &mt6358_volt_range_ops, \ + .ops = &mt6358_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .id = MT6366_ID_##vreg, \ .owner = THIS_MODULE, \ @@ -450,7 +450,7 @@ static unsigned int mt6358_regulator_get_mode(struct regulator_dev *rdev) } } -static const struct regulator_ops mt6358_volt_range_ops = { +static const struct regulator_ops mt6358_buck_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, .set_voltage_sel = regulator_set_voltage_sel_regmap, @@ -464,6 +464,18 @@ static const struct regulator_ops mt6358_volt_range_ops = { .get_mode = mt6358_regulator_get_mode, }; +static const struct regulator_ops mt6358_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear, + .map_voltage = regulator_map_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = mt6358_get_buck_voltage_sel, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6358_get_status, +}; + static const struct regulator_ops mt6358_volt_table_ops = { .list_voltage = regulator_list_voltage_table, .map_voltage = regulator_map_voltage_iterate, -- cgit From 099f0af9d98231bb74956ce92508e87cbcb896be Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 14 Sep 2023 16:10:15 +0300 Subject: drm/meson: fix memory leak on ->hpd_notify callback The EDID returned by drm_bridge_get_edid() needs to be freed. Fixes: 0af5e0b41110 ("drm/meson: encoder_hdmi: switch to bridge DRM_BRIDGE_ATTACH_NO_CONNECTOR") Cc: Neil Armstrong Cc: Sam Ravnborg Cc: Martin Blumenstingl Cc: Neil Armstrong Cc: Kevin Hilman Cc: Jerome Brunet Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Jani Nikula Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230914131015.2472029-1-jani.nikula@intel.com --- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 9913971fa5d2..25ea76558690 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -334,6 +334,8 @@ static void meson_encoder_hdmi_hpd_notify(struct drm_bridge *bridge, return; cec_notifier_set_phys_addr_from_edid(encoder_hdmi->cec_notifier, edid); + + kfree(edid); } else cec_notifier_phys_addr_invalidate(encoder_hdmi->cec_notifier); } -- cgit From 9f564b92cf6d0ecb398f9348600a7d8a7f8ea804 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 12 Sep 2023 08:56:19 +0200 Subject: riscv: Only consider swbp/ss handlers for correct privileged mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V software breakpoint trap handlers are used for {k,u}probes. When trapping from kernelmode, only the kernelmode handlers should be considered. Vice versa, only usermode handlers for usermode traps. This is not the case on RISC-V, which can trigger a bug if a userspace process uses uprobes, and a WARN() is triggered from kernelmode (which is implemented via {c.,}ebreak). The kernel will trap on the kernelmode {c.,}ebreak, look for uprobes handlers, realize incorrectly that uprobes need to be handled, and exit the trap handler early. The trap returns to re-executing the {c.,}ebreak, and enter an infinite trap-loop. The issue was found running the BPF selftest [1]. Fix this issue by only considering the swbp/ss handlers for kernel/usermode respectively. Also, move CONFIG ifdeffery from traps.c to the asm/{k,u}probes.h headers. Note that linux/uprobes.h only include asm/uprobes.h if CONFIG_UPROBES is defined, which is why asm/uprobes.h needs to be unconditionally included in traps.c Link: https://lore.kernel.org/linux-riscv/87v8d19aun.fsf@all.your.base.are.belong.to.us/ # [1] Fixes: 74784081aac8 ("riscv: Add uprobes supported") Reviewed-by: Guo Ren Reviewed-by: Nam Cao Tested-by: Puranjay Mohan Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20230912065619.62020-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kprobes.h | 11 ++++++++++- arch/riscv/include/asm/uprobes.h | 13 ++++++++++++- arch/riscv/kernel/traps.c | 28 ++++++++++++++++++---------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index e7882ccb0fd4..78ea44f76718 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,6 +40,15 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool kprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h index f2183e00fdd2..3fc7deda9190 100644 --- a/arch/riscv/include/asm/uprobes.h +++ b/arch/riscv/include/asm/uprobes.h @@ -34,7 +34,18 @@ struct arch_uprobe { bool simulate; }; +#ifdef CONFIG_UPROBES bool uprobe_breakpoint_handler(struct pt_regs *regs); bool uprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool uprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ #endif /* _ASM_RISCV_UPROBES_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 19807c4d3805..fae8f610d867 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -247,22 +249,28 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } +static bool probe_single_step_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_single_step_handler(regs) : kprobe_single_step_handler(regs); +} + +static bool probe_breakpoint_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_breakpoint_handler(regs) : kprobe_breakpoint_handler(regs); +} + void handle_break(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs)) + if (probe_single_step_handler(regs)) return; - if (kprobe_breakpoint_handler(regs)) - return; -#endif -#ifdef CONFIG_UPROBES - if (uprobe_single_step_handler(regs)) + if (probe_breakpoint_handler(regs)) return; - if (uprobe_breakpoint_handler(regs)) - return; -#endif current->thread.bad_cause = regs->cause; if (user_mode(regs)) -- cgit From f17cc0f11fa18c06b4938c20f0244620199af0b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Sep 2023 11:17:41 +0300 Subject: drm/i915/gt: Prevent error pointer dereference Move the check for "if (IS_ERR(obj))" in front of the call to i915_gem_object_set_cache_coherency() which dereferences "obj". Otherwise it will lead to a crash. Fixes: 43aa755eae2c ("drm/i915/mtl: Update cache coherency setting for context structure") Signed-off-by: Dan Carpenter Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/455b2279-2e08-4d00-9784-be56d8ee42e3@moroto.mountain (cherry picked from commit c92ec50822fb84306d951520d81919328421acbd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 957d0aeb0c02..c378cc7c953c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1094,6 +1094,9 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) { obj = i915_gem_object_create_shmem(engine->i915, context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + /* * Wa_22016122933: For Media version 13.0, all Media GT shared * memory needs to be mapped as WC on CPU side and UC (PAT @@ -1102,8 +1105,6 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (intel_gt_needs_wa_22016122933(engine->gt)) i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); } - if (IS_ERR(obj)) - return ERR_CAST(obj); vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { -- cgit From c524cd40e8a2a1a36f4898eaf2024beefeb815f3 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Tue, 12 Sep 2023 14:22:47 -0700 Subject: i915/pmu: Move execlist stats initialization to execlist specific setup engine->stats is a union of execlist and guc stat objects. When execlist specific fields are initialized, the initial state of guc stats is affected. This results in bad busyness values when using GuC mode. Move the execlist initialization from common code to execlist specific code. Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230912212247.1828681-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 4485bd519f5d6d620a29d0547ff3c982bdeeb468) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 - drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ee15486fed0d..e85d70a62123 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 8a641bcf777c..3292524469d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3550,6 +3550,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + seqcount_init(&engine->stats.execlists.lock); + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); -- cgit From 997a3e24dcc12b079eb7d545982d03657c17d526 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 11 Aug 2023 10:53:59 +0200 Subject: arm64: dts: freescale: tqma9352: Fix gpio hog The PMIC IRQ line is attached to GPIO1_IO03, as indicated by pca9451grp pinctrl config. Fixes: c982ecfa7992a ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi index 1c71c08becde..f6e422dc2663 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi @@ -81,7 +81,7 @@ &gpio1 { pmic-irq-hog { gpio-hog; - gpios = <2 GPIO_ACTIVE_LOW>; + gpios = <3 GPIO_ACTIVE_LOW>; input; line-name = "PMIC_IRQ#"; }; -- cgit From db58b5eea8a47da3bed6b128dfcbdaf336c6a244 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:22 +0200 Subject: Revert "tmpfs: add support for multigrain timestamps" This reverts commit d48c3397291690c3576d6c983b0a86ecbc203cac. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 02e62fccc80d..69595d341882 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4586,7 +4586,7 @@ static struct file_system_type shmem_fs_type = { #endif .kill_sb = kill_litter_super, #ifdef CONFIG_SHMEM - .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, #else .fs_flags = FS_USERNS_MOUNT, #endif -- cgit From f798accd5987dc2280e0ba9055edf1124af46a5f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:18 +0200 Subject: Revert "xfs: switch to multigrain timestamps" This reverts commit e44df2664746aed8b6dd5245eb711a0ce33c5cf5. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/xfs/libxfs/xfs_trans_inode.c | 6 +++--- fs/xfs/xfs_iops.c | 6 +++--- fs/xfs/xfs_super.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index ad22656376d3..6b2296ff248a 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -62,12 +62,12 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - /* If the mtime changes, then ctime must also change */ - ASSERT(flags & XFS_ICHGTIME_CHG); + tv = current_time(inode); - tv = inode_set_ctime_current(inode); if (flags & XFS_ICHGTIME_MOD) inode->i_mtime = tv; + if (flags & XFS_ICHGTIME_CHG) + inode_set_ctime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_CREATE) ip->i_crtime = tv; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2ededd3f6b8c..1c1e6171209d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -573,10 +573,10 @@ xfs_vn_getattr( stat->gid = vfsgid_into_kgid(vfsgid); stat->ino = ip->i_ino; stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); - fill_mg_cmtime(stat, request_mask, inode); - if (xfs_has_v3inodes(mp)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; @@ -917,7 +917,7 @@ xfs_setattr_size( if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = - current_mgtime(inode); + current_time(inode); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1f77014c6e1a..b5c202f5d96c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2065,7 +2065,7 @@ static struct file_system_type xfs_fs_type = { .init_fs_context = xfs_init_fs_context, .parameters = xfs_fs_parameters, .kill_sb = xfs_kill_sb, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("xfs"); -- cgit From 50ec1d721e117496df0582e34f1f2f946a03e1be Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:45 +0200 Subject: Revert "ext4: switch to multigrain timestamps" This reverts commit 0269b585868e59b6a2ecc6ea685d39310e4fc18b. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 38217422f938..dbebd8b3127e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -7314,7 +7314,7 @@ static struct file_system_type ext4_fs_type = { .init_fs_context = ext4_init_fs_context, .parameters = ext4_param_specs, .kill_sb = ext4_kill_sb, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ext4"); -- cgit From efd34f0316169bf182c40d3b63068ca95df6bb99 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:41:47 +0200 Subject: Revert "btrfs: convert to multigrain timestamps" This reverts commit 50e9ceef1d4f644ee0049e82e360058a64ec284c. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/btrfs/file.c | 24 ++++++++++++++++++++---- fs/btrfs/super.c | 5 ++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ca46a529d56b..ad6f401ac0e1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1106,6 +1106,25 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode) btrfs_drew_write_unlock(&inode->root->snapshot_lock); } +static void update_time_for_write(struct inode *inode) +{ + struct timespec64 now, ctime; + + if (IS_NOCMTIME(inode)) + return; + + now = current_time(inode); + if (!timespec64_equal(&inode->i_mtime, &now)) + inode->i_mtime = now; + + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, &now)) + inode_set_ctime_to_ts(inode, now); + + if (IS_I_VERSION(inode)) + inode_inc_iversion(inode); +} + static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { @@ -1137,10 +1156,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, * need to start yet another transaction to update the inode as we will * update the inode when we finish writing whatever data we write. */ - if (!IS_NOCMTIME(inode)) { - inode->i_mtime = inode_set_ctime_current(inode); - inode_inc_iversion(inode); - } + update_time_for_write(inode); start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 09bfe68d2ea3..cffdd6f7f8e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2150,7 +2150,7 @@ static struct file_system_type btrfs_fs_type = { .name = "btrfs", .mount = btrfs_mount, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; static struct file_system_type btrfs_root_fs_type = { @@ -2158,8 +2158,7 @@ static struct file_system_type btrfs_root_fs_type = { .name = "btrfs", .mount = btrfs_mount_root, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | - FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("btrfs"); -- cgit From 647aa768281f38cb1002edb3a1f673c3d66a8d81 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:13 +0200 Subject: Revert "fs: add infrastructure for multigrain timestamps" This reverts commit ffb6cf19e06334062744b7e3493f71e500964f8e. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/inode.c | 82 ++---------------------------------------------------- fs/stat.c | 41 ++------------------------- include/linux/fs.h | 46 ++---------------------------- 3 files changed, 7 insertions(+), 162 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 35fd688168c5..84bc3c76e5cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2102,52 +2102,10 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -/** - * current_mgtime - Return FS time (possibly fine-grained) - * @inode: inode. - * - * Return the current time truncated to the time granularity supported by - * the fs, as suitable for a ctime/mtime change. If the ctime is flagged - * as having been QUERIED, get a fine-grained timestamp. - */ -struct timespec64 current_mgtime(struct inode *inode) -{ - struct timespec64 now, ctime; - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - long nsec = atomic_long_read(pnsec); - - if (nsec & I_CTIME_QUERIED) { - ktime_get_real_ts64(&now); - return timestamp_truncate(now, inode); - } - - ktime_get_coarse_real_ts64(&now); - now = timestamp_truncate(now, inode); - - /* - * If we've recently fetched a fine-grained timestamp - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. - */ - ctime = inode_get_ctime(inode); - if (timespec64_compare(&ctime, &now) > 0) - now = ctime; - - return now; -} -EXPORT_SYMBOL(current_mgtime); - -static struct timespec64 current_ctime(struct inode *inode) -{ - if (is_mgtime(inode)) - return current_mgtime(inode); - return current_time(inode); -} - static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; - struct timespec64 now = current_ctime(inode); + struct timespec64 now = current_time(inode); struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ @@ -2578,43 +2536,9 @@ EXPORT_SYMBOL(current_time); */ struct timespec64 inode_set_ctime_current(struct inode *inode) { - struct timespec64 now; - struct timespec64 ctime; - - ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); - if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { - now = current_time(inode); + struct timespec64 now = current_time(inode); - /* Just copy it into place if it's not multigrain */ - if (!is_mgtime(inode)) { - inode_set_ctime_to_ts(inode, now); - return now; - } - - /* - * If we've recently updated with a fine-grained timestamp, - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. - */ - ctime.tv_sec = inode->__i_ctime.tv_sec; - if (timespec64_compare(&ctime, &now) > 0) - return ctime; - - /* - * Ctime updates are usually protected by the inode_lock, but - * we can still race with someone setting the QUERIED flag. - * Try to swap the new nsec value into place. If it's changed - * in the interim, then just go with a fine-grained timestamp. - */ - if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, - now.tv_nsec) != ctime.tv_nsec) - goto fine_grained; - inode->__i_ctime.tv_sec = now.tv_sec; - return now; - } -fine_grained: - ktime_get_real_ts64(&now); - inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); + inode_set_ctime(inode, now.tv_sec, now.tv_nsec); return now; } EXPORT_SYMBOL(inode_set_ctime_current); diff --git a/fs/stat.c b/fs/stat.c index 6e60389d6a15..d43a5cc1bfa4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -26,37 +26,6 @@ #include "internal.h" #include "mount.h" -/** - * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED - * @stat: where to store the resulting values - * @request_mask: STATX_* values requested - * @inode: inode from which to grab the c/mtime - * - * Given @inode, grab the ctime and mtime out if it and store the result - * in @stat. When fetching the value, flag it as queried so the next write - * will use a fine-grained timestamp. - */ -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) -{ - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - - /* If neither time was requested, then don't report them */ - if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { - stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); - return; - } - - stat->mtime = inode->i_mtime; - stat->ctime.tv_sec = inode->__i_ctime.tv_sec; - /* - * Atomically set the QUERIED flag and fetch the new value with - * the flag masked off. - */ - stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) & - ~I_CTIME_QUERIED; -} -EXPORT_SYMBOL(fill_mg_cmtime); - /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from @@ -89,14 +58,8 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; - - if (is_mgtime(inode)) { - fill_mg_cmtime(stat, request_mask, inode); - } else { - stat->mtime = inode->i_mtime; - stat->ctime = inode_get_ctime(inode); - } - + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..b528f063e8ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,47 +1508,18 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -struct timespec64 current_mgtime(struct inode *inode); struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/* - * Multigrain timestamps - * - * Conditionally use fine-grained ctime and mtime timestamps when there - * are users actively observing them via getattr. The primary use-case - * for this is NFS clients that use the ctime to distinguish between - * different states of the file, and that are often fooled by multiple - * operations that occur in the same coarse-grained timer tick. - * - * The kernel always keeps normalized struct timespec64 values in the ctime, - * which means that only the first 30 bits of the value are used. Use the - * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value - * has been queried since it was last updated. - */ -#define I_CTIME_QUERIED (1L<<30) - /** * inode_get_ctime - fetch the current ctime from the inode * @inode: inode from which to fetch ctime * - * Grab the current ctime tv_nsec field from the inode, mask off the - * I_CTIME_QUERIED flag and return it. This is mostly intended for use by - * internal consumers of the ctime that aren't concerned with ensuring a - * fine-grained update on the next change (e.g. when preparing to store - * the value in the backing store for later retrieval). - * - * This is safe to call regardless of whether the underlying filesystem - * is using multigrain timestamps. + * Grab the current ctime from the inode and return it. */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - struct timespec64 ctime; - - ctime.tv_sec = inode->__i_ctime.tv_sec; - ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED; - - return ctime; + return inode->__i_ctime; } /** @@ -2334,7 +2305,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2358,17 +2328,6 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -/** - * is_mgtime: is this inode using multigrain timestamps - * @inode: inode to test for multigrain timestamps - * - * Return true if the inode uses multigrain timestamps, false otherwise. - */ -static inline bool is_mgtime(const struct inode *inode) -{ - return inode->i_sb->s_type->fs_flags & FS_MGTIME; -} - extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3054,7 +3013,6 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); -- cgit From 7c329bbd3bb87ddb5843853f6e08f97d2f271496 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Sep 2023 18:06:36 -0700 Subject: KVM: selftests: Assert that vasprintf() is successful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assert that vasprintf() succeeds as the "returned" string is undefined on failure. Checking the result also eliminates the only warning with default options in KVM selftests, i.e. is the only thing getting in the way of compile with -Werror. lib/test_util.c: In function ‘strdup_printf’: lib/test_util.c:390:9: error: ignoring return value of ‘vasprintf’ declared with attribute ‘warn_unused_result’ [-Werror=unused-result] 390 | vasprintf(&str, fmt, ap); | ^~~~~~~~~~~~~~~~~~~~~~~~ Don't bother capturing the return value, allegedly vasprintf() can only fail due to a memory allocation failure. Fixes: dfaf20af7649 ("KVM: arm64: selftests: Replace str_with_index with strdup_printf") Cc: Andrew Jones Cc: Haibo Xu Cc: Anup Patel Signed-off-by: Sean Christopherson Reviewed-by: Andrew Jones Tested-by: Andrew Jones Message-Id: <20230914010636.1391735-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/test_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 3e36019eeb4a..5d7f28b02d73 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -387,7 +387,7 @@ char *strdup_printf(const char *fmt, ...) char *str; va_start(ap, fmt); - vasprintf(&str, fmt, ap); + TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed"); va_end(ap); return str; -- cgit From 488ef44c068e79752dba8eda0b75f524f111a695 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 20 Sep 2023 09:56:44 -0500 Subject: power: supply: rk817: Fix node refcount leak Dan Carpenter reports that the Smatch static checker warning has found that there is another refcount leak in the probe function. While of_node_put() was added in one of the return paths, it should in fact be added for ALL return paths that return an error and at driver removal time. Fixes: 54c03bfd094f ("power: supply: Fix refcount leak in rk817_charger_probe") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-pm/dc0bb0f8-212d-4be7-be69-becd2a3f9a80@kili.mountain/ Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20230920145644.57964-1-macroalpha82@gmail.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index c2510078eb2d..f64daf5a41d9 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -1045,6 +1045,13 @@ static void rk817_charging_monitor(struct work_struct *work) queue_delayed_work(system_wq, &charger->work, msecs_to_jiffies(8000)); } +static void rk817_cleanup_node(void *data) +{ + struct device_node *node = data; + + of_node_put(node); +} + static int rk817_charger_probe(struct platform_device *pdev) { struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent); @@ -1061,11 +1068,13 @@ static int rk817_charger_probe(struct platform_device *pdev) if (!node) return -ENODEV; + ret = devm_add_action_or_reset(&pdev->dev, rk817_cleanup_node, node); + if (ret) + return ret; + charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL); - if (!charger) { - of_node_put(node); + if (!charger) return -ENOMEM; - } charger->rk808 = rk808; -- cgit From 9850ccd5dd88075b2b7fd28d96299d5535f58cc5 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 20 Sep 2023 13:51:16 +0300 Subject: dm zoned: free dmz->ddev array in dmz_put_zoned_devices Commit 4dba12881f88 ("dm zoned: support arbitrary number of devices") made the pointers to additional zoned devices to be stored in a dynamically allocated dmz->ddev array. However, this array is not freed. Rename dmz_put_zoned_device to dmz_put_zoned_devices and fix it to free the dmz->ddev array when cleaning up zoned device information. Remove NULL assignment for all dmz->ddev elements and just free the dmz->ddev array instead. Found by Linux Verification Center (linuxtesting.org). Fixes: 4dba12881f88 ("dm zoned: support arbitrary number of devices") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-target.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ad8e670a2f9b..b487f7acc860 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -748,17 +748,16 @@ err: /* * Cleanup zoned device information. */ -static void dmz_put_zoned_device(struct dm_target *ti) +static void dmz_put_zoned_devices(struct dm_target *ti) { struct dmz_target *dmz = ti->private; int i; - for (i = 0; i < dmz->nr_ddevs; i++) { - if (dmz->ddev[i]) { + for (i = 0; i < dmz->nr_ddevs; i++) + if (dmz->ddev[i]) dm_put_device(ti, dmz->ddev[i]); - dmz->ddev[i] = NULL; - } - } + + kfree(dmz->ddev); } static int dmz_fixup_devices(struct dm_target *ti) @@ -948,7 +947,7 @@ err_bio: err_meta: dmz_dtr_metadata(dmz->metadata); err_dev: - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); err: kfree(dmz->dev); kfree(dmz); @@ -978,7 +977,7 @@ static void dmz_dtr(struct dm_target *ti) bioset_exit(&dmz->bio_set); - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); mutex_destroy(&dmz->chunk_lock); -- cgit From cbaabbcdcbd355f0a1ccc09a925575c51c270750 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sat, 26 Aug 2023 16:13:13 +0800 Subject: Bluetooth: Delete unused hci_req_prepare_suspend() declaration hci_req_prepare_suspend() has been deprecated in favor of hci_suspend_sync(). Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Yao Xiao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_request.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b9c5a9823837..0be75cf0efed 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -71,7 +71,5 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); void hci_req_add_le_passive_scan(struct hci_request *req); -void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); - void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); -- cgit From 187f8b648cc16f07c66ab1d89d961bdcff779bf7 Mon Sep 17 00:00:00 2001 From: Rocky Liao Date: Mon, 7 Aug 2023 14:46:26 +0800 Subject: Bluetooth: btusb: add shutdown function for QCA6174 We should send hci reset command before bt turn off, which can reset bt firmware status. Signed-off-by: Rocky Liao Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 82597ab4f747..499f4809fcdf 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4419,6 +4419,7 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; + hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; hdev->cmd_timeout = btusb_qca_cmd_timeout; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); -- cgit From 941c998b42f5c90384f49da89a6e11233de567cf Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Aug 2023 13:50:06 -0700 Subject: Bluetooth: hci_sync: Fix handling of HCI_QUIRK_STRICT_DUPLICATE_FILTER When HCI_QUIRK_STRICT_DUPLICATE_FILTER is set LE scanning requires periodic restarts of the scanning procedure as the controller would consider device previously found as duplicated despite of RSSI changes, but in order to set the scan timeout properly set le_scan_restart needs to be synchronous so it shall not use hci_cmd_sync_queue which defers the command processing to cmd_sync_work. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-bluetooth/578e6d7afd676129decafba846a933f5@agner.ch/#t Fixes: 27d54b778ad1 ("Bluetooth: Rework le_scan_restart for hci_sync") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 9b93653c6197..fd7c5d902856 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -413,11 +413,6 @@ static int hci_le_scan_restart_sync(struct hci_dev *hdev) LE_SCAN_FILTER_DUP_ENABLE); } -static int le_scan_restart_sync(struct hci_dev *hdev, void *data) -{ - return hci_le_scan_restart_sync(hdev); -} - static void le_scan_restart(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -427,15 +422,15 @@ static void le_scan_restart(struct work_struct *work) bt_dev_dbg(hdev, ""); - hci_dev_lock(hdev); - - status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL); + status = hci_le_scan_restart_sync(hdev); if (status) { bt_dev_err(hdev, "failed to restart LE scan: status %d", status); - goto unlock; + return; } + hci_dev_lock(hdev); + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || !hdev->discovery.scan_start) goto unlock; -- cgit From c7eaf80bfb0c8cef852cce9501b95dd5a6bddcb9 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Mon, 4 Sep 2023 14:11:51 +0000 Subject: Bluetooth: Fix hci_link_tx_to RCU lock usage Syzbot found a bug "BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580". It is because hci_link_tx_to holds an RCU read lock and calls hci_disconnect which would hold a mutex lock since the commit a13f316e90fd ("Bluetooth: hci_conn: Consolidate code for aborting connections"). Here's an example call trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xfc/0x174 lib/dump_stack.c:106 ___might_sleep+0x4a9/0x4d3 kernel/sched/core.c:9663 __mutex_lock_common kernel/locking/mutex.c:576 [inline] __mutex_lock+0xc7/0x6e7 kernel/locking/mutex.c:732 hci_cmd_sync_queue+0x3a/0x287 net/bluetooth/hci_sync.c:388 hci_abort_conn+0x2cd/0x2e4 net/bluetooth/hci_conn.c:1812 hci_disconnect+0x207/0x237 net/bluetooth/hci_conn.c:244 hci_link_tx_to net/bluetooth/hci_core.c:3254 [inline] __check_timeout net/bluetooth/hci_core.c:3419 [inline] __check_timeout+0x310/0x361 net/bluetooth/hci_core.c:3399 hci_sched_le net/bluetooth/hci_core.c:3602 [inline] hci_tx_work+0xe8f/0x12d0 net/bluetooth/hci_core.c:3652 process_one_work+0x75c/0xba1 kernel/workqueue.c:2310 worker_thread+0x5b2/0x73a kernel/workqueue.c:2457 kthread+0x2f7/0x30b kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 This patch releases RCU read lock before calling hci_disconnect and reacquires it afterward to fix the bug. Fixes: a13f316e90fd ("Bluetooth: hci_conn: Consolidate code for aborting connections") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a5992f1b3c9b..db4f28d68d71 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3418,7 +3418,12 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); + /* hci_disconnect might sleep, so, we have to release + * the RCU read lock before calling it. + */ + rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); + rcu_read_lock(); } } -- cgit From e0275ea52169412b8faccb4e2f4fed8a057844c6 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 28 Aug 2023 13:05:45 -0700 Subject: Bluetooth: ISO: Fix handling of listen for unicast iso_listen_cis shall only return -EADDRINUSE if the listening socket has the destination set to BDADDR_ANY otherwise if the destination is set to a specific address it is for broadcast which shall be ignored. Fixes: f764a6c2c1e4 ("Bluetooth: ISO: Add broadcast support") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 16da946f5881..71248163ce9a 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -502,7 +502,7 @@ drop: } /* -------- Socket interface ---------- */ -static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba) +static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *src, bdaddr_t *dst) { struct sock *sk; @@ -510,7 +510,10 @@ static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba) if (sk->sk_state != BT_LISTEN) continue; - if (!bacmp(&iso_pi(sk)->src, ba)) + if (bacmp(&iso_pi(sk)->dst, dst)) + continue; + + if (!bacmp(&iso_pi(sk)->src, src)) return sk; } @@ -952,7 +955,7 @@ static int iso_listen_cis(struct sock *sk) write_lock(&iso_sk_list.lock); - if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src)) + if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src, &iso_pi(sk)->dst)) err = -EADDRINUSE; write_unlock(&iso_sk_list.lock); -- cgit From 1d8e801422d66e4b8c7b187c52196bef94eed887 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Thu, 7 Sep 2023 04:39:34 +0000 Subject: Bluetooth: Avoid redundant authentication While executing the Android 13 CTS Verifier Secure Server test on a ChromeOS device, it was observed that the Bluetooth host initiates authentication for an RFCOMM connection after SSP completes. When this happens, some Intel Bluetooth controllers, like AC9560, would disconnect with "Connection Rejected due to Security Reasons (0x0e)". Historically, BlueZ did not mandate this authentication while an authenticated combination key was already in use for the connection. This behavior was changed since commit 7b5a9241b780 ("Bluetooth: Introduce requirements for security level 4"). So, this patch addresses the aforementioned disconnection issue by restoring the previous behavior. Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 63 +++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 9d5057cef30a..7a6f20338db8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2413,34 +2413,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, if (!test_bit(HCI_CONN_AUTH, &conn->flags)) goto auth; - /* An authenticated FIPS approved combination key has sufficient - * security for security level 4. */ - if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 && - sec_level == BT_SECURITY_FIPS) - goto encrypt; - - /* An authenticated combination key has sufficient security for - security level 3. */ - if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 || - conn->key_type == HCI_LK_AUTH_COMBINATION_P256) && - sec_level == BT_SECURITY_HIGH) - goto encrypt; - - /* An unauthenticated combination key has sufficient security for - security level 1 and 2. */ - if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 || - conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) && - (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW)) - goto encrypt; - - /* A combination key has always sufficient security for the security - levels 1 or 2. High security level requires the combination key - is generated using maximum PIN code length (16). - For pre 2.1 units. */ - if (conn->key_type == HCI_LK_COMBINATION && - (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW || - conn->pin_length == 16)) - goto encrypt; + switch (conn->key_type) { + case HCI_LK_AUTH_COMBINATION_P256: + /* An authenticated FIPS approved combination key has + * sufficient security for security level 4 or lower. + */ + if (sec_level <= BT_SECURITY_FIPS) + goto encrypt; + break; + case HCI_LK_AUTH_COMBINATION_P192: + /* An authenticated combination key has sufficient security for + * security level 3 or lower. + */ + if (sec_level <= BT_SECURITY_HIGH) + goto encrypt; + break; + case HCI_LK_UNAUTH_COMBINATION_P192: + case HCI_LK_UNAUTH_COMBINATION_P256: + /* An unauthenticated combination key has sufficient security + * for security level 2 or lower. + */ + if (sec_level <= BT_SECURITY_MEDIUM) + goto encrypt; + break; + case HCI_LK_COMBINATION: + /* A combination key has always sufficient security for the + * security levels 2 or lower. High security level requires the + * combination key is generated using maximum PIN code length + * (16). For pre 2.1 units. + */ + if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16) + goto encrypt; + break; + default: + break; + } auth: if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) -- cgit From dcda165706b9fbfd685898d46a6749d7d397e0c0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 15 Sep 2023 14:42:27 -0700 Subject: Bluetooth: hci_core: Fix build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following warnings: net/bluetooth/hci_core.c: In function ‘hci_register_dev’: net/bluetooth/hci_core.c:2620:54: warning: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size 5 [-Wformat-truncation=] 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); | ^~ net/bluetooth/hci_core.c:2620:50: note: directive argument in the range [0, 2147483647] 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); | ^~~~~~~ net/bluetooth/hci_core.c:2620:9: note: ‘snprintf’ output between 5 and 14 bytes into a destination of size 8 2620 | snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e6359f7346f1..c33348ba1657 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -350,7 +350,7 @@ struct hci_dev { struct list_head list; struct mutex lock; - char name[8]; + const char *name; unsigned long flags; __u16 id; __u8 bus; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db4f28d68d71..9e89843c259b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2617,7 +2617,11 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); + error = dev_set_name(&hdev->dev, "hci%u", id); + if (error) + return error; + + hdev->name = dev_name(&hdev->dev); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); @@ -2639,8 +2643,6 @@ int hci_register_dev(struct hci_dev *hdev) if (!IS_ERR_OR_NULL(bt_debugfs)) hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); - dev_set_name(&hdev->dev, "%s", hdev->name); - error = device_add(&hdev->dev); if (error < 0) goto err_wqueue; -- cgit From b938790e70540bf4f2e653dcd74b232494d06c8f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 15 Sep 2023 13:24:47 -0700 Subject: Bluetooth: hci_codec: Fix leaking content of local_codecs The following memory leak can be observed when the controller supports codecs which are stored in local_codecs list but the elements are never freed: unreferenced object 0xffff88800221d840 (size 32): comm "kworker/u3:0", pid 36, jiffies 4294898739 (age 127.060s) hex dump (first 32 bytes): f8 d3 02 03 80 88 ff ff 80 d8 21 02 80 88 ff ff ..........!..... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __kmalloc+0x47/0x120 [] hci_codec_list_add.isra.0+0x2d/0x160 [] hci_read_codec_capabilities+0x183/0x270 [] hci_read_supported_codecs+0x1bb/0x2d0 [] hci_read_local_codecs_sync+0x3e/0x60 [] hci_dev_open_sync+0x943/0x11e0 [] hci_power_on+0x10d/0x3f0 [] process_one_work+0x404/0x800 [] worker_thread+0x374/0x670 [] kthread+0x188/0x1c0 [] ret_from_fork+0x2b/0x50 [] ret_from_fork_asm+0x1a/0x30 Cc: stable@vger.kernel.org Fixes: 8961987f3f5f ("Bluetooth: Enumerate local supported codec and cache details") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 1 + net/bluetooth/hci_event.c | 1 + net/bluetooth/hci_sync.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9e89843c259b..195aea2198a9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2786,6 +2786,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_conn_params_clear_all(hdev); hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); + hci_codec_list_clear(&hdev->local_codecs); hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 35f251041eeb..31d02b54eea1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -33,6 +33,7 @@ #include "hci_request.h" #include "hci_debugfs.h" +#include "hci_codec.h" #include "a2mp.h" #include "amp.h" #include "smp.h" diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index fd7c5d902856..d06e07a0ea5a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5074,6 +5074,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); + hci_codec_list_clear(&hdev->local_codecs); hci_dev_put(hdev); return err; -- cgit From 2ed45c0f1879079b30248568c515cf60fc668d8a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Sep 2023 18:20:18 +0100 Subject: btrfs: fix race when refilling delayed refs block reserve If we have two (or more) tasks attempting to refill the delayed refs block reserve we can end up with the delayed block reserve being over reserved, that is, with a reserved space greater than its size. If this happens, we are holding to more reserved space than necessary for a while. The race happens like this: 1) The delayed refs block reserve has a size of 8M and a reserved space of 6M for example; 2) Task A calls btrfs_delayed_refs_rsv_refill(); 3) Task B also calls btrfs_delayed_refs_rsv_refill(); 4) Task A sees there's a 2M difference between the size and the reserved space of the delayed refs rsv, so it will reserve 2M of space by calling btrfs_reserve_metadata_bytes(); 5) Task B also sees that 2M difference, and like task A, it reserves another 2M of metadata space; 6) Both task A and task B increase the reserved space of block reserve by 2M, by calling btrfs_block_rsv_add_bytes(), so the block reserve ends up with a size of 8M and a reserved space of 10M; 7) The extra, over reserved space will eventually be freed by some task calling btrfs_delayed_refs_rsv_release() -> btrfs_block_rsv_release() -> block_rsv_release_bytes(), as there we will detect the over reserve and release that space. So fix this by checking if we still need to add space to the delayed refs block reserve after reserving the metadata space, and if we don't, just release that space immediately. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6a13cf00218b..1043f66cc130 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -163,6 +163,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1); u64 num_bytes = 0; + u64 refilled_bytes; + u64 to_free; int ret = -ENOSPC; spin_lock(&block_rsv->lock); @@ -178,9 +180,38 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); if (ret) return ret; - btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); + + /* + * We may have raced with someone else, so check again if we the block + * reserve is still not full and release any excess space. + */ + spin_lock(&block_rsv->lock); + if (block_rsv->reserved < block_rsv->size) { + u64 needed = block_rsv->size - block_rsv->reserved; + + if (num_bytes >= needed) { + block_rsv->reserved += needed; + block_rsv->full = true; + to_free = num_bytes - needed; + refilled_bytes = needed; + } else { + block_rsv->reserved += num_bytes; + to_free = 0; + refilled_bytes = num_bytes; + } + } else { + to_free = num_bytes; + refilled_bytes = 0; + } + spin_unlock(&block_rsv->lock); + + if (to_free > 0) + btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info, + to_free); + + if (refilled_bytes > 0) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0, + refilled_bytes, 1); return 0; } -- cgit From a7ddeeb079505961355cf0106154da0110f1fdff Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Sep 2023 18:20:19 +0100 Subject: btrfs: prevent transaction block reserve underflow when starting transaction When starting a transaction, with a non-zero number of items, we reserve metadata space for that number of items and for delayed refs by doing a call to btrfs_block_rsv_add(), with the transaction block reserve passed as the block reserve argument. This reserves metadata space and adds it to the transaction block reserve. Later we migrate the space we reserved for delayed references from the transaction block reserve into the delayed refs block reserve, by calling btrfs_migrate_to_delayed_refs_rsv(). btrfs_migrate_to_delayed_refs_rsv() decrements the number of bytes to migrate from the source block reserve, and this however may result in an underflow in case the space added to the transaction block reserve ended up being used by another task that has not reserved enough space for its own use - examples are tasks doing reflinks or hole punching because they end up calling btrfs_replace_file_extents() -> btrfs_drop_extents() and may need to modify/COW a variable number of leaves/paths, so they keep trying to use space from the transaction block reserve when they need to COW an extent buffer, and may end up trying to use more space then they have reserved (1 unit/path only for removing file extent items). This can be avoided by simply reserving space first without adding it to the transaction block reserve, then add the space for delayed refs to the delayed refs block reserve and finally add the remaining reserved space to the transaction block reserve. This also makes the code a bit shorter and simpler. So just do that. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 9 +-------- fs/btrfs/delayed-ref.h | 1 - fs/btrfs/transaction.c | 6 +++--- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 1043f66cc130..9fe4ccca50a0 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -103,24 +103,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) * Transfer bytes to our delayed refs rsv. * * @fs_info: the filesystem - * @src: source block rsv to transfer from * @num_bytes: number of bytes to transfer * - * This transfers up to the num_bytes amount from the src rsv to the + * This transfers up to the num_bytes amount, previously reserved, to the * delayed_refs_rsv. Any extra bytes are returned to the space info. */ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes) { struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; u64 to_free = 0; - spin_lock(&src->lock); - src->reserved -= num_bytes; - src->size -= num_bytes; - spin_unlock(&src->lock); - spin_lock(&delayed_refs_rsv->lock); if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { u64 delta = delayed_refs_rsv->size - diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index b8e14b0ba5f1..fd9bf2b709c0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -407,7 +407,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 341363beaf10..3b60e56e5e02 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -631,14 +631,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush); if (ret) goto reserve_fail; if (delayed_refs_bytes) { - btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, - delayed_refs_bytes); + btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes); num_bytes -= delayed_refs_bytes; } + btrfs_block_rsv_add_bytes(rsv, num_bytes, true); if (rsv->space_info->force_alloc) do_chunk_alloc = true; -- cgit From 1bf76df3fee56d6637718e267f7c34ed70d0c7dc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Sep 2023 18:20:23 +0100 Subject: btrfs: return -EUCLEAN for delayed tree ref with a ref count not equals to 1 When running a delayed tree reference, if we find a ref count different from 1, we return -EIO. This isn't an IO error, as it indicates either a bug in the delayed refs code or a memory corruption, so change the error code from -EIO to -EUCLEAN. Also tag the branch as 'unlikely' as this is not expected to ever happen, and change the error message to print the tree block's bytenr without the parenthesis (and there was a missing space between the 'block' word and the opening parenthesis), for consistency as that's the style we used everywhere else. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f356f08b55cb..4282bdb5a9f1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1699,12 +1699,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent = ref->parent; ref_root = ref->root; - if (node->ref_mod != 1) { + if (unlikely(node->ref_mod != 1)) { btrfs_err(trans->fs_info, - "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu", node->bytenr, node->ref_mod, node->action, ref_root, parent); - return -EIO; + return -EUCLEAN; } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); -- cgit From d2f79e6385b0fcb1a38368e17d4721b8cd72af9f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Sep 2023 18:20:24 +0100 Subject: btrfs: remove redundant BUG_ON() from __btrfs_inc_extent_ref() At __btrfs_inc_extent_ref() we are doing a BUG_ON() if we are dealing with a tree block reference that has a reference count that is different from 1, but we have already dealt with this case at run_delayed_tree_ref(), making it useless. So remove the BUG_ON(). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4282bdb5a9f1..fd80129acc3c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1514,15 +1514,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* now insert the actual backref */ - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - BUG_ON(refs_to_add != 1); + if (owner < BTRFS_FIRST_FREE_OBJECTID) ret = insert_tree_block_ref(trans, path, bytenr, parent, root_objectid); - } else { + else ret = insert_extent_data_ref(trans, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); - } + if (ret) btrfs_abort_transaction(trans, ret); out: -- cgit From 8ec0a4a5774ab3f91c356c71f24dfba615bee860 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Sep 2023 18:20:29 +0100 Subject: btrfs: log message if extent item not found when running delayed extent op When running a delayed extent operation, if we don't find the extent item in the extent tree we just return -EIO without any logged message. This indicates some bug or possibly a memory or fs corruption, so the return value should not be -EIO but -EUCLEAN instead, and since it's not expected to ever happen, print an informative error message so that if it happens we have some idea of what went wrong, where to look at. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fd80129acc3c..fc313fce5bbd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1655,7 +1655,10 @@ again: goto again; } } else { - err = -EIO; + err = -EUCLEAN; + btrfs_err(fs_info, + "missing extent item for extent %llu num_bytes %llu level %d", + head->bytenr, head->num_bytes, extent_op->level); goto out; } } -- cgit From 58bfe2ccec5f9f137b41dd38f335290dcc13cd5c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 18 Sep 2023 10:34:51 -0400 Subject: btrfs: properly report 0 avail for very full file systems A user reported some issues with smaller file systems that get very full. While investigating this issue I noticed that df wasn't showing 100% full, despite having 0 chunk space and having < 1MiB of available metadata space. This turns out to be an overflow issue, we're doing: total_available_metadata_space - SZ_4M < global_block_rsv_size to determine if there's not enough space to make metadata allocations, which overflows if total_available_metadata_space is < 4M. Fix this by checking to see if our available space is greater than the 4M threshold. This makes df properly report 100% usage on the file system. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index cffdd6f7f8e8..1a093ec0f7e3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2117,7 +2117,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && - total_free_meta - thresh < block_rsv->size) + (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; -- cgit From 74ee79142c0a344d4eae2eb7012ebc4e82254109 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 19 Sep 2023 11:44:42 +0930 Subject: btrfs: reset destination buffer when read_extent_buffer() gets invalid range Commit f98b6215d7d1 ("btrfs: extent_io: do extra check for extent buffer read write functions") changed how we handle invalid extent buffer range for read_extent_buffer(). Previously if the range is invalid we just set the destination to zero, but after the patch we do nothing and error out. This can lead to smatch static checker errors like: fs/btrfs/print-tree.c:186 print_uuid_item() error: uninitialized symbol 'subvol_id'. fs/btrfs/tests/extent-io-tests.c:338 check_eb_bitmap() error: uninitialized symbol 'has'. fs/btrfs/tests/extent-io-tests.c:353 check_eb_bitmap() error: uninitialized symbol 'has'. fs/btrfs/uuid-tree.c:203 btrfs_uuid_tree_remove() error: uninitialized symbol 'read_subid'. fs/btrfs/uuid-tree.c:353 btrfs_uuid_tree_iterate() error: uninitialized symbol 'subid_le'. fs/btrfs/uuid-tree.c:72 btrfs_uuid_tree_lookup() error: uninitialized symbol 'data'. fs/btrfs/volumes.c:7415 btrfs_dev_stats_value() error: uninitialized symbol 'val'. Fix those warnings by reverting back to the old memset() behavior. By this we keep the static checker happy and would still make a lot of noise when such invalid ranges are passed in. Reported-by: Dan Carpenter Fixes: f98b6215d7d1 ("btrfs: extent_io: do extra check for extent buffer read write functions") Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6954ae763b86..caccd0376342 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3995,8 +3995,14 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; unsigned long i = get_eb_page_index(start); - if (check_eb_range(eb, start, len)) + if (check_eb_range(eb, start, len)) { + /* + * Invalid range hit, reset the memory, so callers won't get + * some random garbage for their uninitialzed memory. + */ + memset(dstv, 0, len); return; + } offset = get_eb_offset_in_page(eb, start); -- cgit From 6e2e27e47c022b86bd248e301986d461ca449bcf Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 Sep 2023 16:04:51 -0500 Subject: smb3: remove duplicate error mapping In status_to_posix_error STATUS_IO_REPARSE_TAG_NOT_HANDLED was mapped to both -EOPNOTSUPP and also to -EIO but the later one (-EIO) is ignored. Remove the duplicate. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2maperror.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index 194799ddd382..1a90dd78b238 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -877,8 +877,6 @@ static const struct status_to_posix_error smb2_error_map_table[] = { "STATUS_IO_REPARSE_TAG_MISMATCH"}, {STATUS_IO_REPARSE_DATA_INVALID, -EIO, "STATUS_IO_REPARSE_DATA_INVALID"}, - {STATUS_IO_REPARSE_TAG_NOT_HANDLED, -EIO, - "STATUS_IO_REPARSE_TAG_NOT_HANDLED"}, {STATUS_REPARSE_POINT_NOT_RESOLVED, -EIO, "STATUS_REPARSE_POINT_NOT_RESOLVED"}, {STATUS_DIRECTORY_IS_A_REPARSE_POINT, -EIO, -- cgit From 4556b93f6c026c62c93e7acc22838224ac2e2eba Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Tue, 12 Sep 2023 09:08:24 +0300 Subject: drm/virtio: clean out_fence on complete_submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The removed line prevents the following cleanup function to execute a dma_fence_put on the out_fence to free its memory, producing the following output in kmemleak: unreferenced object 0xffff888126d8ee00 (size 128): comm "kwin_wayland", pid 981, jiffies 4295380296 (age 390.060s) hex dump (first 32 bytes): c8 a1 c2 27 81 88 ff ff e0 14 a9 c0 ff ff ff ff ...'............ 30 1a e1 2e a6 00 00 00 28 fc 5b 17 81 88 ff ff 0.......(.[..... backtrace: [<0000000011655661>] kmalloc_trace+0x26/0xa0 [<0000000055f15b82>] virtio_gpu_fence_alloc+0x47/0xc0 [virtio_gpu] [<00000000fa6d96f9>] virtio_gpu_execbuffer_ioctl+0x1a8/0x800 [virtio_gpu] [<00000000e6cb5105>] drm_ioctl_kernel+0x169/0x240 [drm] [<000000005ad33e27>] drm_ioctl+0x399/0x6b0 [drm] [<00000000a19dbf65>] __x64_sys_ioctl+0xc5/0x100 [<0000000011fa801e>] do_syscall_64+0x5b/0xc0 [<0000000065c76d8a>] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 unreferenced object 0xffff888121930500 (size 128): comm "kwin_wayland", pid 981, jiffies 4295380313 (age 390.096s) hex dump (first 32 bytes): c8 a1 c2 27 81 88 ff ff e0 14 a9 c0 ff ff ff ff ...'............ f9 ec d7 2f a6 00 00 00 28 fc 5b 17 81 88 ff ff .../....(.[..... backtrace: [<0000000011655661>] kmalloc_trace+0x26/0xa0 [<0000000055f15b82>] virtio_gpu_fence_alloc+0x47/0xc0 [virtio_gpu] [<00000000fa6d96f9>] virtio_gpu_execbuffer_ioctl+0x1a8/0x800 [virtio_gpu] [<00000000e6cb5105>] drm_ioctl_kernel+0x169/0x240 [drm] [<000000005ad33e27>] drm_ioctl+0x399/0x6b0 [drm] [<00000000a19dbf65>] __x64_sys_ioctl+0xc5/0x100 [<0000000011fa801e>] do_syscall_64+0x5b/0xc0 [<0000000065c76d8a>] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [...] This memleak will grow quickly, being possible to see the following line in dmesg after few minutes of life in the virtual machine: [ 706.217388] kmemleak: 10731 new suspected memory leaks (see /sys/kernel/debug/kmemleak) The patch will remove the line to allow the cleanup function do its job. Signed-off-by: José Pekkarinen Fixes: e4812ab8e6b1 ("drm/virtio: Refactor and optimize job submission code path") Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20230912060824.5210-1-jose.pekkarinen@foxhound.fi --- drivers/gpu/drm/virtio/virtgpu_submit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 3c00135ead45..5c514946bbad 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -361,7 +361,6 @@ static void virtio_gpu_complete_submit(struct virtio_gpu_submit *submit) submit->buf = NULL; submit->buflist = NULL; submit->sync_file = NULL; - submit->out_fence = NULL; submit->out_fence_fd = -1; } -- cgit From 7fb77d9c87b8283f26aeeca473468e361b2fcf21 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 20 Sep 2023 17:42:11 -0300 Subject: smb: client: handle STATUS_IO_REPARSE_TAG_NOT_HANDLED Fix missing set of cifs_open_info_data::reparse_point when SMB2_CREATE request fails with STATUS_IO_REPARSE_TAG_NOT_HANDLED. Fixes: 5f71ebc41294 ("smb: client: parse reparse point flag in create response") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index b41e2e872b22..0b89f7008ac0 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -539,6 +539,9 @@ static int parse_create_response(struct cifs_open_info_data *data, int rc = 0; switch (rsp->hdr.Status) { + case STATUS_IO_REPARSE_TAG_NOT_HANDLED: + reparse_point = true; + break; case STATUS_STOPPED_ON_SYMLINK: rc = smb2_parse_symlink_response(cifs_sb, iov, &data->symlink_target); -- cgit From 6f6583e58d1ddf3c46e25ed756e6d5c8277968ee Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Wed, 23 Aug 2023 19:25:25 -0400 Subject: drm/amd/display: Fix MST recognizes connected displays as one [What] MST now recognizes both connected displays Fixes: 927e784c180c ("drm/amd/display: Add symclk enable/disable during stream enable/disable") Reviewed-by: Charlene Liu Acked-by: Stylon Wang Signed-off-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dce110/dce110_hw_sequencer.c | 30 ++++++++++++---------- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 8 ++---- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 478281f2a5ba..2a6157555fd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1178,12 +1178,15 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.otg_inst = tg->inst; dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); - dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); - dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->disable_symclk_se) + if (dccg) { + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + } + } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); + } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { /* TODO: This looks like a bug to me as we are disabling HPO IO when @@ -2658,11 +2661,11 @@ void dce110_prepare_bandwidth( struct clk_mgr *dccg = dc->clk_mgr; dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - - dccg->funcs->update_clocks( - dccg, - context, - false); + if (dccg) + dccg->funcs->update_clocks( + dccg, + context, + false); } void dce110_optimize_bandwidth( @@ -2673,10 +2676,11 @@ void dce110_optimize_bandwidth( dce110_set_displaymarks(dc, context); - dccg->funcs->update_clocks( - dccg, - context, - true); + if (dccg) + dccg->funcs->update_clocks( + dccg, + context, + true); } static void dce110_program_front_end_for_pipe( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index e72f15ac0048..aeadc587433f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2692,8 +2692,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) struct dce_hwseq *hws = dc->hwseq; unsigned int k1_div = PIXEL_RATE_DIV_NA; unsigned int k2_div = PIXEL_RATE_DIV_NA; - struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); - struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { if (dc->hwseq->funcs.setup_hpo_hw_control) @@ -2713,10 +2711,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->enable_symclk_se) - dccg->funcs->enable_symclk_se(dccg, - stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); - + } else { + } if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 3082da04a63d..1d052f08aff5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -75,7 +75,7 @@ void mpc32_power_on_blnd_lut( if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); - } else { + } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { ASSERT(false); /* TODO: change to mpc * dpp_base->ctx->dc->optimized_required = true; -- cgit From 06cce38ef51fc101402a0b02fca6e69c2e15ff3c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 14 Sep 2023 11:46:08 +0530 Subject: Revert "drm/amdgpu: Report vbios version instead of PN" This reverts commit 7748ce5b69581325cae40c2134088820f0957902. vbios_version sysfs node is used to identify Part Number also. Revert to the same so that it doesn't break scripts/software which parse this. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 73ee14f7a9a4..dce9e7d5e4ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); struct atom_context *ctx = adev->mode_info.atom_context; - return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str); + return sysfs_emit(buf, "%s\n", ctx->vbios_pn); } static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, -- cgit From 7c0195fa9a9e263df204963f88a22b21688ffb66 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Thu, 3 Mar 2022 20:39:14 +0800 Subject: i2c: mux: demux-pinctrl: check the return value of devm_kstrdup() devm_kstrdup() returns pointer to allocated string on success, NULL on failure. So it is better to check the return value of it. Fixes: e35478eac030 ("i2c: mux: demux-pinctrl: run properly with multiple instances") Signed-off-by: Xiaoke Wang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index a3a122fae71e..22f2280eab7f 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -243,6 +243,10 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL); props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL); + if (!props[i].name || !props[i].value) { + err = -ENOMEM; + goto err_rollback; + } props[i].length = 3; of_changeset_init(&priv->chan[i].chgset); -- cgit From f387bb578d49c5bf24204810cb2721f151d3eee2 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Thu, 14 Sep 2023 17:45:33 +0800 Subject: drm/amdgpu: fix a memory leak in amdgpu_ras_feature_enable This patch fixes a memory leak in the amdgpu_ras_feature_enable() function. The leak occurs when the function sends a command to the firmware to enable or disable a RAS feature for a GFX block. If the command fails, the kfree() function is not called to free the info memory. Fixes: 9f051d6ff13f ("drm/amdgpu: Free ras cmd input buffer properly") Reviewed-by: Hawking Zhang Signed-off-by: Cong Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 937c54fc7174..163445baa4fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -801,6 +801,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? "enable":"disable", get_ras_block_str(head), amdgpu_ras_is_poison_mode_supported(adev), ret); + kfree(info); return ret; } -- cgit From 2de19022c5d7ff519dd5b9690f7713267bd1abfe Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Wed, 13 Sep 2023 14:48:08 -0400 Subject: drm/amd/display: fix the ability to use lower resolution modes on eDP On eDP we can receive invalid modes from dm_update_crtc_state() for entirely new streams for which drm_mode_set_crtcinfo() shouldn't be called on. So, instead of calling drm_mode_set_crtcinfo() from within create_stream_for_sink() we can instead call it from amdgpu_dm_connector_mode_valid(). Since, we are guaranteed to only call drm_mode_set_crtcinfo() for valid modes from that function (invalid modes are rejected by that callback) and that is the only user of create_validate_stream_for_sink() that we need to call drm_mode_set_crtcinfo() for (as before commit cb841d27b876 ("drm/amd/display: Always pass connector_state to stream validation"), that is the only place where create_validate_stream_for_sink()'s dm_state was NULL). Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693 Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to stream validation") Tested-by: Mark Broadworth Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c6fd34bab358..868946dd7ef1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6098,8 +6098,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) drm_mode_set_crtcinfo(&saved_mode, 0); - else if (!old_stream) - drm_mode_set_crtcinfo(&mode, 0); /* * If scaling is enabled and refresh rate didn't change @@ -6661,6 +6659,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec goto fail; } + drm_mode_set_crtcinfo(mode, 0); + stream = create_validate_stream_for_sink(aconnector, mode, to_dm_connector_state(connector->state), NULL); -- cgit From cc39f9ccb82426e576734b493e1777ea01b144a8 Mon Sep 17 00:00:00 2001 From: YuBiao Wang Date: Fri, 15 Sep 2023 10:47:50 +0800 Subject: drm/amdkfd: Use gpu_offset for user queue's wptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Directly use tbo's start address will miss the domain start offset. Need to use gpu_offset instead. Signed-off-by: YuBiao Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8a6cb41444a4..0d3d538b64eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -216,7 +216,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (q->wptr_bo) { wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; } queue_input.is_kfd_process = 1; -- cgit From 0c1a2e69bcb506f48ebf94bd199bab0b93f66da2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 8 Aug 2023 15:19:50 -0700 Subject: drm/msm/dp: do not reinitialize phy unless retry during link training DP PHY re-initialization done using dp_ctrl_reinitialize_mainlink() will cause PLL unlocked initially and then PLL gets locked at the end of initialization. PLL_UNLOCKED interrupt will fire during this time if the interrupt mask is enabled. However currently DP driver link training implementation incorrectly re-initializes PHY unconditionally during link training as the PHY was already configured in dp_ctrl_enable_mainlink_clocks(). Fix this by re-initializing the PHY only if the previous link training failed. [drm:dp_aux_isr] *ERROR* Unexpected DP AUX IRQ 0x01000000 when not busy Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/30 Signed-off-by: Kuogee Hsieh Tested-by: Abhinav Kumar # sc7280 Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/551847/ Link: https://lore.kernel.org/r/1691533190-19335-1-git-send-email-quic_khsieh@quicinc.com [quic_abhinavk@quicinc.com: added line break in commit text] Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index a7a5c7e0ab92..77a8d9366ed7 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1774,13 +1774,6 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) return rc; while (--link_train_max_retries) { - rc = dp_ctrl_reinitialize_mainlink(ctrl); - if (rc) { - DRM_ERROR("Failed to reinitialize mainlink. rc=%d\n", - rc); - break; - } - training_step = DP_TRAINING_NONE; rc = dp_ctrl_setup_main_link(ctrl, &training_step); if (rc == 0) { @@ -1832,6 +1825,12 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) /* stop link training before start re training */ dp_ctrl_clear_training_pattern(ctrl); } + + rc = dp_ctrl_reinitialize_mainlink(ctrl); + if (rc) { + DRM_ERROR("Failed to reinitialize mainlink. rc=%d\n", rc); + break; + } } if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) -- cgit From c0666d1dc6816e74192600e154517d7943b917fe Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 5 Sep 2023 20:43:48 +0300 Subject: drm/msm/mdss: fix highest-bank-bit for msm8998 According to the vendor DT files, msm8998 has highest-bank-bit equal to 2. Update the data accordingly. Fixes: 6f410b246209 ("drm/msm/mdss: populate missing data") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/555840/ Link: https://lore.kernel.org/r/20230905174353.3118648-2-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 2e87dd6cb17b..348c66b14683 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -511,7 +511,7 @@ static int mdss_remove(struct platform_device *pdev) static const struct msm_mdss_data msm8998_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, - .highest_bank_bit = 1, + .highest_bank_bit = 2, }; static const struct msm_mdss_data qcm2290_data = { -- cgit From ab483e3adcc178254eb1ce0fbdfbea65f86f1006 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Fri, 15 Sep 2023 13:44:25 -0700 Subject: drm/msm/dsi: skip the wait for video mode done if not applicable dsi_wait4video_done() API waits for the DSI video mode engine to become idle so that we can transmit the DCS commands in the beginning of BLLP. However, with the current sequence, the MDP timing engine is turned on after the panel's pre_enable() callback which can send out the DCS commands needed to power up the panel. During those cases, this API will always timeout and print out the error spam leading to long bootup times and log flooding. Fix this by checking if the DSI video engine was actually busy before waiting for it to become idle otherwise this is a redundant wait. changes in v2: - move the reg read below the video mode check - minor fixes in commit text Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/34 Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/557853/ Link: https://lore.kernel.org/r/20230915204426.19011-1-quic_abhinavk@quicinc.com --- drivers/gpu/drm/msm/dsi/dsi_host.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 5d9ec27c89d3..4da450742302 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1082,9 +1082,21 @@ static void dsi_wait4video_done(struct msm_dsi_host *msm_host) static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) { + u32 data; + if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) return; + data = dsi_read(msm_host, REG_DSI_STATUS0); + + /* if video mode engine is not busy, its because + * either timing engine was not turned on or the + * DSI controller has finished transmitting the video + * data already, so no need to wait in those cases + */ + if (!(data & DSI_STATUS0_VIDEO_MODE_ENGINE_BUSY)) + return; + if (msm_host->power_on && msm_host->enabled) { dsi_wait4video_done(msm_host); /* delay 4 ms to skip BLLP */ -- cgit From 6a1d4c7976dd1ee7c9f80bc8e62801ec7b1f2f58 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Sep 2023 15:59:40 +0300 Subject: drm/msm/dsi: fix irq_of_parse_and_map() error checking The irq_of_parse_and_map() function returns zero on error. It never returns negative error codes. Fix the check. Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Signed-off-by: Dan Carpenter Reviewed-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/557715/ Link: https://lore.kernel.org/r/4f3c5c98-04f7-43f7-900f-5d7482c83eef@moroto.mountain Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/dsi_host.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 4da450742302..3d6fb708dc22 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1906,10 +1906,9 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) } msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (msm_host->irq < 0) { - ret = msm_host->irq; - dev_err(&pdev->dev, "failed to get irq: %d\n", ret); - return ret; + if (!msm_host->irq) { + dev_err(&pdev->dev, "failed to get irq\n"); + return -EINVAL; } /* do not autoenable, will be enabled later */ -- cgit From 95e681ca3b65e4ce3d2537b47672d787b7d30375 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 7 Sep 2023 18:26:16 -0700 Subject: drm/msm/dpu: change _dpu_plane_calc_bw() to use u64 to avoid overflow _dpu_plane_calc_bw() uses integer variables to calculate the bandwidth used during plane bandwidth calculations. However for high resolution displays this overflows easily and leads to below errors [dpu error]crtc83 failed performance check -7 Promote the intermediate variables to u64 to avoid overflow. changes in v2: - change to u64 where actually needed in the math Fixes: c33b7c0389e1 ("drm/msm/dpu: add support for clk and bw scaling for display") Reviewed-by: Dmitry Baryshkov Reported-by: Nia Espera Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/32 Tested-by: Nia Espera Patchwork: https://patchwork.freedesktop.org/patch/556288/ Link: https://lore.kernel.org/r/20230908012616.20654-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index c2aaaded07ed..98c1b22e9bca 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -119,6 +119,7 @@ static u64 _dpu_plane_calc_bw(const struct dpu_mdss_cfg *catalog, struct dpu_sw_pipe_cfg *pipe_cfg) { int src_width, src_height, dst_height, fps; + u64 plane_pixel_rate, plane_bit_rate; u64 plane_prefill_bw; u64 plane_bw; u32 hw_latency_lines; @@ -136,13 +137,12 @@ static u64 _dpu_plane_calc_bw(const struct dpu_mdss_cfg *catalog, scale_factor = src_height > dst_height ? mult_frac(src_height, 1, dst_height) : 1; - plane_bw = - src_width * mode->vtotal * fps * fmt->bpp * - scale_factor; + plane_pixel_rate = src_width * mode->vtotal * fps; + plane_bit_rate = plane_pixel_rate * fmt->bpp; - plane_prefill_bw = - src_width * hw_latency_lines * fps * fmt->bpp * - scale_factor * mode->vtotal; + plane_bw = plane_bit_rate * scale_factor; + + plane_prefill_bw = plane_bw * hw_latency_lines; if ((vbp+vpw) > hw_latency_lines) do_div(plane_prefill_bw, (vbp+vpw)); -- cgit From eba8c99a0fc45da1c8d5b5f5bd1dc2e79229a767 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 25 Aug 2023 16:01:08 -0700 Subject: drm/msm/dp: Add newlines to debug printks These debug printks are missing newlines, causing drm debug logs to be hard to read. Add newlines so that the messages are on their own line. Cc: Kuogee Hsieh Cc: Vinod Polimera Signed-off-by: Stephen Boyd Fixes: 601f0479c583 ("drm/msm/dp: add logs across DP driver for ease of debugging") Fixes: cd779808cccd ("drm/msm/dp: Add basic PSR support for eDP") Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/554533/ Link: https://lore.kernel.org/r/20230825230109.2264345-1-swboyd@chromium.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 42427129acea..6375daaeb98e 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -1090,7 +1090,7 @@ int dp_link_process_request(struct dp_link *dp_link) } else if (dp_link_read_psr_error_status(link)) { DRM_ERROR("PSR IRQ_HPD received\n"); } else if (dp_link_psr_capability_changed(link)) { - drm_dbg_dp(link->drm_dev, "PSR Capability changed"); + drm_dbg_dp(link->drm_dev, "PSR Capability changed\n"); } else { ret = dp_link_process_link_status_update(link); if (!ret) { @@ -1107,7 +1107,7 @@ int dp_link_process_request(struct dp_link *dp_link) } } - drm_dbg_dp(link->drm_dev, "sink request=%#x", + drm_dbg_dp(link->drm_dev, "sink request=%#x\n", dp_link->sink_request); return ret; } -- cgit From c8ebf077fbebda3a24335660ded7cff4b90331b8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 Sep 2023 19:50:05 -0500 Subject: smb3: fix confusing debug message The message said it was an invalid mode, when it was intentionally not set. Fix confusing message logged to dmesg. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 405ea324f28d..c75a80bb6d9e 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -848,7 +848,7 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) iov[num].iov_base = create_posix_buf(mode); if (mode == ACL_NO_MODE) - cifs_dbg(FYI, "Invalid mode\n"); + cifs_dbg(FYI, "%s: no mode\n", __func__); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_posix); -- cgit From 41b43b6c6e30a832c790b010a06772e793bca193 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 20 Sep 2023 12:46:27 +0200 Subject: locking/seqlock: Do the lockdep annotation before locking in do_write_seqcount_begin_nested() It was brought up by Tetsuo that the following sequence: write_seqlock_irqsave() printk_deferred_enter() could lead to a deadlock if the lockdep annotation within write_seqlock_irqsave() triggers. The problem is that the sequence counter is incremented before the lockdep annotation is performed. The lockdep splat would then attempt to invoke printk() but the reader side, of the same seqcount, could have a tty_port::lock acquired waiting for the sequence number to become even again. The other lockdep annotations come before the actual locking because "we want to see the locking error before it happens". There is no reason why seqcount should be different here. Do the lockdep annotation first then perform the locking operation (the sequence increment). Fixes: 1ca7d67cf5d5a ("seqcount: Add lockdep functionality to seqcount/seqlock structures") Reported-by: Tetsuo Handa Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230920104627._DTHgPyA@linutronix.de Closes: https://lore.kernel.org/20230621130641.-5iueY1I@linutronix.de --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..e9bd2f65d7f4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -512,8 +512,8 @@ do { \ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + do_raw_write_seqcount_begin(s); } /** -- cgit From 6f411fb5ca9419090bee6a0a46425e0a5060b734 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:09 +0200 Subject: net: ena: Flush XDP packets on error. xdp_do_flush() should be invoked before leaving the NAPI poll function after a XDP-redirect. This is not the case if the driver leaves via the error path (after having a redirect in one of its previous iterations). Invoke xdp_do_flush() also in the error path. Cc: Arthur Kiyanovski Cc: David Arinzon Cc: Noam Dagan Cc: Saeed Bishara Cc: Shay Agroskin Fixes: a318c70ad152b ("net: ena: introduce XDP redirect implementation") Acked-by: Arthur Kiyanovski Signed-off-by: Sebastian Andrzej Siewior Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index ad32ca81f7ef..f955bde10cf9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1833,6 +1833,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, return work_done; error: + if (xdp_flags & ENA_XDP_REDIRECT) + xdp_do_flush(); + adapter = netdev_priv(rx_ring->netdev); if (rc == -ENOSPC) { -- cgit From edc0140cc3b7b91874ebe70eb7d2a851e8817ccc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:10 +0200 Subject: bnxt_en: Flush XDP for bnxt_poll_nitroa0()'s NAPI bnxt_poll_nitroa0() invokes bnxt_rx_pkt() which can run a XDP program which in turn can return XDP_REDIRECT. bnxt_rx_pkt() is also used by __bnxt_poll_work() which flushes (xdp_do_flush()) the packets after each round. bnxt_poll_nitroa0() lacks this feature. xdp_do_flush() should be invoked before leaving the NAPI callback. Invoke xdp_do_flush() after a redirect in bnxt_poll_nitroa0() NAPI. Cc: Michael Chan Fixes: f18c2b77b2e4e ("bnxt_en: optimized XDP_REDIRECT support") Reviewed-by: Andy Gospodarek Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Michael Chan Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5cc0dbe12132..7551aa8068f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2614,6 +2614,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) struct rx_cmp_ext *rxcmp1; u32 cp_cons, tmp_raw_cons; u32 raw_cons = cpr->cp_raw_cons; + bool flush_xdp = false; u32 rx_pkts = 0; u8 event = 0; @@ -2648,6 +2649,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) rx_pkts++; else if (rc == -EBUSY) /* partial completion */ break; + if (event & BNXT_REDIRECT_EVENT) + flush_xdp = true; } else if (unlikely(TX_CMP_TYPE(txcmp) == CMPL_BASE_TYPE_HWRM_DONE)) { bnxt_hwrm_handler(bp, txcmp); @@ -2667,6 +2670,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) if (event & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + if (flush_xdp) + xdp_do_flush(); if (!bnxt_has_work(bp, cpr) && rx_pkts < budget) { napi_complete_done(napi, rx_pkts); -- cgit From 70b2b6892645e58ed6f051dad7f8d1083f0ad553 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 18 Sep 2023 17:36:11 +0200 Subject: octeontx2-pf: Do xdp_do_flush() after redirects. xdp_do_flush() should be invoked before leaving the NAPI poll function if XDP-redirect has been performed. Invoke xdp_do_flush() before leaving NAPI. Cc: Geetha sowjanya Cc: Subbaraya Sundeep Cc: Sunil Goutham Cc: hariprasad Fixes: 06059a1a9a4a5 ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Geethasowjanya Akula Acked-by: Jesper Dangaard Brouer Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index e77d43848955..53b2a4ef5298 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -29,7 +29,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, - struct otx2_cq_queue *cq); + struct otx2_cq_queue *cq, + bool *need_xdp_flush); static int otx2_nix_cq_op_status(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) @@ -337,7 +338,7 @@ static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, struct napi_struct *napi, struct otx2_cq_queue *cq, - struct nix_cqe_rx_s *cqe) + struct nix_cqe_rx_s *cqe, bool *need_xdp_flush) { struct nix_rx_parse_s *parse = &cqe->parse; struct nix_rx_sg_s *sg = &cqe->sg; @@ -353,7 +354,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, } if (pfvf->xdp_prog) - if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq)) + if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq, need_xdp_flush)) return; skb = napi_get_frags(napi); @@ -388,6 +389,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, struct napi_struct *napi, struct otx2_cq_queue *cq, int budget) { + bool need_xdp_flush = false; struct nix_cqe_rx_s *cqe; int processed_cqe = 0; @@ -409,13 +411,15 @@ process_cqe: cq->cq_head++; cq->cq_head &= (cq->cqe_cnt - 1); - otx2_rcv_pkt_handler(pfvf, napi, cq, cqe); + otx2_rcv_pkt_handler(pfvf, napi, cq, cqe, &need_xdp_flush); cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; cqe->sg.seg_addr = 0x00; processed_cqe++; cq->pend_cqe--; } + if (need_xdp_flush) + xdp_do_flush(); /* Free CQEs to HW */ otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, @@ -1354,7 +1358,8 @@ bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx) static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, - struct otx2_cq_queue *cq) + struct otx2_cq_queue *cq, + bool *need_xdp_flush) { unsigned char *hard_start, *data; int qidx = cq->cq_idx; @@ -1391,8 +1396,10 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); - if (!err) + if (!err) { + *need_xdp_flush = true; return true; + } put_page(page); break; default: -- cgit From 1703b2e0de653b459ca6230be32ce7f2ea0ae7ee Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Tue, 19 Sep 2023 10:03:31 -0700 Subject: igc: Expose tx-usecs coalesce setting to user When users attempt to obtain the coalesce setting using the ethtool command, current code always returns 0 for tx-usecs. This is because I225/6 always uses a queue pair setting, hence tx_coalesce_usecs does not return a value during the igc_ethtool_get_coalesce() callback process. The pair queue condition checking in igc_ethtool_get_coalesce() is removed by this patch so that the user gets information of the value of tx-usecs. Even if i225/6 is using queue pair setting, there is no harm in notifying the user of the tx-usecs. The implementation of the current code may have previously been a copy of the legacy code i210. Since I225 has the queue pair setting enabled, tx-usecs will always adhere to the user-set rx-usecs value. An error message will appear when the user attempts to set the tx-usecs value for the input parameters because, by default, they should only set the rx-usecs value. This patch also adds the helper function to get the previous rx coalesce value similar to tx coalesce. How to test: User can get the coalesce value using ethtool command. Example command: Get: ethtool -c Previous output: rx-usecs: 3 rx-frames: n/a rx-usecs-irq: n/a rx-frames-irq: n/a tx-usecs: 0 tx-frames: n/a tx-usecs-irq: n/a tx-frames-irq: n/a New output: rx-usecs: 3 rx-frames: n/a rx-usecs-irq: n/a rx-frames-irq: n/a tx-usecs: 3 tx-frames: n/a tx-usecs-irq: n/a tx-frames-irq: n/a Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230919170331.1581031-1-anthony.l.nguyen@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 31 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 93bce729be76..7ab6dd58e400 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -868,6 +868,18 @@ static void igc_ethtool_get_stats(struct net_device *netdev, spin_unlock(&adapter->stats64_lock); } +static int igc_ethtool_get_previous_rx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->rx_itr_setting <= 3) ? + adapter->rx_itr_setting : adapter->rx_itr_setting >> 2; +} + +static int igc_ethtool_get_previous_tx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->tx_itr_setting <= 3) ? + adapter->tx_itr_setting : adapter->tx_itr_setting >> 2; +} + static int igc_ethtool_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, @@ -875,17 +887,8 @@ static int igc_ethtool_get_coalesce(struct net_device *netdev, { struct igc_adapter *adapter = netdev_priv(netdev); - if (adapter->rx_itr_setting <= 3) - ec->rx_coalesce_usecs = adapter->rx_itr_setting; - else - ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; - - if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) { - if (adapter->tx_itr_setting <= 3) - ec->tx_coalesce_usecs = adapter->tx_itr_setting; - else - ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; - } + ec->rx_coalesce_usecs = igc_ethtool_get_previous_rx_coalesce(adapter); + ec->tx_coalesce_usecs = igc_ethtool_get_previous_tx_coalesce(adapter); return 0; } @@ -910,8 +913,12 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev, ec->tx_coalesce_usecs == 2) return -EINVAL; - if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && + ec->tx_coalesce_usecs != igc_ethtool_get_previous_tx_coalesce(adapter)) { + NL_SET_ERR_MSG_MOD(extack, + "Queue Pair mode enabled, both Rx and Tx coalescing controlled by rx-usecs"); return -EINVAL; + } /* If ITR is disabled, disable DMAC */ if (ec->rx_coalesce_usecs == 0) { -- cgit From f75f71b2c418a27a7c05139bb27a0c83adf88d19 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Sep 2023 11:03:49 +0200 Subject: fbdev/sh7760fb: Depend on FB=y Fix linker error if FB=m about missing fb_io_read and fb_io_write. The linker's error message suggests that this config setting has already been broken for other symbols. All errors (new ones prefixed by >>): sh4-linux-ld: drivers/video/fbdev/sh7760fb.o: in function `sh7760fb_probe': sh7760fb.c:(.text+0x374): undefined reference to `framebuffer_alloc' sh4-linux-ld: sh7760fb.c:(.text+0x394): undefined reference to `fb_videomode_to_var' sh4-linux-ld: sh7760fb.c:(.text+0x39c): undefined reference to `fb_alloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x3a4): undefined reference to `register_framebuffer' sh4-linux-ld: sh7760fb.c:(.text+0x3ac): undefined reference to `fb_dealloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x434): undefined reference to `framebuffer_release' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o: in function `sh7760fb_remove': sh7760fb.c:(.text+0x800): undefined reference to `unregister_framebuffer' sh4-linux-ld: sh7760fb.c:(.text+0x804): undefined reference to `fb_dealloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x814): undefined reference to `framebuffer_release' >> sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0xc): undefined reference to `fb_io_read' >> sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x10): undefined reference to `fb_io_write' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x2c): undefined reference to `cfb_fillrect' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x30): undefined reference to `cfb_copyarea' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x34): undefined reference to `cfb_imageblit' Suggested-by: Randy Dunlap Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309130632.LS04CPWu-lkp@intel.com/ Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: John Paul Adrian Glaubitz Link: https://patchwork.freedesktop.org/patch/msgid/20230918090400.13264-1-tzimmermann@suse.de --- drivers/video/fbdev/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index eac0ba39581e..c29754b65c0e 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -1762,7 +1762,7 @@ config FB_COBALT config FB_SH7760 bool "SH7760/SH7763/SH7720/SH7721 LCDC support" - depends on FB && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ + depends on FB=y && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ || CPU_SUBTYPE_SH7720 || CPU_SUBTYPE_SH7721) select FB_IOMEM_HELPERS help -- cgit From fc21f08375dbf654bd1fda748261955de580ac14 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 19 Sep 2023 19:39:49 +0100 Subject: sfc: handle error pointers returned by rhashtable_lookup_get_insert_fast() Several places in TC offload code assumed that the return from rhashtable_lookup_get_insert_fast() was always either NULL or a valid pointer to an existing entry, but in fact that function can return an error pointer. In that case, perform the usual cleanup of the newly created entry, then pass up the error, rather than attempting to take a reference on the old entry. Fixes: d902e1a737d4 ("sfc: bare bones TC offload on EF100") Reported-by: Dan Carpenter Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/20230919183949.59392-1-edward.cree@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/sfc/tc.c | 21 ++++++++++++++++++--- drivers/net/ethernet/sfc/tc_conntrack.c | 7 ++++++- drivers/net/ethernet/sfc/tc_counters.c | 2 ++ drivers/net/ethernet/sfc/tc_encap_actions.c | 4 ++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 047322b04d4f..834f000ba1c4 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -136,6 +136,8 @@ static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx if (old) { /* don't need our new entry */ kfree(ped); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found, ref taken */ @@ -602,6 +604,8 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, kfree(encap); if (pseudo) /* don't need our new pseudo either */ efx_tc_flower_release_encap_match(efx, pseudo); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return PTR_ERR(old); /* check old and new em_types are compatible */ switch (old->type) { case EFX_TC_EM_DIRECT: @@ -700,6 +704,8 @@ static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx, if (old) { /* don't need our new entry */ kfree(rid); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ @@ -1482,7 +1488,10 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, &rule->linkage, efx_tc_match_action_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Ignoring already-offloaded rule (cookie %lx)\n", tc->cookie); @@ -1697,7 +1706,10 @@ static int efx_tc_flower_replace_lhs(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht, &rule->linkage, efx_tc_lhs_rule_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); rc = -EEXIST; @@ -1858,7 +1870,10 @@ static int efx_tc_flower_replace(struct efx_nic *efx, old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, &rule->linkage, efx_tc_match_action_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c index 8e06bfbcbea1..44bb57670340 100644 --- a/drivers/net/ethernet/sfc/tc_conntrack.c +++ b/drivers/net/ethernet/sfc/tc_conntrack.c @@ -298,7 +298,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht, &conn->linkage, efx_tc_ct_ht_params); - if (old) { + if (IS_ERR(old)) { + rc = PTR_ERR(old); + goto release; + } else if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded conntrack (cookie %lx)\n", tc->cookie); rc = -EEXIST; @@ -482,6 +485,8 @@ struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone, if (old) { /* don't need our new entry */ kfree(ct_zone); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index 0fafb47ea082..c44088424323 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -236,6 +236,8 @@ struct efx_tc_counter_index *efx_tc_flower_get_counter_index( if (old) { /* don't need our new entry */ kfree(ctr); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found */ diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index 7e8bcdb222ad..87443f9dfd22 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -132,6 +132,8 @@ static int efx_bind_neigh(struct efx_nic *efx, /* don't need our new entry */ put_net_track(neigh->net, &neigh->ns_tracker); kfree(neigh); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return PTR_ERR(old); if (!refcount_inc_not_zero(&old->ref)) return -EAGAIN; /* existing entry found, ref taken */ @@ -640,6 +642,8 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( if (old) { /* don't need our new entry */ kfree(encap); + if (IS_ERR(old)) /* oh dear, it's actually an error */ + return ERR_CAST(old); if (!refcount_inc_not_zero(&old->ref)) return ERR_PTR(-EAGAIN); /* existing entry found, ref taken */ -- cgit From db6aee6083a56ac4a6cd1b08fff7938072bcd0a3 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Mar 2023 12:29:51 +0800 Subject: i2c: mux: gpio: Add missing fwnode_handle_put() In i2c_mux_gpio_probe_fw(), we should add fwnode_handle_put() when break out of the iteration device_for_each_child_node() as it will automatically increase and decrease the refcounter. Fixes: 98b2b712bc85 ("i2c: i2c-mux-gpio: Enable this driver in ACPI land") Signed-off-by: Liang He Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-gpio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c index 5d5cbe0130cd..5ca03bd34c8d 100644 --- a/drivers/i2c/muxes/i2c-mux-gpio.c +++ b/drivers/i2c/muxes/i2c-mux-gpio.c @@ -105,8 +105,10 @@ static int i2c_mux_gpio_probe_fw(struct gpiomux *mux, } else if (is_acpi_node(child)) { rc = acpi_get_local_address(ACPI_HANDLE_FWNODE(child), values + i); - if (rc) + if (rc) { + fwnode_handle_put(child); return dev_err_probe(dev, rc, "Cannot get address\n"); + } } i++; -- cgit From 9fc5f9a92fe6897dbed7b9295b234cb7e3cc9d11 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 20 Sep 2023 01:41:19 -0700 Subject: RDMA/bnxt_re: Fix the handling of control path response data Flag that indicate control path command completion should be cleared only after copying the command response data. As soon as the is_in_used flag is clear, the waiting thread can proceed with wrong response data. This wrong data is causing multiple issues like wrong lkey used in data traffic and wrong AH Id etc. Use a memory barrier to ensure that the response data is copied and visible to the process waiting on a different cpu core before clearing the is_in_used flag. Clear the is_in_used after copying the command response. Fixes: bcfee4ce3e01 ("RDMA/bnxt_re: remove redundant cmdq_bitmap") Signed-off-by: Saravanan Vajravel Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1695199280-13520-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index c8c4017fe405..e47b4ca64d33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -665,7 +665,6 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; crsqe = &rcfw->crsqe_tbl[cookie]; - crsqe->is_in_used = false; if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags), @@ -681,8 +680,14 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, atomic_dec(&rcfw->timeout_send); if (crsqe->is_waiter_alive) { - if (crsqe->resp) + if (crsqe->resp) { memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); + /* Insert write memory barrier to ensure that + * response data is copied before clearing the + * flags + */ + smp_wmb(); + } if (!blocked) wait_cmds++; } @@ -694,6 +699,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, if (!is_waiter_alive) crsqe->resp = NULL; + crsqe->is_in_used = false; + hwq->cons += req_size; /* This is a case to handle below scenario - -- cgit From a83c69278975227b689b4a016d1fc8e4820756e9 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 20 Sep 2023 01:41:20 -0700 Subject: RDMA/bnxt_re: Decrement resource stats correctly rc_qp_count and ud_qp_count is not decremented during qp destroy. Fix this. Fixes: cb95709e0dca ("bnxt_re: Update the hw counters for resource stats") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1695199280-13520-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0848c2c2ffcf..faa88d12ee86 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -910,6 +910,10 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) list_del(&qp->list); mutex_unlock(&rdev->qp_lock); atomic_dec(&rdev->stats.res.qp_count); + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC) + atomic_dec(&rdev->stats.res.rc_qp_count); + else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) + atomic_dec(&rdev->stats.res.ud_qp_count); ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); -- cgit From 3b6c4a11bf2b810f772f5c2c1ef6eef3fc268246 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 20 Sep 2023 17:04:01 +0100 Subject: soundwire: bus: Make IRQ handling conditionally built SoundWire has provisions for a simple callback for the IRQ handling so has no hard dependency on IRQ_DOMAIN, but the recent addition of IRQ handling was causing builds without IRQ_DOMAIN to fail. Resolve this by moving the IRQ handling into its own file and only add it to the build when IRQ_DOMAIN is included in the kernel. Fixes: 12a95123bfe1 ("soundwire: bus: Allow SoundWire peripherals to register IRQ handlers") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309150522.MoKeF4jx-lkp@intel.com/ Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230920160401.854052-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/Makefile | 4 +++ drivers/soundwire/bus.c | 31 ++++------------------- drivers/soundwire/bus_type.c | 11 +++------ drivers/soundwire/irq.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ drivers/soundwire/irq.h | 43 ++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 33 deletions(-) create mode 100644 drivers/soundwire/irq.c create mode 100644 drivers/soundwire/irq.h diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile index c3d3ab3262d3..657f5888a77b 100644 --- a/drivers/soundwire/Makefile +++ b/drivers/soundwire/Makefile @@ -15,6 +15,10 @@ ifdef CONFIG_DEBUG_FS soundwire-bus-y += debugfs.o endif +ifdef CONFIG_IRQ_DOMAIN +soundwire-bus-y += irq.o +endif + #AMD driver soundwire-amd-y := amd_manager.o obj-$(CONFIG_SOUNDWIRE_AMD) += soundwire-amd.o diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 1720031f35a3..0e7bc3c40f9d 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -3,13 +3,13 @@ #include #include -#include #include #include #include #include #include #include "bus.h" +#include "irq.h" #include "sysfs_local.h" static DEFINE_IDA(sdw_bus_ida); @@ -25,23 +25,6 @@ static int sdw_get_id(struct sdw_bus *bus) return 0; } -static int sdw_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - struct sdw_bus *bus = h->host_data; - - irq_set_chip_data(virq, bus); - irq_set_chip(virq, &bus->irq_chip); - irq_set_nested_thread(virq, 1); - irq_set_noprobe(virq); - - return 0; -} - -static const struct irq_domain_ops sdw_domain_ops = { - .map = sdw_irq_map, -}; - /** * sdw_bus_master_add() - add a bus Master instance * @bus: bus instance @@ -168,13 +151,9 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - bus->irq_chip.name = dev_name(bus->dev); - bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES, - &sdw_domain_ops, bus); - if (!bus->domain) { - dev_err(bus->dev, "Failed to add IRQ domain\n"); - return -EINVAL; - } + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; return 0; } @@ -213,7 +192,7 @@ void sdw_bus_master_delete(struct sdw_bus *bus) { device_for_each_child(bus->dev, NULL, sdw_delete_slave); - irq_domain_remove(bus->domain); + sdw_irq_delete(bus); sdw_master_device_del(bus); diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c index fafbc284e82d..9fa93bb923d7 100644 --- a/drivers/soundwire/bus_type.c +++ b/drivers/soundwire/bus_type.c @@ -7,6 +7,7 @@ #include #include #include "bus.h" +#include "irq.h" #include "sysfs_local.h" /** @@ -122,11 +123,8 @@ static int sdw_drv_probe(struct device *dev) if (drv->ops && drv->ops->read_prop) drv->ops->read_prop(slave); - if (slave->prop.use_domain_irq) { - slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num); - if (!slave->irq) - dev_warn(dev, "Failed to map IRQ\n"); - } + if (slave->prop.use_domain_irq) + sdw_irq_create_mapping(slave); /* init the sysfs as we have properties now */ ret = sdw_slave_sysfs_init(slave); @@ -176,8 +174,7 @@ static int sdw_drv_remove(struct device *dev) slave->probed = false; if (slave->prop.use_domain_irq) - irq_dispose_mapping(irq_find_mapping(slave->bus->domain, - slave->dev_num)); + sdw_irq_dispose_mapping(slave); mutex_unlock(&slave->sdw_dev_lock); diff --git a/drivers/soundwire/irq.c b/drivers/soundwire/irq.c new file mode 100644 index 000000000000..0c08cebb1235 --- /dev/null +++ b/drivers/soundwire/irq.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2023 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include +#include "irq.h" + +static int sdw_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct sdw_bus *bus = h->host_data; + + irq_set_chip_data(virq, bus); + irq_set_chip(virq, &bus->irq_chip); + irq_set_nested_thread(virq, 1); + irq_set_noprobe(virq); + + return 0; +} + +static const struct irq_domain_ops sdw_domain_ops = { + .map = sdw_irq_map, +}; + +int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode) +{ + bus->irq_chip.name = dev_name(bus->dev); + + bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES, + &sdw_domain_ops, bus); + if (!bus->domain) { + dev_err(bus->dev, "Failed to add IRQ domain\n"); + return -EINVAL; + } + + return 0; +} + +void sdw_irq_delete(struct sdw_bus *bus) +{ + irq_domain_remove(bus->domain); +} + +void sdw_irq_create_mapping(struct sdw_slave *slave) +{ + slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num); + if (!slave->irq) + dev_warn(&slave->dev, "Failed to map IRQ\n"); +} + +void sdw_irq_dispose_mapping(struct sdw_slave *slave) +{ + irq_dispose_mapping(irq_find_mapping(slave->bus->domain, slave->dev_num)); +} diff --git a/drivers/soundwire/irq.h b/drivers/soundwire/irq.h new file mode 100644 index 000000000000..58a58046d92b --- /dev/null +++ b/drivers/soundwire/irq.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef __SDW_IRQ_H +#define __SDW_IRQ_H + +#include +#include + +#if IS_ENABLED(CONFIG_IRQ_DOMAIN) + +int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode); +void sdw_irq_delete(struct sdw_bus *bus); +void sdw_irq_create_mapping(struct sdw_slave *slave); +void sdw_irq_dispose_mapping(struct sdw_slave *slave); + +#else /* CONFIG_IRQ_DOMAIN */ + +static inline int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode) +{ + return 0; +} + +static inline void sdw_irq_delete(struct sdw_bus *bus) +{ +} + +static inline void sdw_irq_create_mapping(struct sdw_slave *slave) +{ +} + +static inline void sdw_irq_dispose_mapping(struct sdw_slave *slave) +{ +} + +#endif /* CONFIG_IRQ_DOMAIN */ + +#endif /* __SDW_IRQ_H */ -- cgit From ef4d48368587f27cb1f690691518889d8fb3510b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Sep 2023 11:48:29 +0530 Subject: RISC-V: KVM: Fix KVM_GET_REG_LIST API for ISA_EXT registers The ISA_EXT registers to enabled/disable ISA extensions for VCPU are always available when underlying host has the corresponding ISA extension. The copy_isa_ext_reg_indices() called by the KVM_GET_REG_LIST API does not align with this expectation so let's fix it. Fixes: 031f9efafc08 ("KVM: riscv: Add KVM_GET_REG_LIST API support") Signed-off-by: Anup Patel Reviewed-by: Atish Patra Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_onereg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 1b7e9fa265cb..e7e833ced91b 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -842,7 +842,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; isa_ext = kvm_isa_ext_arr[i]; - if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext)) + if (!__riscv_isa_extension_available(NULL, isa_ext)) continue; if (uindices) { -- cgit From 17f71a2a340f1dcd397a66110005722177d5927c Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Sep 2023 11:58:29 +0530 Subject: RISC-V: KVM: Fix riscv_vcpu_get_isa_ext_single() for missing extensions The riscv_vcpu_get_isa_ext_single() should fail with -ENOENT error when corresponding ISA extension is not available on the host. Fixes: e98b1085be79 ("RISC-V: KVM: Factor-out ONE_REG related code to its own source file") Signed-off-by: Anup Patel Reviewed-by: Atish Patra Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_onereg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index e7e833ced91b..b7e0e03c69b1 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -460,8 +460,11 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) return -ENOENT; - *reg_val = 0; host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (!__riscv_isa_extension_available(NULL, host_isa_ext)) + return -ENOENT; + + *reg_val = 0; if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) *reg_val = 1; /* Mark the given extension as available */ -- cgit From ba1af6e2e0f0e814c0f6be6ef64917c212f9fa96 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Sep 2023 14:29:19 +0530 Subject: KVM: riscv: selftests: Fix ISA_EXT register handling in get-reg-list Same set of ISA_EXT registers are not present on all host because ISA_EXT registers are visible to the KVM user space based on the ISA extensions available on the host. Also, disabling an ISA extension using corresponding ISA_EXT register does not affect the visibility of the ISA_EXT register itself. Based on the above, we should filter-out all ISA_EXT registers. Fixes: 477069398ed6 ("KVM: riscv: selftests: Add get-reg-list test") Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 35 ++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index d8ecacd03ecf..76c0ad11e423 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -14,17 +14,33 @@ bool filter_reg(__u64 reg) { + switch (reg & ~REG_MASK) { /* - * Some ISA extensions are optional and not present on all host, - * but they can't be disabled through ISA_EXT registers when present. - * So, to make life easy, just filtering out these kind of registers. + * Same set of ISA_EXT registers are not present on all host because + * ISA_EXT registers are visible to the KVM user space based on the + * ISA extensions available on the host. Also, disabling an ISA + * extension using corresponding ISA_EXT register does not affect + * the visibility of the ISA_EXT register itself. + * + * Based on above, we should filter-out all ISA_EXT registers. */ - switch (reg & ~REG_MASK) { + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: @@ -50,12 +66,7 @@ static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) unsigned long value; ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value); - if (ret) { - printf("Failed to get ext %d", ext); - return false; - } - - return !!value; + return (ret) ? false : !!value; } void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) @@ -506,10 +517,6 @@ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, -- cgit From 071ef070ca77e6dfe33fd78afa293e83422f0411 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Sep 2023 10:55:55 +0530 Subject: KVM: riscv: selftests: Selectively filter-out AIA registers Currently the AIA ONE_REG registers are reported by get-reg-list as new registers for various vcpu_reg_list configs whenever Ssaia is available on the host because Ssaia extension can only be disabled by Smstateen extension which is not always available. To tackle this, we should filter-out AIA ONE_REG registers only when Ssaia can't be disabled for a VCPU. Fixes: 477069398ed6 ("KVM: riscv: selftests: Add get-reg-list test") Signed-off-by: Anup Patel Reviewed-by: Atish Patra Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 76c0ad11e423..9f99ea42f45f 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -12,6 +12,8 @@ #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) +static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; + bool filter_reg(__u64 reg) { switch (reg & ~REG_MASK) { @@ -48,6 +50,15 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: return true; + /* AIA registers are always available when Ssaia can't be disabled */ + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA]; default: break; } @@ -71,14 +82,22 @@ static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { + unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; struct vcpu_reg_sublist *s; + int rc; + + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) + __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]); /* * Disable all extensions which were enabled by default * if they were available in the risc-v host. */ - for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) - __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + if (rc && isa_ext_state[i]) + isa_ext_cant_disable[i] = true; + } for_each_sublist(c, s) { if (!s->feature) -- cgit From 50107e8b2a8a59d8cec7e8454e27c1f8e365acdb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Sep 2023 17:39:14 -0700 Subject: KVM: x86/mmu: Open code leaf invalidation from mmu_notifier The mmu_notifier path is a bit of a special snowflake, e.g. it zaps only a single address space (because it's per-slot), and can't always yield. Because of this, it calls kvm_tdp_mmu_zap_leafs() in ways that no one else does. Iterate manually over the leafs in response to an mmu_notifier invalidation, instead of invoking kvm_tdp_mmu_zap_leafs(). Drop the @can_yield param from kvm_tdp_mmu_zap_leafs() as its sole remaining caller unconditionally passes "true". Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230916003916.2545000-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 13 +++++++++---- arch/x86/kvm/mmu/tdp_mmu.h | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e1d011c67cc6..59f5e40b8f55 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6260,7 +6260,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (tdp_mmu_enabled) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, - gfn_end, true, flush); + gfn_end, flush); } if (flush) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6c63f2d1675f..9c081591652b 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -878,12 +878,12 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, * more SPTEs were zapped since the MMU lock was last acquired. */ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, - bool can_yield, bool flush) + bool flush) { struct kvm_mmu_page *root; for_each_tdp_mmu_root_yield_safe(kvm, root, as_id) - flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush); + flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; } @@ -1146,8 +1146,13 @@ retry: bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start, - range->end, range->may_block, flush); + struct kvm_mmu_page *root; + + for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id) + flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, + range->may_block, flush); + + return flush; } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 0a63b1afabd3..eb4fa345d3a4 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -20,8 +20,8 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); -bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush); +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, + bool flush); bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm); -- cgit From 2d3465a75c9f83684d17da6807423824bf260524 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:50 -0400 Subject: phy: qcom-qmp-usb: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Signed-off-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230828152353.16529-2-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0130bb8e809a..e49262ce6d91 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1703,6 +1703,7 @@ static int qmp_usb_power_on(struct phy *phy) void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -1726,6 +1727,9 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_usb_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); -- cgit From c599dc5cca4dd6a5c664e4a8837246e68a96cb4c Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:51 -0400 Subject: phy: qcom-qmp-usb: split PCS_USB init table for sc8280xp and sa8775p For sc8280xp and sa8775p, PCS and PCS_USB initialization data is described in the same table, thus the pcs_usb offset is not being applied during initialization of PCS_USB registers. Fix this by adding the appropriate pcs_usb_tbl tables. Fixes: 8bd2d6e11c99 ("phy: qcom-qmp: Add SA8775P USB3 UNI phy") Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230828152353.16529-3-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index e49262ce6d91..c69577601ae0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1112,8 +1112,6 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1122,6 +1120,11 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), +}; + static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG1, 0xc4), QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG2, 0x89), @@ -1131,9 +1134,6 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1142,6 +1142,12 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), +}; + struct qmp_usb_offsets { u16 serdes; u16 pcs; @@ -1383,6 +1389,8 @@ static const struct qmp_phy_cfg sa8775p_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sa8775p_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sa8775p_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, @@ -1405,6 +1413,8 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sc8280xp_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sc8280xp_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, -- cgit From 211de9681195b92709b5d68e07af948b01c8bb9a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 23 Aug 2023 21:17:28 +0300 Subject: dt-bindings: phy: qcom,ipq8074-qmp-pcie: fix warning regarding reg size Fix the 'reg is too long' warning caused by me adding 64-bit address and size to the example, while default being 32-bit (cell size equal to 1). Reported-by: Rob Herring Fixes: 505fb2541678 ("dt-bindings: phy: migrate QMP PCIe PHY bindings to qcom,sc8280xp-qmp-pcie-phy.yaml") Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230823181728.3082946-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 5073007267ad..634cec5d57ea 100644 --- a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -70,7 +70,7 @@ examples: phy@84000 { compatible = "qcom,ipq6018-qmp-pcie-phy"; - reg = <0x0 0x00084000 0x0 0x1000>; + reg = <0x00084000 0x1000>; clocks = <&gcc GCC_PCIE0_AUX_CLK>, <&gcc GCC_PCIE0_AHB_CLK>, -- cgit From 5f7cd740a6b657fba775bde744496e5ed21851ca Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:13:45 +0800 Subject: phy: qcom: phy-qcom-m31: fix wrong pointer pass to PTR_ERR() It should be 'qphy->vreg' passed to PTR_ERR() when devm_regulator_get() fails. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824091345.1072650-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index ed08072ca032..99d570f4142a 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -256,7 +256,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); -- cgit From 426e05ce126e8febc21fae643139a1072d2670ad Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:23:56 +0800 Subject: phy: qcom: phy-qcom-m31: change m31_ipq5332_regs to static m31_ipq5332_regs is only used in phy-qcom-m31.c now, change it to static. Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824092356.1154839-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 99d570f4142a..014278e5428c 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -82,7 +82,7 @@ struct m31_priv_data { unsigned int nregs; }; -struct m31_phy_regs m31_ipq5332_regs[] = { +static struct m31_phy_regs m31_ipq5332_regs[] = { { USB_PHY_CFG0, UTMI_PHY_OVERRIDE_EN, -- cgit From 6ee8a9a772b5abd9a9791123ca7aee2dbf126d5f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 1 Sep 2023 15:52:31 +0800 Subject: phy: realtek: usb: Drop unnecessary error check for debugfs_create_dir() Both debugfs_create_dir() and debugfs_create_file() return ERR_PTR and never return NULL. As Greg suggested, this patch removes the error checking for debugfs_create_dir in phy-rtk-usb2.c and phy-rtk-usb3.c. This is because the DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. The debugfs APIs have a IS_ERR() judge in start_creating() which can handle it gracefully. So these checks are unnecessary. Fixes: 134e6d25f6bd ("phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY") Fixes: adda6e82a7de ("phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY") Signed-off-by: Jinjie Ruan Suggested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230901075231.1368947-1-ruanjinjie@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/realtek/phy-rtk-usb2.c | 10 ++-------- drivers/phy/realtek/phy-rtk-usb3.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c index 5e7ee060b404..aedc78bd37f7 100644 --- a/drivers/phy/realtek/phy-rtk-usb2.c +++ b/drivers/phy/realtek/phy-rtk-usb2.c @@ -853,17 +853,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb2_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb2_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c index 7881f908aade..dfb3122f3f11 100644 --- a/drivers/phy/realtek/phy-rtk-usb3.c +++ b/drivers/phy/realtek/phy-rtk-usb3.c @@ -416,17 +416,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) return; rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb3_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb3_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) -- cgit From ecec1de5c58f8f3ab6959fcf8d68752eeb65311d Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Thu, 7 Sep 2023 12:20:52 +0530 Subject: phy: qcom: m31: Remove unwanted qphy->vreg is NULL check Fix the following Smatch complaint: drivers/phy/qualcomm/phy-qcom-m31.c:175 m31usb_phy_init() warn: variable dereferenced before check 'qphy->vreg' (see line 167) drivers/phy/qualcomm/phy-qcom-m31.c 166 167 ret = regulator_enable(qphy->vreg); ^^^^^^^^^^ Unchecked dereference 168 if (ret) { 169 dev_err(&phy->dev, "failed to enable regulator, %d\n", ret); 170 return ret; 171 } 172 173 ret = clk_prepare_enable(qphy->clk); 174 if (ret) { 175 if (qphy->vreg) ^^^^^^^^^^ Checked too late 176 regulator_disable(qphy->vreg); 177 dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); Since the phy will not get registered if qphy->vreg is NULL, this check is not needed. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-phy/cbd26132-c624-44b7-a073-73222b287338@moroto.mountain/T/#u Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/1694069452-3794-1-git-send-email-quic_varada@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 014278e5428c..5cb7e79b99b3 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -172,8 +172,7 @@ static int m31usb_phy_init(struct phy *phy) ret = clk_prepare_enable(qphy->clk); if (ret) { - if (qphy->vreg) - regulator_disable(qphy->vreg); + regulator_disable(qphy->vreg); dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); return ret; } -- cgit From 112c23705c6dc59a05290c8e3e597e1b4e9c23fc Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:14 +0200 Subject: phy: qcom-qmp-combo: Square out 8550 POWER_STATE_CONFIG1 There are two instances of the POWER_STATE_CONFIG1 register: one in the PCS space and another one in PCS_USB. The downstream init sequence pokes the latter one while we've been poking the former one (and misnamed it as the latter one, impostor!). Fix that up to avoid UB. Fixes: 49742e9edab3 ("phy: qcom-qmp-combo: Add support for SM8550") Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-1-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cbb28afce135..8fd240dd5127 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -859,10 +859,10 @@ static const struct qmp_phy_init_tbl sm8550_usb3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG1, 0x4b), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG5, 0x10), - QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), }; static const struct qmp_phy_init_tbl sm8550_usb3_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40), diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h index 9510e63ba9d8..c38530d6776b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h @@ -12,7 +12,7 @@ #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG3 0xcc #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG6 0xd8 #define QPHY_USB_V6_PCS_REFGEN_REQ_CONFIG1 0xdc -#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x90 +#define QPHY_USB_V6_PCS_POWER_STATE_CONFIG1 0x90 #define QPHY_USB_V6_PCS_RX_SIGDET_LVL 0x188 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 @@ -23,6 +23,7 @@ #define QPHY_USB_V6_PCS_EQ_CONFIG1 0x1dc #define QPHY_USB_V6_PCS_EQ_CONFIG5 0x1ec +#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x00 #define QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL 0x18 #define QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2 0x3c #define QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L 0x40 -- cgit From 76d20290d0c66a84a7a40c6231e73d1ab25994e5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:15 +0200 Subject: phy: qcom-qmp-combo: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Reported-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-2-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8fd240dd5127..5e6fc8103e9d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2555,6 +2555,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -2576,6 +2577,9 @@ static int qmp_combo_usb_power_on(struct phy *phy) qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_combo_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); -- cgit From 11395c32f9e9e26f2f6281bd916a1161ba42ee6c Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Tue, 12 Sep 2023 07:46:46 -0400 Subject: phy: qualcomm: Fix typos in comments Fix typo in the description of the 'succesfully'. Signed-off-by: Bo Liu Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230912114646.8452-1-liubo03@inspur.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-apq8064-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 8814f4322adf..3642a5d4f2f3 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -152,7 +152,7 @@ static int qcom_apq8064_sata_phy_init(struct phy *generic_phy) return ret; } - /* SATA phy calibrated succesfully, power up to functional mode */ + /* SATA phy calibrated successfully, power up to functional mode */ writel_relaxed(0x3E, base + SATA_PHY_POW_DWN_CTRL1); writel_relaxed(0x01, base + SATA_PHY_RX_IMCAL0); writel_relaxed(0x01, base + SATA_PHY_TX_IMCAL0); -- cgit From d93eeca627db512a56145285dc94feac5b88a1d4 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 21 Sep 2023 15:20:41 +0800 Subject: ALSA: hda/realtek - ALC287 merge RTK codec with CS CS35L41 AMP This is merge model ALC287_FIXUP_THINKPAD_I2S_SPK and ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI. Signed-off-by: Kailang Yang Fixes: f7b069cf0881 ("ALSA: hda/realtek: Fix generic fixup definition for cs35l41 amp") Link: https://lore.kernel.org/r/82a45234327c4c50b4988a27e9f64c37@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 751783f3a15c..48bb57bd8d11 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7343,6 +7343,7 @@ enum { ALC245_FIXUP_HP_MUTE_LED_COEFBIT, ALC245_FIXUP_HP_X360_MUTE_LEDS, ALC287_FIXUP_THINKPAD_I2S_SPK, + ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9441,6 +9442,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, }, + [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9988,14 +9995,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), - SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), -- cgit From bc3b6f59463ba9f4367a80331213db491766b5a1 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 19 Sep 2023 15:39:48 +0300 Subject: MAINTAINERS: Add x86 platform drivers patchwork MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add x86 platform drivers patchwork which has been missing from MAINTAINERS. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230919123948.1583-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Hans de Goede --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b04cbcec521f..dbf1668dcd84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23430,6 +23430,7 @@ M: Ilpo Järvinen M: Mark Gross L: platform-driver-x86@vger.kernel.org S: Maintained +Q: https://patchwork.kernel.org/project/platform-driver-x86/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git F: drivers/platform/olpc/ F: drivers/platform/x86/ -- cgit From 20218dfbaa31b8d3ef842fafcc7eb4c6aa03f80a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Sep 2023 12:15:23 -0400 Subject: btrfs: make sure to initialize start and len in find_free_dev_extent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jens reported a compiler error when using CONFIG_CC_OPTIMIZE_FOR_SIZE=y that looks like this In function ‘gather_device_info’, inlined from ‘btrfs_create_chunk’ at fs/btrfs/volumes.c:5507:8: fs/btrfs/volumes.c:5245:48: warning: ‘dev_offset’ may be used uninitialized [-Wmaybe-uninitialized] 5245 | devices_info[ndevs].dev_offset = dev_offset; | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~ fs/btrfs/volumes.c: In function ‘btrfs_create_chunk’: fs/btrfs/volumes.c:5196:13: note: ‘dev_offset’ was declared here 5196 | u64 dev_offset; This occurs because find_free_dev_extent is responsible for setting dev_offset, however if we get an -ENOMEM at the top of the function we'll return without setting the value. This isn't actually a problem because we will see the -ENOMEM in gather_device_info() and return and not use the uninitialized value, however we also just don't want the compiler warning so rework the code slightly in find_free_dev_extent() to make sure it's always setting *start and *len to avoid the compiler warning. Reported-by: Jens Axboe Tested-by: Jens Axboe Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 733842136163..e3a3769fd92e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1594,7 +1594,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 search_start; u64 hole_size; u64 max_hole_start; - u64 max_hole_size; + u64 max_hole_size = 0; u64 extent_end; u64 search_end = device->total_bytes; int ret; @@ -1602,17 +1602,16 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, struct extent_buffer *l; search_start = dev_extent_search_start(device); + max_hole_start = search_start; WARN_ON(device->zone_info && !IS_ALIGNED(num_bytes, device->zone_info->zone_size)); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - max_hole_start = search_start; - max_hole_size = 0; - + if (!path) { + ret = -ENOMEM; + goto out; + } again: if (search_start >= search_end || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { -- cgit From b4c639f699349880b7918b861e1bd360442ec450 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Sep 2023 12:15:24 -0400 Subject: btrfs: initialize start_slot in btrfs_log_prealloc_extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jens reported a compiler warning when using CONFIG_CC_OPTIMIZE_FOR_SIZE=y that looks like this fs/btrfs/tree-log.c: In function ‘btrfs_log_prealloc_extents’: fs/btrfs/tree-log.c:4828:23: warning: ‘start_slot’ may be used uninitialized [-Wmaybe-uninitialized] 4828 | ret = copy_items(trans, inode, dst_path, path, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4829 | start_slot, ins_nr, 1, 0); | ~~~~~~~~~~~~~~~~~~~~~~~~~ fs/btrfs/tree-log.c:4725:13: note: ‘start_slot’ was declared here 4725 | int start_slot; | ^~~~~~~~~~ The compiler is incorrect, as we only use this code when ins_len > 0, and when ins_len > 0 we have start_slot properly initialized. However we generally find the -Wmaybe-uninitialized warnings valuable, so initialize start_slot to get rid of the warning. Reported-by: Jens Axboe Tested-by: Jens Axboe Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b9229c08164f..537eb3de8809 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4722,7 +4722,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int slot; int ins_nr = 0; - int start_slot; + int start_slot = 0; int ret; if (!(inode->flags & BTRFS_INODE_PREALLOC)) -- cgit From cd4aece493f99f95d41edcce32927d70a5dde923 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 Sep 2023 15:05:06 +0200 Subject: ACPI: EC: Add quirk for the HP Pavilion Gaming 15-dk1xxx Added GPE quirk entry for the HP Pavilion Gaming 15-dk1xxx. There is a quirk entry for 2 15-c..... laptops, this is for a new version which has 15-dk1xxx as identifier. This fixes the LID switch and rfkill and brightness hotkeys not working. Closes: https://github.com/systemd/systemd/issues/28942 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 660834a49c1f..c95d0edb0be9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1913,6 +1913,17 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "HP 15-cx0041ur"), }, }, + { + /* + * HP Pavilion Gaming Laptop 15-dk1xxx + * https://github.com/systemd/systemd/issues/28942 + */ + .callback = ec_honor_dsdt_gpe, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Gaming Laptop 15-dk1xxx"), + }, + }, { /* * Samsung hardware -- cgit From f2f11fca5d7112e2f91c4854cddd68a059fdaa4a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 19 Sep 2023 23:19:29 +0900 Subject: ksmbd: return invalid parameter error response if smb2 request is invalid If smb2 request from client is invalid, The following kernel oops could happen. The patch e2b76ab8b5c9: "ksmbd: add support for read compound" leads this issue. When request is invalid, It doesn't set anything in the response buffer. This patch add missing set invalid parameter error response. [ 673.085542] ksmbd: cli req too short, len 184 not 142. cmd:5 mid:109 [ 673.085580] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 673.085591] #PF: supervisor read access in kernel mode [ 673.085600] #PF: error_code(0x0000) - not-present page [ 673.085608] PGD 0 P4D 0 [ 673.085620] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 673.085631] CPU: 3 PID: 1039 Comm: kworker/3:0 Not tainted 6.6.0-rc2-tmt #16 [ 673.085643] Hardware name: AZW U59/U59, BIOS JTKT001 05/05/2022 [ 673.085651] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] [ 673.085719] RIP: 0010:ksmbd_conn_write+0x68/0xc0 [ksmbd] [ 673.085808] RAX: 0000000000000000 RBX: ffff88811ade4f00 RCX: 0000000000000000 [ 673.085817] RDX: 0000000000000000 RSI: ffff88810c2a9780 RDI: ffff88810c2a9ac0 [ 673.085826] RBP: ffffc900005e3e00 R08: 0000000000000000 R09: 0000000000000000 [ 673.085834] R10: ffffffffa3168160 R11: 63203a64626d736b R12: ffff8881057c8800 [ 673.085842] R13: ffff8881057c8820 R14: ffff8882781b2380 R15: ffff8881057c8800 [ 673.085852] FS: 0000000000000000(0000) GS:ffff888278180000(0000) knlGS:0000000000000000 [ 673.085864] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 673.085872] CR2: 0000000000000000 CR3: 000000015b63c000 CR4: 0000000000350ee0 [ 673.085883] Call Trace: [ 673.085890] [ 673.085900] ? show_regs+0x6a/0x80 [ 673.085916] ? __die+0x25/0x70 [ 673.085926] ? page_fault_oops+0x154/0x4b0 [ 673.085938] ? tick_nohz_tick_stopped+0x18/0x50 [ 673.085954] ? __irq_work_queue_local+0xba/0x140 [ 673.085967] ? do_user_addr_fault+0x30f/0x6c0 [ 673.085979] ? exc_page_fault+0x79/0x180 [ 673.085992] ? asm_exc_page_fault+0x27/0x30 [ 673.086009] ? ksmbd_conn_write+0x68/0xc0 [ksmbd] [ 673.086067] ? ksmbd_conn_write+0x46/0xc0 [ksmbd] [ 673.086123] handle_ksmbd_work+0x28d/0x4b0 [ksmbd] [ 673.086177] process_one_work+0x178/0x350 [ 673.086193] ? __pfx_worker_thread+0x10/0x10 [ 673.086202] worker_thread+0x2f3/0x420 [ 673.086210] ? _raw_spin_unlock_irqrestore+0x27/0x50 [ 673.086222] ? __pfx_worker_thread+0x10/0x10 [ 673.086230] kthread+0x103/0x140 [ 673.086242] ? __pfx_kthread+0x10/0x10 [ 673.086253] ret_from_fork+0x39/0x60 [ 673.086263] ? __pfx_kthread+0x10/0x10 [ 673.086274] ret_from_fork_asm+0x1b/0x30 Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: Tom Talpey Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/server.c | 4 +++- fs/smb/server/smb2misc.c | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 5ab2f52f9b35..32347fec33c4 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -115,8 +115,10 @@ static int __process_request(struct ksmbd_work *work, struct ksmbd_conn *conn, if (check_conn_state(work)) return SERVER_HANDLER_CONTINUE; - if (ksmbd_verify_smb_message(work)) + if (ksmbd_verify_smb_message(work)) { + conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); return SERVER_HANDLER_ABORT; + } command = conn->ops->get_cmd_val(work); *cmd = command; diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index e881df1d10cb..23bd3d1209df 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -440,10 +440,8 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) validate_credit: if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && - smb2_validate_credit_charge(work->conn, hdr)) { - work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); + smb2_validate_credit_charge(work->conn, hdr)) return 1; - } return 0; } -- cgit From 73f949ea87c7d697210653501ca21efe57295327 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 21 Sep 2023 15:37:06 +0900 Subject: ksmbd: check iov vector index in ksmbd_conn_write() If ->iov_idx is zero, This means that the iov vector for the response was not added during the request process. In other words, it means that there is a problem in generating a response, So this patch return as an error to avoid NULL pointer dereferencing problem. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 0d990c2f33cd..db7fa704a3f6 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -197,6 +197,9 @@ int ksmbd_conn_write(struct ksmbd_work *work) if (work->send_no_response) return 0; + if (!work->iov_idx) + return -EINVAL; + ksmbd_conn_lock(conn); sent = conn->transport->ops->writev(conn->transport, work->iov, work->iov_cnt, -- cgit From 2132df16f53b4f01ab25f5d404f36a22244ae342 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 Sep 2023 11:20:34 +0900 Subject: scsi: core: ata: Do no try to probe for CDL on old drives Some old drives (e.g. an Ultra320 SCSI disk as reported by John) do not seem to execute MAINTENANCE_IN / MI_REPORT_SUPPORTED_OPERATION_CODES commands correctly and hang when a non-zero service action is specified (one command format with service action case in scsi_report_opcode()). Currently, CDL probing with scsi_cdl_check_cmd() is the only caller using a non zero service action for scsi_report_opcode(). To avoid issues with these old drives, do not attempt CDL probe if the device reports support for an SPC version lower than 5 (CDL was introduced in SPC-5). To keep things working with ATA devices which probe for the CDL T2A and T2B pages introduced with SPC-6, modify ata_scsiop_inq_std() to claim SPC-6 version compatibility for ATA drives supporting CDL. SPC-6 standard version number is defined as Dh (= 13) in SPC-6 r09. Fix scsi_probe_lun() to correctly capture this value by changing the bit mask for the second byte of the INQUIRY response from 0x7 to 0xf. include/scsi/scsi.h is modified to add the definition SCSI_SPC_6 with the value 14 (Dh + 1). The missing definitions for the SCSI_SPC_4 and SCSI_SPC_5 versions are also added. Reported-by: John David Anglin Fixes: 624885209f31 ("scsi: core: Detect support for command duration limits") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230915022034.678121-1-dlemoal@kernel.org Tested-by: David Gow Reviewed-by: Bart Van Assche Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 3 +++ drivers/scsi/scsi.c | 11 +++++++++++ drivers/scsi/scsi_scan.c | 2 +- include/scsi/scsi.h | 3 +++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d3f28b82c97b..0e96ed408c71 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1835,6 +1835,9 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) hdr[2] = 0x7; /* claim SPC-5 version compatibility */ } + if (args->dev->flags & ATA_DFLAG_CDL) + hdr[2] = 0xd; /* claim SPC-6 version compatibility */ + memcpy(rbuf, hdr, sizeof(hdr)); memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index d0911bc28663..89367c4bf0ef 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -613,6 +613,17 @@ void scsi_cdl_check(struct scsi_device *sdev) bool cdl_supported; unsigned char *buf; + /* + * Support for CDL was defined in SPC-5. Ignore devices reporting an + * lower SPC version. This also avoids problems with old drives choking + * on MAINTENANCE_IN / MI_REPORT_SUPPORTED_OPERATION_CODES with a + * service action specified, as done in scsi_cdl_check_cmd(). + */ + if (sdev->scsi_level < SCSI_SPC_5) { + sdev->cdl_supported = 0; + return; + } + buf = kmalloc(SCSI_CDL_CHECK_BUF_LEN, GFP_KERNEL); if (!buf) { sdev->cdl_supported = 0; diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 52014b2d39e1..eaa972bee6c0 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -822,7 +822,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, * device is attached at LUN 0 (SCSI_SCAN_TARGET_PRESENT) so * non-zero LUNs can be scanned. */ - sdev->scsi_level = inq_result[2] & 0x07; + sdev->scsi_level = inq_result[2] & 0x0f; if (sdev->scsi_level >= 2 || (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1)) sdev->scsi_level++; diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index ec093594ba53..4498f845b112 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -157,6 +157,9 @@ enum scsi_disposition { #define SCSI_3 4 /* SPC */ #define SCSI_SPC_2 5 #define SCSI_SPC_3 6 +#define SCSI_SPC_4 7 +#define SCSI_SPC_5 8 +#define SCSI_SPC_6 14 /* * INQ PERIPHERAL QUALIFIERS -- cgit From b41b28366d3b176c8297961de4f095f2e392402d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Sep 2023 08:22:25 +1000 Subject: MAINTAINERS: remove myself as nouveau maintainer I have resigned, and will no longer be taking as active a role in nouveau development. Signed-off-by: Ben Skeggs Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230918222225.8629-1-skeggsb@gmail.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bf0f54c24f81..c9172a592bdf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6645,7 +6645,6 @@ F: Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS -M: Ben Skeggs M: Karol Herbst M: Lyude Paul L: dri-devel@lists.freedesktop.org -- cgit From a59addacf899b1b21a7b7449a1c52c98704c2472 Mon Sep 17 00:00:00 2001 From: Alexandra Diupina Date: Tue, 19 Sep 2023 17:25:02 +0300 Subject: drivers/net: process the result of hdlc_open() and add call of hdlc_close() in uhdlc_close() Process the result of hdlc_open() and call uhdlc_close() in case of an error. It is necessary to pass the error code up the control flow, similar to a possible error in request_irq(). Also add a hdlc_close() call to the uhdlc_close() because the comment to hdlc_close() says it must be called by the hardware driver when the HDLC device is being closed Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Alexandra Diupina Reviewed-by: Christophe Leroy Signed-off-by: David S. Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 47c2ad7a3e42..fd50bb313b92 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -34,6 +34,8 @@ #define TDM_PPPOHT_SLIC_MAXIN #define RX_BD_ERRORS (R_CD_S | R_OV_S | R_CR_S | R_AB_S | R_NO_S | R_LG_S) +static int uhdlc_close(struct net_device *dev); + static struct ucc_tdm_info utdm_primary_info = { .uf_info = { .tsa = 0, @@ -708,6 +710,7 @@ static int uhdlc_open(struct net_device *dev) hdlc_device *hdlc = dev_to_hdlc(dev); struct ucc_hdlc_private *priv = hdlc->priv; struct ucc_tdm *utdm = priv->utdm; + int rc = 0; if (priv->hdlc_busy != 1) { if (request_irq(priv->ut_info->uf_info.irq, @@ -731,10 +734,13 @@ static int uhdlc_open(struct net_device *dev) napi_enable(&priv->napi); netdev_reset_queue(dev); netif_start_queue(dev); - hdlc_open(dev); + + rc = hdlc_open(dev); + if (rc) + uhdlc_close(dev); } - return 0; + return rc; } static void uhdlc_memclean(struct ucc_hdlc_private *priv) @@ -824,6 +830,8 @@ static int uhdlc_close(struct net_device *dev) netdev_reset_queue(dev); priv->hdlc_busy = 0; + hdlc_close(dev); + return 0; } -- cgit From 684e45e120b82deccaf8b85633905304a3bbf56d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 19 Sep 2023 21:47:47 +0200 Subject: wifi: mt76: mt76x02: fix MT76x0 external LNA gain handling On MT76x0, LNA gain should be applied for both external and internal LNA. On MT76x2, LNA gain should be treated as 0 for external LNA. Move the LNA type based logic to mt76x2 in order to fix mt76x0. Fixes: 2daa67588f34 ("mt76x0: unify lna_gain parsing") Reported-by: Shiji Yang Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230919194747.31647-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c | 7 ------- drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c | 13 +++++++++++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c index 0acabba2d1a5..5d402cf2951c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c @@ -131,15 +131,8 @@ u8 mt76x02_get_lna_gain(struct mt76x02_dev *dev, s8 *lna_2g, s8 *lna_5g, struct ieee80211_channel *chan) { - u16 val; u8 lna; - val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1); - if (val & MT_EE_NIC_CONF_1_LNA_EXT_2G) - *lna_2g = 0; - if (val & MT_EE_NIC_CONF_1_LNA_EXT_5G) - memset(lna_5g, 0, sizeof(s8) * 3); - if (chan->band == NL80211_BAND_2GHZ) lna = *lna_2g; else if (chan->hw_value <= 64) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c index d5809408d1d3..8c01855885ce 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c @@ -256,7 +256,8 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev) struct ieee80211_channel *chan = dev->mphy.chandef.chan; int channel = chan->hw_value; s8 lna_5g[3], lna_2g; - u8 lna; + bool use_lna; + u8 lna = 0; u16 val; if (chan->band == NL80211_BAND_2GHZ) @@ -275,7 +276,15 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev) dev->cal.rx.mcu_gain |= (lna_5g[1] & 0xff) << 16; dev->cal.rx.mcu_gain |= (lna_5g[2] & 0xff) << 24; - lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan); + val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1); + if (chan->band == NL80211_BAND_2GHZ) + use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_2G); + else + use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_5G); + + if (use_lna) + lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan); + dev->cal.rx.lna_gain = mt76x02_sign_extend(lna, 8); } EXPORT_SYMBOL_GPL(mt76x2_read_rx_gain); -- cgit From 5cb9606a901a41f2ffe37fb8528bb6fbfb5d90e2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 Sep 2023 09:32:53 +0200 Subject: gpio: sim: fix an invalid __free() usage gpio_sim_make_line_names() returns NULL or ERR_PTR() so we must not use __free(kfree) on the returned address. Split this function into two, one that determines the size of the "gpio-line-names" array to allocate and one that actually sets the names at correct offsets. The allocation and assignment of the managed pointer happens in between. Fixes: 3faf89f27aab ("gpio: sim: simplify code with cleanup helpers") Reported-by: Alexey Dobriyan Closes: https://lore.kernel.org/all/07c32bf1-6c1a-49d9-b97d-f0ae4a2b42ab@p183/ Suggested-by: Linus Torvalds Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 60 +++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 271db3639a78..44bf1709a648 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -685,52 +686,32 @@ gpio_sim_device_config_live_show(struct config_item *item, char *page) return sprintf(page, "%c\n", live ? '1' : '0'); } -static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank, - unsigned int *line_names_size) +static unsigned int gpio_sim_get_line_names_size(struct gpio_sim_bank *bank) { - unsigned int max_offset = 0; - bool has_line_names = false; struct gpio_sim_line *line; - char **line_names; + unsigned int size = 0; list_for_each_entry(line, &bank->line_list, siblings) { - if (line->offset >= bank->num_lines) + if (!line->name || (line->offset >= bank->num_lines)) continue; - if (line->name) { - if (line->offset > max_offset) - max_offset = line->offset; - - /* - * max_offset can stay at 0 so it's not an indicator - * of whether line names were configured at all. - */ - has_line_names = true; - } + size = max(size, line->offset + 1); } - if (!has_line_names) - /* - * This is not an error - NULL means, there are no line - * names configured. - */ - return NULL; - - *line_names_size = max_offset + 1; + return size; +} - line_names = kcalloc(*line_names_size, sizeof(*line_names), GFP_KERNEL); - if (!line_names) - return ERR_PTR(-ENOMEM); +static void +gpio_sim_set_line_names(struct gpio_sim_bank *bank, char **line_names) +{ + struct gpio_sim_line *line; list_for_each_entry(line, &bank->line_list, siblings) { - if (line->offset >= bank->num_lines) + if (!line->name || (line->offset >= bank->num_lines)) continue; - if (line->name && (line->offset <= max_offset)) - line_names[line->offset] = line->name; + line_names[line->offset] = line->name; } - - return line_names; } static void gpio_sim_remove_hogs(struct gpio_sim_device *dev) @@ -834,7 +815,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank, struct fwnode_handle *parent) { struct property_entry properties[GPIO_SIM_PROP_MAX]; - unsigned int prop_idx = 0, line_names_size = 0; + unsigned int prop_idx = 0, line_names_size; char **line_names __free(kfree) = NULL; memset(properties, 0, sizeof(properties)); @@ -845,14 +826,19 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank, properties[prop_idx++] = PROPERTY_ENTRY_STRING("gpio-sim,label", bank->label); - line_names = gpio_sim_make_line_names(bank, &line_names_size); - if (IS_ERR(line_names)) - return ERR_CAST(line_names); + line_names_size = gpio_sim_get_line_names_size(bank); + if (line_names_size) { + line_names = kcalloc(line_names_size, sizeof(*line_names), + GFP_KERNEL); + if (!line_names) + return ERR_PTR(-ENOMEM); + + gpio_sim_set_line_names(bank, line_names); - if (line_names) properties[prop_idx++] = PROPERTY_ENTRY_STRING_ARRAY_LEN( "gpio-line-names", line_names, line_names_size); + } return fwnode_create_software_node(properties, parent); } -- cgit From 59851fb05d759f13662be143eff0aae605815b0e Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 20 Sep 2023 14:41:09 +0100 Subject: i2c: xiic: Correct return value check for xiic_reinit() The error paths for xiic_reinit() return negative values on failure and 0 on success - this error message therefore is triggered on _success_ rather than failure. Correct the condition so it's only shown on failure as intended. Fixes: 8fa9c9388053 ("i2c: xiic: return value of xiic_reinit") Signed-off-by: Daniel Scally Acked-by: Michal Simek Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index b3bb97762c85..71391b590ada 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -710,7 +710,7 @@ static irqreturn_t xiic_process(int irq, void *dev_id) * reset the IP instead of just flush fifos */ ret = xiic_reinit(i2c); - if (!ret) + if (ret < 0) dev_dbg(i2c->adap.dev.parent, "reinit failed\n"); if (i2c->rx_msg) { -- cgit From 23d2626b841c2adccdeb477665313c02dff02dc3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 14 Sep 2023 19:36:04 +0530 Subject: perf/x86/amd/core: Fix overflow reset on hotplug Kernels older than v5.19 do not support PerfMonV2 and the PMI handler does not clear the overflow bits of the PerfCntrGlobalStatus register. Because of this, loading a recent kernel using kexec from an older kernel can result in inconsistent register states on Zen 4 systems. The PMI handler of the new kernel gets confused and shows a warning when an overflow occurs because some of the overflow bits are set even if the corresponding counters are inactive. These are remnants from overflows that were handled by the older kernel. During CPU hotplug, the PerfCntrGlobalCtl and PerfCntrGlobalStatus registers should always be cleared for PerfMonV2-capable processors. However, a condition used for NB event constaints applicable only to older processors currently prevents this from happening. Move the reset sequence to an appropriate place and also clear the LBR Freeze bit. Fixes: 21d59e3e2c40 ("perf/x86/amd/core: Detect PerfMonV2 support") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/882a87511af40792ba69bb0e9026f19a2e71e8a3.1694696888.git.sandipan.das@amd.com --- arch/x86/events/amd/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index abadd5f23425..ed626bfa1eed 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -534,8 +534,12 @@ static void amd_pmu_cpu_reset(int cpu) /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); - /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask); + /* + * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze + * and PerfCntrGLobalStatus.PerfCntrOvfl + */ + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); } static int amd_pmu_cpu_prepare(int cpu) @@ -570,6 +574,7 @@ static void amd_pmu_cpu_starting(int cpu) int i, nb_id; cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; + amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; @@ -591,8 +596,6 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; - - amd_pmu_cpu_reset(cpu); } static void amd_pmu_cpu_dead(int cpu) @@ -601,6 +604,7 @@ static void amd_pmu_cpu_dead(int cpu) kfree(cpuhw->lbr_sel); cpuhw->lbr_sel = NULL; + amd_pmu_cpu_reset(cpu); if (!x86_pmu.amd_nb_constraints) return; @@ -613,8 +617,6 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw->amd_nb = NULL; } - - amd_pmu_cpu_reset(cpu); } static inline void amd_pmu_set_global_ctl(u64 ctl) -- cgit From d5afb4b47e13161b3f33904d45110f9e6463bad6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 19 Sep 2023 22:22:57 -0700 Subject: iommu/arm-smmu-v3: Fix soft lockup triggered by arm_smmu_mm_invalidate_range When running an SVA case, the following soft lockup is triggered: -------------------------------------------------------------------- watchdog: BUG: soft lockup - CPU#244 stuck for 26s! pstate: 83400009 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 lr : arm_smmu_cmdq_issue_cmdlist+0x150/0xa50 sp : ffff8000d83ef290 x29: ffff8000d83ef290 x28: 000000003b9aca00 x27: 0000000000000000 x26: ffff8000d83ef3c0 x25: da86c0812194a0e8 x24: 0000000000000000 x23: 0000000000000040 x22: ffff8000d83ef340 x21: ffff0000c63980c0 x20: 0000000000000001 x19: ffff0000c6398080 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: ffff3000b4a3bbb0 x14: ffff3000b4a30888 x13: ffff3000b4a3cf60 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffc08120e4d6bc x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000048cfa x5 : 0000000000000000 x4 : 0000000000000001 x3 : 000000000000000a x2 : 0000000080000000 x1 : 0000000000000000 x0 : 0000000000000001 Call trace: arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 __arm_smmu_tlb_inv_range+0x118/0x254 arm_smmu_tlb_inv_range_asid+0x6c/0x130 arm_smmu_mm_invalidate_range+0xa0/0xa4 __mmu_notifier_invalidate_range_end+0x88/0x120 unmap_vmas+0x194/0x1e0 unmap_region+0xb4/0x144 do_mas_align_munmap+0x290/0x490 do_mas_munmap+0xbc/0x124 __vm_munmap+0xa8/0x19c __arm64_sys_munmap+0x28/0x50 invoke_syscall+0x78/0x11c el0_svc_common.constprop.0+0x58/0x1c0 do_el0_svc+0x34/0x60 el0_svc+0x2c/0xd4 el0t_64_sync_handler+0x114/0x140 el0t_64_sync+0x1a4/0x1a8 -------------------------------------------------------------------- Note that since 6.6-rc1 the arm_smmu_mm_invalidate_range above is renamed to "arm_smmu_mm_arch_invalidate_secondary_tlbs", yet the problem remains. The commit 06ff87bae8d3 ("arm64: mm: remove unused functions and variable protoypes") fixed a similar lockup on the CPU MMU side. Yet, it can occur to SMMU too, since arm_smmu_mm_arch_invalidate_secondary_tlbs() is called typically next to MMU tlb flush function, e.g. tlb_flush_mmu_tlbonly { tlb_flush { __flush_tlb_range { // check MAX_TLBI_OPS } } mmu_notifier_arch_invalidate_secondary_tlbs { arm_smmu_mm_arch_invalidate_secondary_tlbs { // does not check MAX_TLBI_OPS } } } Clone a CMDQ_MAX_TLBI_OPS from the MAX_TLBI_OPS in tlbflush.h, since in an SVA case SMMU uses the CPU page table, so it makes sense to align with the tlbflush code. Then, replace per-page TLBI commands with a single per-asid TLBI command, if the request size hits this threshold. Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20230920052257.8615-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 4d83edc2be99..8a16cd3ef487 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -186,6 +186,15 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) } } +/* + * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this + * is used as a threshold to replace per-page TLBI commands to issue in the + * command queue with an address-space TLBI command, when SMMU w/o a range + * invalidation feature handles too many per-page TLBI commands, which will + * otherwise result in a soft lockup. + */ +#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3)) + static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, @@ -201,8 +210,13 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, * range. So do a simple translation here by calculating size correctly. */ size = end - start; - if (size == ULONG_MAX) - size = 0; + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) { + if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE) + size = 0; + } else { + if (size == ULONG_MAX) + size = 0; + } if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { if (!size) -- cgit From c5cc3ca707bc916a3f326364751a41f25040aef3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 22 Sep 2023 09:24:41 +1000 Subject: powerpc/stacktrace: Fix arch_stack_walk_reliable() The changes to copy_thread() made in commit eed7c420aac7 ("powerpc: copy_thread differentiate kthreads and user mode threads") inadvertently broke arch_stack_walk_reliable() because it has knowledge of the stack layout. Fix it by changing the condition to match the new logic in copy_thread(). The changes make the comments about the stack layout incorrect, rather than rephrasing them just refer the reader to copy_thread(). Also the comment about the stack backchain is no longer true, since commit edbd0387f324 ("powerpc: copy_thread add a back chain to the switch stack frame"), so remove that as well. Fixes: eed7c420aac7 ("powerpc: copy_thread differentiate kthreads and user mode threads") Reported-by: Joe Lawrence Reviewed-by: Petr Mladek Signed-off-by: Michael Ellerman Link: https://msgid.link/20230921232441.1181843-1-mpe@ellerman.id.au --- arch/powerpc/kernel/stacktrace.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b15f15dcacb5..e6a958a5da27 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -73,29 +73,12 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(task)) { - /* - * For user tasks, this is the SP value loaded on - * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common(). - * - * Likewise for non-swapper kernel threads, - * this also happens to be the top of the stack - * as setup by copy_thread(). - * - * Note that stack backlinks are not properly setup by - * copy_thread() and thus, a forked task() will have - * an unreliable stack trace until it's been - * _switch()'ed to for the first time. - */ - stack_end -= STACK_USER_INT_FRAME_SIZE; - } else { - /* - * idle tasks have a custom stack layout, - * c.f. cpu_idle_thread_init(). - */ + + // See copy_thread() for details. + if (task->flags & PF_KTHREAD) stack_end -= STACK_FRAME_MIN_SIZE; - } + else + stack_end -= STACK_USER_INT_FRAME_SIZE; if (task == current) sp = current_stack_frame(); -- cgit From 58b33e78a31782ffe25d404d5eba9a45fe636e27 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 21 Sep 2023 17:26:10 +1000 Subject: selftests/powerpc: Fix emit_tests to work with run_kselftest.sh In order to use run_kselftest.sh the list of tests must be emitted to populate kselftest-list.txt. The powerpc Makefile is written to use EMIT_TESTS. But support for EMIT_TESTS was dropped in commit d4e59a536f50 ("selftests: Use runner.sh for emit targets"). Although prior to that commit a548de0fe8e1 ("selftests: lib.mk: add test execute bit check to EMIT_TESTS") had already broken run_kselftest.sh for powerpc due to the executable check using the wrong path. It can be fixed by replacing the EMIT_TESTS definitions with actual emit_tests rules in the powerpc Makefiles. This makes run_kselftest.sh able to run powerpc tests: $ cd linux $ export ARCH=powerpc $ export CROSS_COMPILE=powerpc64le-linux-gnu- $ make headers $ make -j -C tools/testing/selftests install $ grep -c "^powerpc" tools/testing/selftests/kselftest_install/kselftest-list.txt 182 Fixes: d4e59a536f50 ("selftests: Use runner.sh for emit targets") Signed-off-by: Michael Ellerman Link: https://msgid.link/20230921072623.828772-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/Makefile | 7 +++---- tools/testing/selftests/powerpc/pmu/Makefile | 11 ++++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 49f2ad1793fd..7ea42fa02eab 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -59,12 +59,11 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS +emit_tests: +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\ done; -endef override define CLEAN +@for TARGET in $(SUB_DIRS); do \ @@ -77,4 +76,4 @@ endef tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: tags $(SUB_DIRS) +.PHONY: tags $(SUB_DIRS) emit_tests diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 2b95e44d20ff..a284fa874a9f 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -30,13 +30,14 @@ override define RUN_TESTS +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef -DEFAULT_EMIT_TESTS := $(EMIT_TESTS) -override define EMIT_TESTS - $(DEFAULT_EMIT_TESTS) +emit_tests: + for TEST in $(TEST_GEN_PROGS); do \ + BASENAME_TEST=`basename $$TEST`; \ + echo "$(COLLECTION):$$BASENAME_TEST"; \ + done +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests -endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE @@ -64,4 +65,4 @@ sampling_tests: event_code_tests: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests ebb sampling_tests event_code_tests +.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests -- cgit From e8c44d3b713b96cda055a23b21e8c4f931dd159f Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 15 Sep 2023 15:02:32 +0200 Subject: rtla/timerlat: Do not stop user-space if a cpu is offline If no CPU list is passed, timerlat in user-space will dispatch one thread per sysconf(_SC_NPROCESSORS_CONF). However, not all CPU might be available, for instance, if HT is disabled. Currently, rtla timerlat is stopping the session if an user-space thread cannot set affinity to a CPU, or if a running user-space thread is killed. However, this is too restrictive. So, reduce the error to a debug message, and rtla timerlat run as long as there is at least one user-space thread alive. Link: https://lore.kernel.org/lkml/59cf2c882900ab7de91c6ee33b382ac7fa6b4ed0.1694781909.git.bristot@kernel.org Fixes: cdca4f4e5e8e ("rtla/timerlat_top: Add timerlat user-space support") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/timerlat_u.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c index 05e310696dd5..01dbf9a6b5a5 100644 --- a/tools/tracing/rtla/src/timerlat_u.c +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -45,7 +45,7 @@ static int timerlat_u_main(int cpu, struct timerlat_u_params *params) retval = sched_setaffinity(gettid(), sizeof(set), &set); if (retval == -1) { - err_msg("Error setting user thread affinity\n"); + debug_msg("Error setting user thread affinity %d, is the CPU online?\n", cpu); exit(1); } @@ -193,7 +193,9 @@ void *timerlat_u_dispatcher(void *data) procs_count--; } } - break; + + if (!procs_count) + break; } sleep(1); -- cgit From 81ec384b80ffbda752c230778d39ea620c7e3bcf Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Tue, 19 Sep 2023 21:30:28 +0800 Subject: rtla: fix a example in rtla-timerlat-hist.rst The following error message is reported when running the example in document. # timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms --no-aa Failed to set timerlat period Could not apply config The unit of the period is microsecond, '1ms' cannot be accepted. usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] ... ... -p/--period us: timerlat period in us ... Also fix another minor missleading comment. Link: https://lore.kernel.org/lkml/20230919133028.697144-1-xiexiuqi@huaweicloud.com Signed-off-by: Xie XiuQi Signed-off-by: Daniel Bristot de Oliveira --- Documentation/tools/rtla/rtla-timerlat-hist.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/tools/rtla/rtla-timerlat-hist.rst b/Documentation/tools/rtla/rtla-timerlat-hist.rst index 057db78d4095..03b7f3deb069 100644 --- a/Documentation/tools/rtla/rtla-timerlat-hist.rst +++ b/Documentation/tools/rtla/rtla-timerlat-hist.rst @@ -36,11 +36,11 @@ EXAMPLE In the example below, **rtla timerlat hist** is set to run for *10* minutes, in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat hist** will change the priority of the *timerlat* threads to run under -*SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The +*SCHED_DEADLINE* priority, with a *100us* runtime every *1ms* period. The *1ms* period is also passed to the *timerlat* tracer. Auto-analysis is disabled to reduce overhead :: - [root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms --no-aa + [root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1000 --no-aa # RTLA timerlat histogram # Time unit is microseconds (us) # Duration: 0 00:10:00 -- cgit From f6267c81dbd9c66e5d7dfd65e5a849f688c877b8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 22 Sep 2023 10:08:29 +0100 Subject: spi: cs42l43: Remove spurious pm_runtime_disable A pm_runtime_disable was left in when the driver was ported to use devm_pm_runtime_enable, remove it. Fixes: ef75e767167a ("spi: cs42l43: Add SPI controller support") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230922090829.1467594-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index 453a9b37ce78..d239fc5a49cc 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -256,7 +256,6 @@ static int cs42l43_spi_probe(struct platform_device *pdev) ret = devm_spi_register_controller(priv->dev, priv->ctlr); if (ret) { - pm_runtime_disable(priv->dev); dev_err(priv->dev, "Failed to register SPI controller: %d\n", ret); } -- cgit From 3a4a893dbb19e229db3b753f0462520b561dee98 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:20 +0200 Subject: mtd: rawnand: arasan: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 88ffef1b65cf ("mtd: rawnand: arasan: Support the hardware BCH ECC engine") Signed-off-by: Miquel Raynal Acked-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/arasan-nand-controller.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c index 4621ec549cc7..a492051c46f5 100644 --- a/drivers/mtd/nand/raw/arasan-nand-controller.c +++ b/drivers/mtd/nand/raw/arasan-nand-controller.c @@ -515,6 +515,7 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, struct mtd_info *mtd = nand_to_mtd(chip); unsigned int len = mtd->writesize + (oob_required ? mtd->oobsize : 0); dma_addr_t dma_addr; + u8 status; int ret; struct anfc_op nfc_op = { .pkt_reg = @@ -561,10 +562,21 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, } /* Spare data is not protected */ - if (oob_required) + if (oob_required) { ret = nand_write_oob_std(chip, page); + if (ret) + return ret; + } - return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int anfc_sel_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, -- cgit From 9777cc13fd2c3212618904636354be60835e10bb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:21 +0200 Subject: mtd: rawnand: pl353: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 08d8c62164a3 ("mtd: rawnand: pl353: Add support for the ARM PL353 SMC NAND controller") Signed-off-by: Miquel Raynal Tested-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/pl35x-nand-controller.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 8da5fee321b5..c506e92a3e45 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -511,6 +511,7 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, u32 addr1 = 0, addr2 = 0, row; u32 cmd_addr; int i, ret; + u8 status; ret = pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_APB); if (ret) @@ -563,6 +564,14 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, if (ret) goto disable_ecc_engine; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + goto disable_ecc_engine; + + if (status & NAND_STATUS_FAIL) + ret = -EIO; + disable_ecc_engine: pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_BYPASS); -- cgit From 5279f4a9eed3ee7d222b76511ea7a22c89e7eefd Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Wed, 13 Sep 2023 12:37:02 +0530 Subject: mtd: rawnand: qcom: Unmap the right resource upon probe failure We currently provide the physical address of the DMA region rather than the output of dma_map_resource() which is obviously wrong. Fixes: 7330fc505af4 ("mtd: rawnand: qcom: stop using phys_to_dma()") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bibek Kumar Patro Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230913070702.12707-1-quic_bibekkum@quicinc.com --- drivers/mtd/nand/raw/qcom_nandc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 64499c1b3603..b079605c84d3 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -3444,7 +3444,7 @@ err_nandc_alloc: err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - dma_unmap_resource(dev, res->start, resource_size(res), + dma_unmap_resource(dev, nandc->base_dma, resource_size(res), DMA_BIDIRECTIONAL, 0); return ret; } -- cgit From 7a795ac8d49e2433e1b97caf5e99129daf8e1b08 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 22 Sep 2023 16:37:11 +0100 Subject: regmap: rbtree: Fix wrong register marked as in-cache when creating new node When regcache_rbtree_write() creates a new rbtree_node it was passing the wrong bit number to regcache_rbtree_set_register(). The bit number is the offset __in number of registers__, but in the case of creating a new block regcache_rbtree_write() was not dividing by the address stride to get the number of registers. Fix this by dividing by map->reg_stride. Compare with regcache_rbtree_read() where the bit is checked. This bug meant that the wrong register was marked as present. The register that was written to the cache could not be read from the cache because it was not marked as cached. But a nearby register could be marked as having a cached value even if it was never written to the cache. Signed-off-by: Richard Fitzgerald Fixes: 3f4ff561bc88 ("regmap: rbtree: Make cache_present bitmap per node") Link: https://lore.kernel.org/r/20230922153711.28103-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-rbtree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index db716ffd083e..3db88bbcae0f 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -453,7 +453,8 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, if (!rbnode) return -ENOMEM; regcache_rbtree_set_register(map, rbnode, - reg - rbnode->base_reg, value); + (reg - rbnode->base_reg) / map->reg_stride, + value); regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode); rbtree_ctx->cached_rbnode = rbnode; } -- cgit From e52dca7216cfeae76a99908a2eea6e850d3f918f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 22 Sep 2023 18:15:47 +0200 Subject: ASoC: soc-generic-dmaengine-pcm: Fix function name in comment While browsing/grepping in the sound core, I found that snd_dmaengine_set_config_from_dai_data() did not exist, in favor of snd_dmaengine_pcm_set_config_from_dai_data(). Fix the typo. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20230922161547.594484-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- sound/soc/soc-generic-dmaengine-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index d0653d775c87..cad222eb9a29 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -44,8 +44,8 @@ static struct device *dmaengine_dma_dev(struct dmaengine_pcm *pcm, * platforms which make use of the snd_dmaengine_dai_dma_data struct for their * DAI DMA data. Internally the function will first call * snd_hwparams_to_dma_slave_config to fill in the slave config based on the - * hw_params, followed by snd_dmaengine_set_config_from_dai_data to fill in the - * remaining fields based on the DAI DMA data. + * hw_params, followed by snd_dmaengine_pcm_set_config_from_dai_data to fill in + * the remaining fields based on the DAI DMA data. */ int snd_dmaengine_pcm_prepare_slave_config(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct dma_slave_config *slave_config) -- cgit From 4ba89dd6ddeca2a733bdaed7c9a5cbe4e19d9124 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:54 -0700 Subject: x86/alternatives: Remove faulty optimization The following commit 095b8303f383 ("x86/alternative: Make custom return thunk unconditional") made '__x86_return_thunk' a placeholder value. All code setting X86_FEATURE_RETHUNK also changes the value of 'x86_return_thunk'. So the optimization at the beginning of apply_returns() is dead code. Also, before the above-mentioned commit, the optimization actually had a bug It bypassed __static_call_fixup(), causing some raw returns to remain unpatched in static call trampolines. Thus the 'Fixes' tag. Fixes: d2408e043e72 ("x86/alternative: Optimize returns patching") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/16d19d2249d4485d8380fb215ffaae81e6b8119e.1693889988.git.jpoimboe@kernel.org --- arch/x86/kernel/alternative.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a5ead6a6d233..c850f5a9b1bb 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -720,14 +720,6 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - /* - * Do not patch out the default return thunks if those needed are the - * ones generated by the compiler. - */ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK) && - (x86_return_thunk == __x86_return_thunk)) - return; - for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; struct insn insn; -- cgit From aee9d30b9744d677509ef790f30f3a24c7841c3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2023 10:12:25 +0000 Subject: x86,static_call: Fix static-call vs return-thunk Commit 7825451fa4dc ("static_call: Add call depth tracking support") failed to realize the problem fixed there is not specific to call depth tracking but applies to all return-thunk uses. Move the fix to the appropriate place and condition. Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") Reported-by: David Kaplan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ingo Molnar Tested-by: Borislav Petkov (AMD) Cc: --- arch/x86/kernel/alternative.c | 3 +++ arch/x86/kernel/callthunks.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c850f5a9b1bb..517ee01503be 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -720,6 +720,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + static_call_force_reinit(); + for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; struct insn insn; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index c06bfc086565..faa9f2299848 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -272,7 +272,6 @@ void __init callthunks_patch_builtin_calls(void) pr_info("Setting up call depth tracking\n"); mutex_lock(&text_mutex); callthunks_setup(&cs, &builtin_coretext); - static_call_force_reinit(); thunks_initialized = true; mutex_unlock(&text_mutex); } -- cgit From f2a55d08d7e1a5746dad80fc5eda023eff2eeea5 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 19 Sep 2023 21:04:35 -0700 Subject: x86/hyperv: Restrict get_vtl to only VTL platforms When Linux runs in a non-default VTL (CONFIG_HYPERV_VTL_MODE=y), get_vtl() must never fail as its return value is used in negotiations with the host. In the more generic case, (CONFIG_HYPERV_VTL_MODE=n) the VTL is always zero so there's no need to do the hypercall. Make get_vtl() BUG() in case of failure and put the implementation under "if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)" to avoid the call altogether in the most generic use case. Signed-off-by: Saurabh Sengar Reviewed-by: Vitaly Kuznetsov Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/1695182675-13405-1-git-send-email-ssengar@linux.microsoft.com --- arch/x86/hyperv/hv_init.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 783ed339f341..f0128fd4031d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -394,6 +394,7 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) static u8 __init get_vtl(void) { u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; @@ -416,13 +417,16 @@ static u8 __init get_vtl(void) if (hv_result_success(ret)) { ret = output->as64.low & HV_X64_VTL_MASK; } else { - pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); - ret = 0; + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); } local_irq_restore(flags); return ret; } +#else +static inline u8 get_vtl(void) { return 0; } +#endif /* * This function is to be invoked early in the boot sequence after the @@ -604,8 +608,7 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) - ms_hyperv.vtl = get_vtl(); + ms_hyperv.vtl = get_vtl(); return; -- cgit From 14058f72cf13e476bcc3b4e9922a8cb2e59783d2 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 21 Sep 2023 21:58:40 -0700 Subject: x86/hyperv: Remove hv_vtl_early_init initcall There has been cases reported where HYPERV_VTL_MODE is enabled by mistake, on a non Hyper-V platforms. This causes the hv_vtl_early_init function to be called in an non Hyper-V/VTL platforms which results the memory corruption. Remove the early_initcall for hv_vtl_early_init and call it at the end of hyperv_init to make sure it is never called in a non Hyper-V platform by mistake. Reported-by: Mathias Krause Closes: https://lore.kernel.org/lkml/40467722-f4ab-19a5-4989-308225b1f9f0@grsecurity.net/ Signed-off-by: Saurabh Sengar Acked-by: Mathias Krause Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/1695358720-27681-1-git-send-email-ssengar@linux.microsoft.com --- arch/x86/hyperv/hv_init.c | 3 +++ arch/x86/hyperv/hv_vtl.c | 3 +-- arch/x86/include/asm/mshyperv.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f0128fd4031d..608f4fe41fb7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -610,6 +610,9 @@ skip_hypercall_pg_init: /* Find the VTL */ ms_hyperv.vtl = get_vtl(); + if (ms_hyperv.vtl > 0) /* non default VTL */ + hv_vtl_early_init(); + return; clean_guest_os_id: diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 36a562218010..999f5ac82fe9 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) return hv_vtl_bringup_vcpu(vp_id, start_eip); } -static int __init hv_vtl_early_init(void) +int __init hv_vtl_early_init(void) { /* * `boot_cpu_has` returns the runtime feature support, @@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void) return 0; } -early_initcall(hv_vtl_early_init); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 033b53f993c6..896445edc6a8 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } #ifdef CONFIG_HYPERV_VTL_MODE void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); #else static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } #endif #include -- cgit From 203a521bd93c323ded93c5aa35069029d5c23611 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 19 Sep 2023 04:36:01 -0700 Subject: x86/hyperv: Add common print prefix "Hyper-V" in hv_init Add "#define pr_fmt()" in hv_init.c to use "Hyper-V:" as common print prefix for all pr_*() statements in this file. Remove the "Hyper-V:" already prefixed in couple of prints. Signed-off-by: Saurabh Sengar Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/1695123361-8877-1-git-send-email-ssengar@linux.microsoft.com --- arch/x86/hyperv/hv_init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 608f4fe41fb7..21556ad87f4b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,6 +7,8 @@ * Author : K. Y. Srinivasan */ +#define pr_fmt(fmt) "Hyper-V: " fmt + #include #include #include @@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; if (!hv_reenlightenment_available()) { - pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + pr_warn("reenlightenment support is unavailable\n"); return; } @@ -568,7 +570,7 @@ skip_hypercall_pg_init: if (cpu_feature_enabled(X86_FEATURE_IBT) && *(u32 *)hv_hypercall_pg != gen_endbr()) { setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + pr_warn("Disabling IBT because of Hyper-V bug\n"); } #endif -- cgit From c777b11d34e0f47dbbc4b018ef65ad030f2b283a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 18 Sep 2023 19:55:51 +0800 Subject: vfio/mdev: Fix a null-ptr-deref bug for mdev_unregister_parent() Inject fault while probing mdpy.ko, if kstrdup() of create_dir() fails in kobject_add_internal() in kobject_init_and_add() in mdev_type_add() in parent_create_sysfs_files(), it will return 0 and probe successfully. And when rmmod mdpy.ko, the mdpy_dev_exit() will call mdev_unregister_parent(), the mdev_type_remove() may traverse uninitialized parent->types[i] in parent_remove_sysfs_files(), and it will cause below null-ptr-deref. If mdev_type_add() fails, return the error code and kset_unregister() to fix the issue. general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 2 PID: 10215 Comm: rmmod Tainted: G W N 6.6.0-rc2+ #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:__kobject_del+0x62/0x1c0 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 51 01 00 00 48 b8 00 00 00 00 00 fc ff df 48 8b 6b 28 48 8d 7d 10 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 24 01 00 00 48 8b 75 10 48 89 df 48 8d 6b 3c e8 RSP: 0018:ffff88810695fd30 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffffffffa0270268 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000004 RDI: 0000000000000010 RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed10233a4ef1 R10: ffff888119d2778b R11: 0000000063666572 R12: 0000000000000000 R13: fffffbfff404e2d4 R14: dffffc0000000000 R15: ffffffffa0271660 FS: 00007fbc81981540(0000) GS:ffff888119d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc14a142dc0 CR3: 0000000110a62003 CR4: 0000000000770ee0 DR0: ffffffff8fb0bce8 DR1: ffffffff8fb0bce9 DR2: ffffffff8fb0bcea DR3: ffffffff8fb0bceb DR6: 00000000fffe0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: ? die_addr+0x3d/0xa0 ? exc_general_protection+0x144/0x220 ? asm_exc_general_protection+0x22/0x30 ? __kobject_del+0x62/0x1c0 kobject_del+0x32/0x50 parent_remove_sysfs_files+0xd6/0x170 [mdev] mdev_unregister_parent+0xfb/0x190 [mdev] ? mdev_register_parent+0x270/0x270 [mdev] ? find_module_all+0x9d/0xe0 mdpy_dev_exit+0x17/0x63 [mdpy] __do_sys_delete_module.constprop.0+0x2fa/0x4b0 ? module_flags+0x300/0x300 ? __fput+0x4e7/0xa00 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fbc813221b7 Code: 73 01 c3 48 8b 0d d1 8c 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d a1 8c 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe780e0648 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 00007ffe780e06a8 RCX: 00007fbc813221b7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055e214df9b58 RBP: 000055e214df9af0 R08: 00007ffe780df5c1 R09: 0000000000000000 R10: 00007fbc8139ecc0 R11: 0000000000000206 R12: 00007ffe780e0870 R13: 00007ffe780e0ed0 R14: 000055e214df9260 R15: 000055e214df9af0 Modules linked in: mdpy(-) mdev vfio_iommu_type1 vfio [last unloaded: mdpy] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 0000000000000000 ]--- RIP: 0010:__kobject_del+0x62/0x1c0 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 51 01 00 00 48 b8 00 00 00 00 00 fc ff df 48 8b 6b 28 48 8d 7d 10 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 24 01 00 00 48 8b 75 10 48 89 df 48 8d 6b 3c e8 RSP: 0018:ffff88810695fd30 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffffffffa0270268 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000004 RDI: 0000000000000010 RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed10233a4ef1 R10: ffff888119d2778b R11: 0000000063666572 R12: 0000000000000000 R13: fffffbfff404e2d4 R14: dffffc0000000000 R15: ffffffffa0271660 FS: 00007fbc81981540(0000) GS:ffff888119d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc14a142dc0 CR3: 0000000110a62003 CR4: 0000000000770ee0 DR0: ffffffff8fb0bce8 DR1: ffffffff8fb0bce9 DR2: ffffffff8fb0bcea DR3: ffffffff8fb0bceb DR6: 00000000fffe0ff0 DR7: 0000000000000600 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Rebooting in 1 seconds.. Fixes: da44c340c4fe ("vfio/mdev: simplify mdev_type handling") Signed-off-by: Jinjie Ruan Reviewed-by: Eric Farman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230918115551.1423193-1-ruanjinjie@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index e4490639d383..9d2738e10c0b 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -233,7 +233,8 @@ int parent_create_sysfs_files(struct mdev_parent *parent) out_err: while (--i >= 0) mdev_type_remove(parent->types[i]); - return 0; + kset_unregister(parent->mdev_types_kset); + return ret; } static ssize_t remove_store(struct device *dev, struct device_attribute *attr, -- cgit From cfc7461a60e324f75dc9d1beadc07890140ffd29 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 22 Sep 2023 21:28:40 +0200 Subject: hyperv: reduce size of ms_hyperv_info Use the hole prior shared_gpa_boundary to store the result of get_vtl. This reduces the size by 8 bytes. Signed-off-by: Olaf Hering Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230922192840.3886-1-olaf@aepfle.de --- include/asm-generic/mshyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index cecd2b7bd033..430f0ae0dde2 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -36,6 +36,7 @@ struct ms_hyperv_info { u32 nested_features; u32 max_vp_index; u32 max_lp_index; + u8 vtl; union { u32 isolation_config_a; struct { @@ -54,7 +55,6 @@ struct ms_hyperv_info { }; }; u64 shared_gpa_boundary; - u8 vtl; }; extern struct ms_hyperv_info ms_hyperv; extern bool hv_nested; -- cgit From 11b1c9cda5d051788269b11c0bdacad3ad17b6c0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 15 Sep 2023 15:19:36 -0500 Subject: dt-bindings: riscv: cpus: Add missing additionalProperties on interrupt-controller node The "interrupt-controller" CPU child node is missing constraints on extra properties. Add "additionalProperties: false" to fix this. Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230915201946.4184468-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/riscv/cpus.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 38c0b5213736..97e8441eda1c 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -91,6 +91,7 @@ properties: interrupt-controller: type: object + additionalProperties: false description: Describes the CPU's local interrupt controller properties: -- cgit From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage); -- cgit From ef36b4f92868d66908e235980f74afdfb9742d12 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 22 Sep 2023 16:34:46 -0400 Subject: eventfs: Remember what dentries were created on dir open Using the following code with libtracefs: int dfd; // create the directory events/kprobes/kp1 tracefs_kprobe_raw(NULL, "kp1", "schedule_timeout", "time=$arg1"); // Open the kprobes directory dfd = tracefs_instance_file_open(NULL, "events/kprobes", O_RDONLY); // Do a lookup of the kprobes/kp1 directory (by looking at enable) tracefs_file_exists(NULL, "events/kprobes/kp1/enable"); // Now create a new entry in the kprobes directory tracefs_kprobe_raw(NULL, "kp2", "schedule_hrtimeout", "expires=$arg1"); // Do another lookup to create the dentries tracefs_file_exists(NULL, "events/kprobes/kp2/enable")) // Close the directory close(dfd); What happened above, the first open (dfd) will call dcache_dir_open_wrapper() that will create the dentries and up their ref counts. Now the creation of "kp2" will add another dentry within the kprobes directory. Upon the close of dfd, eventfs_release() will now do a dput for all the entries in kprobes. But this is where the problem lies. The open only upped the dentry of kp1 and not kp2. Now the close is decrementing both kp1 and kp2, which causes kp2 to get a negative count. Doing a "trace-cmd reset" which deletes all the kprobes cause the kernel to crash! (due to the messed up accounting of the ref counts). To solve this, save all the dentries that are opened in the dcache_dir_open_wrapper() into an array, and use this array to know what dentries to do a dput on in eventfs_release(). Since the dcache_dir_open_wrapper() calls dcache_dir_open() which uses the file->private_data, we need to also add a wrapper around dcache_readdir() that uses the cursor assigned to the file->private_data. This is because the dentries need to also be saved in the file->private_data. To do this create the structure: struct dentry_list { void *cursor; struct dentry **dentries; }; Which will hold both the cursor and the dentries. Some shuffling around is needed to make sure that dcache_dir_open() and dcache_readdir() only see the cursor. Link: https://lore.kernel.org/linux-trace-kernel/20230919211804.230edf1e@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230922163446.1431d4fa@gandalf.local.home Cc: Mark Rutland Cc: Ajay Kaher Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions") Reported-by: "Masami Hiramatsu (Google)" Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 87 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 9f64e7332796..5f1714089884 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -70,6 +70,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); +static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); static int eventfs_release(struct inode *inode, struct file *file); static const struct inode_operations eventfs_root_dir_inode_operations = { @@ -79,7 +80,7 @@ static const struct inode_operations eventfs_root_dir_inode_operations = { static const struct file_operations eventfs_file_operations = { .open = dcache_dir_open_wrapper, .read = generic_read_dir, - .iterate_shared = dcache_readdir, + .iterate_shared = dcache_readdir_wrapper, .llseek = generic_file_llseek, .release = eventfs_release, }; @@ -396,6 +397,11 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, return ret; } +struct dentry_list { + void *cursor; + struct dentry **dentries; +}; + /** * eventfs_release - called to release eventfs file/dir * @inode: inode to be released @@ -404,26 +410,25 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, static int eventfs_release(struct inode *inode, struct file *file) { struct tracefs_inode *ti; - struct eventfs_inode *ei; - struct eventfs_file *ef; - struct dentry *dentry; - int idx; + struct dentry_list *dlist = file->private_data; + void *cursor; + int i; ti = get_tracefs(inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; - ei = ti->private; - idx = srcu_read_lock(&eventfs_srcu); - list_for_each_entry_srcu(ef, &ei->e_top_files, list, - srcu_read_lock_held(&eventfs_srcu)) { - mutex_lock(&eventfs_mutex); - dentry = ef->dentry; - mutex_unlock(&eventfs_mutex); - if (dentry) - dput(dentry); + if (WARN_ON_ONCE(!dlist)) + return -EINVAL; + + for (i = 0; dlist->dentries[i]; i++) { + dput(dlist->dentries[i]); } - srcu_read_unlock(&eventfs_srcu, idx); + + cursor = dlist->cursor; + kfree(dlist->dentries); + kfree(dlist); + file->private_data = cursor; return dcache_dir_close(inode, file); } @@ -442,22 +447,70 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) struct tracefs_inode *ti; struct eventfs_inode *ei; struct eventfs_file *ef; + struct dentry_list *dlist; + struct dentry **dentries = NULL; struct dentry *dentry = file_dentry(file); + struct dentry *d; struct inode *f_inode = file_inode(file); + int cnt = 0; int idx; + int ret; ti = get_tracefs(f_inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; + if (WARN_ON_ONCE(file->private_data)) + return -EINVAL; + + dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); + if (!dlist) + return -ENOMEM; + ei = ti->private; idx = srcu_read_lock(&eventfs_srcu); list_for_each_entry_srcu(ef, &ei->e_top_files, list, srcu_read_lock_held(&eventfs_srcu)) { - create_dentry(ef, dentry, false); + d = create_dentry(ef, dentry, false); + if (d) { + struct dentry **tmp; + + tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + if (!tmp) + break; + tmp[cnt] = d; + tmp[cnt + 1] = NULL; + cnt++; + dentries = tmp; + } } srcu_read_unlock(&eventfs_srcu, idx); - return dcache_dir_open(inode, file); + ret = dcache_dir_open(inode, file); + + /* + * dcache_dir_open() sets file->private_data to a dentry cursor. + * Need to save that but also save all the dentries that were + * opened by this function. + */ + dlist->cursor = file->private_data; + dlist->dentries = dentries; + file->private_data = dlist; + return ret; +} + +/* + * This just sets the file->private_data back to the cursor and back. + */ +static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) +{ + struct dentry_list *dlist = file->private_data; + int ret; + + file->private_data = dlist->cursor; + ret = dcache_readdir(file, ctx); + dlist->cursor = file->private_data; + file->private_data = dlist; + return ret; } /** -- cgit From a76b62518eb30ef59158fa777ab2e2a23e1334f9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Sep 2023 01:07:30 -0700 Subject: cxl/port: Fix cxl_test register enumeration regression The cxl_test unit test environment models a CXL topology for sysfs/user-ABI regression testing. It uses interface mocking via the "--wrap=" linker option to redirect cxl_core routines that parse hardware registers with versions that just publish objects, like devm_cxl_enumerate_decoders(). Starting with: Commit 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port") ...port register enumeration is moved into devm_cxl_add_port(). This conflicts with the "cxl_test avoids emulating registers stance" so either the port code needs to be refactored (too violent), or modified so that register enumeration is skipped on "fake" cxl_test ports (annoying, but straightforward). This conflict has happened previously and the "check for platform device" workaround to avoid instrusive refactoring was deployed in those scenarios. In general, refactoring should only benefit production code, test code needs to remain minimally instrusive to the greatest extent possible. This was missed previously because it may sometimes just cause warning messages to be emitted, but it can also cause test failures. The backport to -stable is only nice to have for clean cxl_test runs. Fixes: 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port") Cc: stable@vger.kernel.org Reported-by: Alison Schofield Reviewed-by: Dave Jiang Tested-by: Dave Jiang Link: https://lore.kernel.org/r/169476525052.1013896.6235102957693675187.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 724be8448eb4..7ca01a834e18 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -706,16 +707,20 @@ static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map, return cxl_setup_regs(map); } -static inline int cxl_port_setup_regs(struct cxl_port *port, - resource_size_t component_reg_phys) +static int cxl_port_setup_regs(struct cxl_port *port, + resource_size_t component_reg_phys) { + if (dev_is_platform(port->uport_dev)) + return 0; return cxl_setup_comp_regs(&port->dev, &port->comp_map, component_reg_phys); } -static inline int cxl_dport_setup_regs(struct cxl_dport *dport, - resource_size_t component_reg_phys) +static int cxl_dport_setup_regs(struct cxl_dport *dport, + resource_size_t component_reg_phys) { + if (dev_is_platform(dport->dport_dev)) + return 0; return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map, component_reg_phys); } -- cgit From c66650d29764e228eba40b7a59fdb70fa6567daa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:53:19 -0700 Subject: cxl/acpi: Annotate struct cxl_cxims_data with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct cxl_cxims_data. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Dan Williams Cc: linux-cxl@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230922175319.work.096-kees@kernel.org Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d1c559879dcc..40d055560e52 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -14,7 +14,7 @@ struct cxl_cxims_data { int nr_maps; - u64 xormaps[]; + u64 xormaps[] __counted_by(nr_maps); }; /* @@ -112,9 +112,9 @@ static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, GFP_KERNEL); if (!cximsd) return -ENOMEM; + cximsd->nr_maps = nr_maps; memcpy(cximsd->xormaps, cxims->xormap_list, nr_maps * sizeof(*cximsd->xormaps)); - cximsd->nr_maps = nr_maps; cxlrd->platform_data = cximsd; return 0; -- cgit From 441a5dfcd96854cbcb625709e2694a9c60adfaab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 21 Sep 2023 05:44:56 -0400 Subject: KVM: x86/mmu: Do not filter address spaces in for_each_tdp_mmu_root_yield_safe() All callers except the MMU notifier want to process all address spaces. Remove the address space ID argument of for_each_tdp_mmu_root_yield_safe() and switch the MMU notifier to use __for_each_tdp_mmu_root_yield_safe(). Extracted out of a patch by Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 8 ++------ arch/x86/kvm/mmu/tdp_mmu.c | 22 +++++++++++----------- arch/x86/kvm/mmu/tdp_mmu.h | 3 +-- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 59f5e40b8f55..54f94f644b42 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6246,7 +6246,6 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { bool flush; - int i; if (WARN_ON_ONCE(gfn_end <= gfn_start)) return; @@ -6257,11 +6256,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); - if (tdp_mmu_enabled) { - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, - gfn_end, flush); - } + if (tdp_mmu_enabled) + flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush); if (flush) kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 9c081591652b..aa90901d2871 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -211,8 +211,12 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \ - __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false) +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, false, false); \ + _root; \ + _root = tdp_mmu_next_root(_kvm, _root, false, false)) \ + if (!kvm_lockdep_assert_mmu_lock_held(_kvm, false)) { \ + } else /* * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write, @@ -877,12 +881,11 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or * more SPTEs were zapped since the MMU lock was last acquired. */ -bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, - bool flush) +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, as_id) + for_each_tdp_mmu_root_yield_safe(kvm, root) flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; @@ -891,7 +894,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *root; - int i; /* * Zap all roots, including invalid roots, as all SPTEs must be dropped @@ -905,10 +907,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - for_each_tdp_mmu_root_yield_safe(kvm, root, i) - tdp_mmu_zap_root(kvm, root, false); - } + for_each_tdp_mmu_root_yield_safe(kvm, root) + tdp_mmu_zap_root(kvm, root, false); } /* @@ -1148,7 +1148,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id) + __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false) flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, range->may_block, flush); diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index eb4fa345d3a4..bc088953f929 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -20,8 +20,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); -bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, - bool flush); +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush); bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm); -- cgit From 0df9dab891ff0d9b646d82e4fe038229e4c02451 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Sep 2023 17:39:15 -0700 Subject: KVM: x86/mmu: Stop zapping invalidated TDP MMU roots asynchronously Stop zapping invalidate TDP MMU roots via work queue now that KVM preserves TDP MMU roots until they are explicitly invalidated. Zapping roots asynchronously was effectively a workaround to avoid stalling a vCPU for an extended during if a vCPU unloaded a root, which at the time happened whenever the guest toggled CR0.WP (a frequent operation for some guest kernels). While a clever hack, zapping roots via an unbound worker had subtle, unintended consequences on host scheduling, especially when zapping multiple roots, e.g. as part of a memslot. Because the work of zapping a root is no longer bound to the task that initiated the zap, things like the CPU affinity and priority of the original task get lost. Losing the affinity and priority can be especially problematic if unbound workqueues aren't affined to a small number of CPUs, as zapping multiple roots can cause KVM to heavily utilize the majority of CPUs in the system, *beyond* the CPUs KVM is already using to run vCPUs. When deleting a memslot via KVM_SET_USER_MEMORY_REGION, the async root zap can result in KVM occupying all logical CPUs for ~8ms, and result in high priority tasks not being scheduled in in a timely manner. In v5.15, which doesn't preserve unloaded roots, the issues were even more noticeable as KVM would zap roots more frequently and could occupy all CPUs for 50ms+. Consuming all CPUs for an extended duration can lead to significant jitter throughout the system, e.g. on ChromeOS with virtio-gpu, deleting memslots is a semi-frequent operation as memslots are deleted and recreated with different host virtual addresses to react to host GPU drivers allocating and freeing GPU blobs. On ChromeOS, the jitter manifests as audio blips during games due to the audio server's tasks not getting scheduled in promptly, despite the tasks having a high realtime priority. Deleting memslots isn't exactly a fast path and should be avoided when possible, and ChromeOS is working towards utilizing MAP_FIXED to avoid the memslot shenanigans, but KVM is squarely in the wrong. Not to mention that removing the async zapping eliminates a non-trivial amount of complexity. Note, one of the subtle behaviors hidden behind the async zapping is that KVM would zap invalidated roots only once (ignoring partial zaps from things like mmu_notifier events). Preserve this behavior by adding a flag to identify roots that are scheduled to be zapped versus roots that have already been zapped but not yet freed. Add a comment calling out why kvm_tdp_mmu_invalidate_all_roots() can encounter invalid roots, as it's not at all obvious why zapping invalidated roots shouldn't simply zap all invalid roots. Reported-by: Pattara Teerapong Cc: David Stevens Cc: Yiwei Zhang Cc: Paul Hsia Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230916003916.2545000-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/kvm/mmu/mmu.c | 13 +--- arch/x86/kvm/mmu/mmu_internal.h | 15 +++-- arch/x86/kvm/mmu/tdp_mmu.c | 133 +++++++++++++++++----------------------- arch/x86/kvm/mmu/tdp_mmu.h | 2 +- arch/x86/kvm/x86.c | 5 +- 6 files changed, 68 insertions(+), 103 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a4def36d5bb..17715cb8731d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,6 @@ struct kvm_arch { * the thread holds the MMU lock in write mode. */ spinlock_t tdp_mmu_pages_lock; - struct workqueue_struct *tdp_mmu_zap_wq; #endif /* CONFIG_X86_64 */ /* @@ -1835,7 +1834,7 @@ void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -int kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 54f94f644b42..f7901cb4d2fa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6167,20 +6167,15 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -int kvm_mmu_init_vm(struct kvm *kvm) +void kvm_mmu_init_vm(struct kvm *kvm) { - int r; - INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - if (tdp_mmu_enabled) { - r = kvm_mmu_init_tdp_mmu(kvm); - if (r < 0) - return r; - } + if (tdp_mmu_enabled) + kvm_mmu_init_tdp_mmu(kvm); kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; @@ -6189,8 +6184,6 @@ int kvm_mmu_init_vm(struct kvm *kvm) kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; - - return 0; } static void mmu_free_vm_memory_caches(struct kvm *kvm) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index b102014e2c60..decc1f153669 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -58,7 +58,12 @@ struct kvm_mmu_page { bool tdp_mmu_page; bool unsync; - u8 mmu_valid_gen; + union { + u8 mmu_valid_gen; + + /* Only accessed under slots_lock. */ + bool tdp_mmu_scheduled_root_to_zap; + }; /* * The shadow page can't be replaced by an equivalent huge page @@ -100,13 +105,7 @@ struct kvm_mmu_page { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ tdp_ptep_t ptep; }; - union { - DECLARE_BITMAP(unsync_child_bitmap, 512); - struct { - struct work_struct tdp_mmu_async_work; - void *tdp_mmu_async_data; - }; - }; + DECLARE_BITMAP(unsync_child_bitmap, 512); /* * Tracks shadow pages that, if zapped, would allow KVM to create an NX diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index aa90901d2871..6cd4dd631a2f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -12,18 +12,10 @@ #include /* Initializes the TDP MMU for the VM, if enabled. */ -int kvm_mmu_init_tdp_mmu(struct kvm *kvm) +void kvm_mmu_init_tdp_mmu(struct kvm *kvm) { - struct workqueue_struct *wq; - - wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); - if (!wq) - return -ENOMEM; - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - kvm->arch.tdp_mmu_zap_wq = wq; - return 1; } /* Arbitrarily returns true so that this may be used in if statements. */ @@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) * ultimately frees all roots. */ kvm_tdp_mmu_invalidate_all_roots(kvm); - - /* - * Destroying a workqueue also first flushes the workqueue, i.e. no - * need to invoke kvm_tdp_mmu_zap_invalidated_roots(). - */ - destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); + kvm_tdp_mmu_zap_invalidated_roots(kvm); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* * Ensure that all the outstanding RCU callbacks to free shadow pages - * can run before the VM is torn down. Work items on tdp_mmu_zap_wq - * can call kvm_tdp_mmu_put_root and create new callbacks. + * can run before the VM is torn down. Putting the last reference to + * zapped roots will create new callbacks. */ rcu_barrier(); } @@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) tdp_mmu_free_sp(sp); } -static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, - bool shared); - -static void tdp_mmu_zap_root_work(struct work_struct *work) -{ - struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page, - tdp_mmu_async_work); - struct kvm *kvm = root->tdp_mmu_async_data; - - read_lock(&kvm->mmu_lock); - - /* - * A TLB flush is not necessary as KVM performs a local TLB flush when - * allocating a new root (see kvm_mmu_load()), and when migrating vCPU - * to a different pCPU. Note, the local TLB flush on reuse also - * invalidates any paging-structure-cache entries, i.e. TLB entries for - * intermediate paging structures, that may be zapped, as such entries - * are associated with the ASID on both VMX and SVM. - */ - tdp_mmu_zap_root(kvm, root, true); - - /* - * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for - * avoiding an infinite loop. By design, the root is reachable while - * it's being asynchronously zapped, thus a different task can put its - * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an - * asynchronously zapped root is unavoidable. - */ - kvm_tdp_mmu_put_root(kvm, root, true); - - read_unlock(&kvm->mmu_lock); -} - -static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root) -{ - root->tdp_mmu_async_data = kvm; - INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work); - queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); -} - void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { @@ -211,11 +158,11 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, false, false); \ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \ _root; \ - _root = tdp_mmu_next_root(_kvm, _root, false, false)) \ - if (!kvm_lockdep_assert_mmu_lock_held(_kvm, false)) { \ + _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \ + if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \ } else /* @@ -296,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) * by a memslot update or by the destruction of the VM. Initialize the * refcount to two; one reference for the vCPU, and one reference for * the TDP MMU itself, which is held until the root is invalidated and - * is ultimately put by tdp_mmu_zap_root_work(). + * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots(). */ refcount_set(&root->tdp_mmu_root_count, 2); @@ -885,7 +832,7 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root) + for_each_tdp_mmu_root_yield_safe(kvm, root, false) flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; @@ -907,7 +854,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ - for_each_tdp_mmu_root_yield_safe(kvm, root) + for_each_tdp_mmu_root_yield_safe(kvm, root, false) tdp_mmu_zap_root(kvm, root, false); } @@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); + struct kvm_mmu_page *root; + + read_lock(&kvm->mmu_lock); + + for_each_tdp_mmu_root_yield_safe(kvm, root, true) { + if (!root->tdp_mmu_scheduled_root_to_zap) + continue; + + root->tdp_mmu_scheduled_root_to_zap = false; + KVM_BUG_ON(!root->role.invalid, kvm); + + /* + * A TLB flush is not necessary as KVM performs a local TLB + * flush when allocating a new root (see kvm_mmu_load()), and + * when migrating a vCPU to a different pCPU. Note, the local + * TLB flush on reuse also invalidates paging-structure-cache + * entries, i.e. TLB entries for intermediate paging structures, + * that may be zapped, as such entries are associated with the + * ASID on both VMX and SVM. + */ + tdp_mmu_zap_root(kvm, root, true); + + /* + * The referenced needs to be put *after* zapping the root, as + * the root must be reachable by mmu_notifiers while it's being + * zapped + */ + kvm_tdp_mmu_put_root(kvm, root, true); + } + + read_unlock(&kvm->mmu_lock); } /* * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that * is about to be zapped, e.g. in response to a memslots update. The actual - * zapping is performed asynchronously. Using a separate workqueue makes it - * easy to ensure that the destruction is performed before the "fast zap" - * completes, without keeping a separate list of invalidated roots; the list is - * effectively the list of work items in the workqueue. + * zapping is done separately so that it happens with mmu_lock with read, + * whereas invalidating roots must be done with mmu_lock held for write (unless + * the VM is being destroyed). * - * Note, the asynchronous worker is gifted the TDP MMU's reference. + * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference. * See kvm_tdp_mmu_get_vcpu_root_hpa(). */ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) @@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) /* * As above, mmu_lock isn't held when destroying the VM! There can't * be other references to @kvm, i.e. nothing else can invalidate roots - * or be consuming roots, but walking the list of roots does need to be - * guarded against roots being deleted by the asynchronous zap worker. + * or get/put references to roots. */ - rcu_read_lock(); - - list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) { + list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { + /* + * Note, invalid roots can outlive a memslot update! Invalid + * roots must be *zapped* before the memslot update completes, + * but a different task can acquire a reference and keep the + * root alive after its been zapped. + */ if (!root->role.invalid) { + root->tdp_mmu_scheduled_root_to_zap = true; root->role.invalid = true; - tdp_mmu_schedule_zap_root(kvm, root); } } - - rcu_read_unlock(); } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index bc088953f929..733a3aef3a96 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -7,7 +7,7 @@ #include "spte.h" -int kvm_mmu_init_tdp_mmu(struct kvm *kvm); +void kvm_mmu_init_tdp_mmu(struct kvm *kvm); void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c9c81e82e65..9f18b06bbda6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12308,9 +12308,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out; - ret = kvm_mmu_init_vm(kvm); - if (ret) - goto out_page_track; + kvm_mmu_init_vm(kvm); ret = static_call(kvm_x86_vm_init)(kvm); if (ret) @@ -12355,7 +12353,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) out_uninit_mmu: kvm_mmu_uninit_vm(kvm); -out_page_track: kvm_page_track_cleanup(kvm); out: return ret; -- cgit From e8d93d5d93f85949e7299be289c6e7e1154b2f78 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Sep 2023 17:06:34 -0400 Subject: KVM: SVM: INTERCEPT_RDTSCP is never intercepted anyway svm_recalc_instruction_intercepts() is always called at least once before the vCPU is started, so the setting or clearing of the RDTSCP intercept can be dropped from the TSC_AUX virtualization support. Extracted from a patch by Tom Lendacky. Cc: stable@vger.kernel.org Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b9a0a939d59f..fa1fb81323b5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3027,11 +3027,8 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) { + guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1); - if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP)) - svm_clr_intercept(svm, INTERCEPT_RDTSCP); - } } void sev_init_vmcb(struct vcpu_svm *svm) -- cgit From e0096d01c4fcb8c96c05643cfc2c20ab78eae4da Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 15 Sep 2023 15:54:30 -0500 Subject: KVM: SVM: Fix TSC_AUX virtualization setup The checks for virtualizing TSC_AUX occur during the vCPU reset processing path. However, at the time of initial vCPU reset processing, when the vCPU is first created, not all of the guest CPUID information has been set. In this case the RDTSCP and RDPID feature support for the guest is not in place and so TSC_AUX virtualization is not established. This continues for each vCPU created for the guest. On the first boot of an AP, vCPU reset processing is executed as a result of an APIC INIT event, this time with all of the guest CPUID information set, resulting in TSC_AUX virtualization being enabled, but only for the APs. The BSP always sees a TSC_AUX value of 0 which probably went unnoticed because, at least for Linux, the BSP TSC_AUX value is 0. Move the TSC_AUX virtualization enablement out of the init_vmcb() path and into the vcpu_after_set_cpuid() path to allow for proper initialization of the support after the guest CPUID information has been set. With the TSC_AUX virtualization support now in the vcpu_set_after_cpuid() path, the intercepts must be either cleared or set based on the guest CPUID input. Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts") Signed-off-by: Tom Lendacky Message-Id: <4137fbcb9008951ab5f0befa74a0399d2cce809a.1694811272.git.thomas.lendacky@amd.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 31 ++++++++++++++++++++++++++----- arch/x86/kvm/svm/svm.c | 9 ++------- arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fa1fb81323b5..4900c078045a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2962,6 +2962,32 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } +static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { + bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + + set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); + } +} + +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_cpuid_entry2 *best; + + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + best = kvm_find_cpuid_entry(vcpu, 0x8000001F); + if (best) + vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_vcpu_after_set_cpuid(svm); +} + static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb01.ptr; @@ -3024,11 +3050,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); - - if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && - (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) - set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1); } void sev_init_vmcb(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f283eb47f6ac..aef1ddf0b705 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4284,7 +4284,6 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_cpuid_entry2 *best; /* * SVM doesn't provide a way to disable just XSAVES in the guest, KVM @@ -4328,12 +4327,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0, !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)); - /* For sev guests, the memory encryption bit is not reserved in CR3. */ - if (sev_guest(vcpu->kvm)) { - best = kvm_find_cpuid_entry(vcpu, 0x8000001F); - if (best) - vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); - } + if (sev_guest(vcpu->kvm)) + sev_vcpu_after_set_cpuid(svm); init_vmcb_after_set_cpuid(vcpu); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f41253958357..be67ab7fdd10 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -684,6 +684,7 @@ void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); void sev_init_vmcb(struct vcpu_svm *svm); +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -- cgit From 916e3e5f26abc165437950daff370c0693572ef4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 15 Sep 2023 15:54:32 -0500 Subject: KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX When the TSC_AUX MSR is virtualized, the TSC_AUX value is swap type "B" within the VMSA. This means that the guest value is loaded on VMRUN and the host value is restored from the host save area on #VMEXIT. Since the value is restored on #VMEXIT, the KVM user return MSR support for TSC_AUX can be replaced by populating the host save area with the current host value of TSC_AUX. And, since TSC_AUX is not changed by Linux post-boot, the host save area can be set once in svm_hardware_enable(). This eliminates the two WRMSR instructions associated with the user return MSR support. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index aef1ddf0b705..9507df93f410 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -683,6 +683,21 @@ static int svm_hardware_enable(void) amd_pmu_enable_virt(); + /* + * If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type + * "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests. + * Since Linux does not change the value of TSC_AUX once set, prime the + * TSC_AUX field now to avoid a RDMSR on every vCPU run. + */ + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { + struct sev_es_save_area *hostsa; + u32 msr_hi; + + hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); + + rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi); + } + return 0; } @@ -1532,7 +1547,14 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); - if (likely(tsc_aux_uret_slot >= 0)) + /* + * TSC_AUX is always virtualized for SEV-ES guests when the feature is + * available. The user return MSR support is not required in this case + * because TSC_AUX is restored on #VMEXIT from the host save area + * (which has been initialized in svm_hardware_enable()). + */ + if (likely(tsc_aux_uret_slot >= 0) && + (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); svm->guest_state_loaded = true; @@ -3086,6 +3108,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: + /* + * TSC_AUX is always virtualized for SEV-ES guests when the + * feature is available. The user return MSR support is not + * required in this case because TSC_AUX is restored on #VMEXIT + * from the host save area (which has been initialized in + * svm_hardware_enable()). + */ + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) + break; + /* * TSC_AUX is usually changed only during boot and never read * directly. Intercept TSC_AUX instead of exposing it to the -- cgit From eb72d5207008db54c659fd34f341672decc306ae Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Sep 2023 13:03:20 +0200 Subject: mfd: cs42l43: Use correct macro for new-style PM runtime ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code was accidentally mixing new and old style macros, update the macros used to remove an unused function warning whilst building with no PM enabled in the config. Fixes: ace6d1448138 ("mfd: cs42l43: Add support for cs42l43 core driver") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/all/20230822114914.340359-1-ckeepax@opensource.cirrus.com/ Reviewed-by: Nathan Chancellor Tested-by: Geert Uytterhoeven Acked-by: Lee Jones Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Torvalds --- drivers/mfd/cs42l43.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/cs42l43.c b/drivers/mfd/cs42l43.c index 37b23e9bae82..7b6d07cbe6fc 100644 --- a/drivers/mfd/cs42l43.c +++ b/drivers/mfd/cs42l43.c @@ -1178,8 +1178,8 @@ err: } EXPORT_NS_GPL_DEV_PM_OPS(cs42l43_pm_ops, MFD_CS42L43) = { - SET_SYSTEM_SLEEP_PM_OPS(cs42l43_suspend, cs42l43_resume) - SET_RUNTIME_PM_OPS(cs42l43_runtime_suspend, cs42l43_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(cs42l43_suspend, cs42l43_resume) + RUNTIME_PM_OPS(cs42l43_runtime_suspend, cs42l43_runtime_resume, NULL) }; MODULE_DESCRIPTION("CS42L43 Core Driver"); -- cgit From 053d7dcd3440fa953ef4ca08de8e0a33d23d3b29 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:37 -0700 Subject: fbdev: mmp: Annotate struct mmphw_ctrl with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmphw_ctrl. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Helge Deller --- drivers/video/fbdev/mmp/hw/mmp_ctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h index 167585a889d3..719b99a9bc77 100644 --- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h +++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h @@ -1406,7 +1406,7 @@ struct mmphw_ctrl { /*pathes*/ int path_num; - struct mmphw_path_plat path_plats[]; + struct mmphw_path_plat path_plats[] __counted_by(path_num); }; static inline int overlay_is_vid(struct mmp_overlay *overlay) -- cgit From e34872523ca56b6a3ed7a5b06febdc66b4f16e3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:41 -0700 Subject: fbdev: mmp: Annotate struct mmp_path with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmp_path. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- include/video/mmp_disp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/mmp_disp.h b/include/video/mmp_disp.h index 77252cb46361..a722dcbf5073 100644 --- a/include/video/mmp_disp.h +++ b/include/video/mmp_disp.h @@ -231,7 +231,7 @@ struct mmp_path { /* layers */ int overlay_num; - struct mmp_overlay overlays[]; + struct mmp_overlay overlays[] __counted_by(overlay_num); }; extern struct mmp_path *mmp_get_path(const char *name); -- cgit From c1a8d1d0edb71dec15c9649cb56866c71c1ecd9e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Sep 2023 19:04:21 +0800 Subject: fbdev: atyfb: only use ioremap_uc() on i386 and ia64 ioremap_uc() is only meaningful on old x86-32 systems with the PAT extension, and on ia64 with its slightly unconventional ioremap() behavior, everywhere else this is the same as ioremap() anyway. Change the only driver that still references ioremap_uc() to only do so on x86-32/ia64 in order to allow removing that interface at some point in the future for the other architectures. On some architectures, ioremap_uc() just returns NULL, changing the driver to call ioremap() means that they now have a chance of working correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Baoquan He Reviewed-by: Luis Chamberlain Cc: Helge Deller Cc: Thomas Zimmermann Cc: Christophe Leroy Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/atyfb_base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 5c87817a4f4c..3dcf83f5e7b4 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3440,11 +3440,15 @@ static int atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *info, } info->fix.mmio_start = raddr; +#if defined(__i386__) || defined(__ia64__) /* * By using strong UC we force the MTRR to never have an * effect on the MMIO region on both non-PAT and PAT systems. */ par->ati_regbase = ioremap_uc(info->fix.mmio_start, 0x1000); +#else + par->ati_regbase = ioremap(info->fix.mmio_start, 0x1000); +#endif if (par->ati_regbase == NULL) return -ENOMEM; -- cgit From d7f393430a17c2bfcdf805462a5aa80be4285b27 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2023 07:55:56 +0200 Subject: IB/mlx4: Fix the size of a buffer in add_port_entries() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to be sure that 'buff' is never truncated, its size should be 12, not 11. When building with W=1, this fixes the following warnings: drivers/infiniband/hw/mlx4/sysfs.c: In function ‘add_port_entries’: drivers/infiniband/hw/mlx4/sysfs.c:268:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 268 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:268:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 268 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/mlx4/sysfs.c:286:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 286 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:286:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 286 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ Fixes: c1e7e466120b ("IB/mlx4: Add iov directory in sysfs under the ib device") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/0bb1443eb47308bc9be30232cc23004c4d4cf43e.1695448530.git.christophe.jaillet@wanadoo.fr Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 24ee79aa2122..88f534cf690e 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; - char buff[11]; + char buff[12]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; -- cgit From 94adf495e733d3b7e8b826c452ba12e995eef7c7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 22 Sep 2023 18:46:49 +0200 Subject: x86/kgdb: Fix a kerneldoc warning when build with W=1 When compiled with W=1, the following warning is generated: arch/x86/kernel/kgdb.c:698: warning: Cannot understand * on line 698 - I thought it was a doc line Remove the corresponding empty comment line to fix the warning. Signed-off-by: Christophe JAILLET Signed-off-by: Ingo Molnar Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/aad659537c1d4ebd86912a6f0be458676c8e69af.1695401178.git.christophe.jaillet@wanadoo.fr --- arch/x86/kernel/kgdb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 3a43a2dee658..9c9faa1634fb 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -695,7 +695,6 @@ void kgdb_arch_exit(void) } /** - * * kgdb_skipexception - Bail out of KGDB when we've been triggered. * @exception: Exception vector number * @regs: Current &struct pt_regs. -- cgit From 9b8df572ba3f4e544366196820a719a40774433e Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 18 Sep 2023 13:24:09 +0100 Subject: irqchip: renesas-rzg2l: Fix logic to clear TINT interrupt source The logic to clear the TINT interrupt source in rzg2l_irqc_irq_disable() is wrong as the mask is correct only for LSB on the TSSR register. This issue is found when testing with two TINT interrupt sources. So fix the logic for all TINTs by using the macro TSSEL_SHIFT() to multiply tssr_offset with 8. Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Biju Das Tested-by: Claudiu Beznea Reviewed-by: Geert Uytterhoeven Reviewed-by: Claudiu Beznea Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230918122411.237635-2-biju.das.jz@bp.renesas.com --- drivers/irqchip/irq-renesas-rzg2l.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 4bbfa2b0a4df..2cee5477be6b 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -118,7 +118,7 @@ static void rzg2l_irqc_irq_disable(struct irq_data *d) raw_spin_lock(&priv->lock); reg = readl_relaxed(priv->base + TSSR(tssr_index)); - reg &= ~(TSSEL_MASK << tssr_offset); + reg &= ~(TSSEL_MASK << TSSEL_SHIFT(tssr_offset)); writel_relaxed(reg, priv->base + TSSR(tssr_index)); raw_spin_unlock(&priv->lock); } -- cgit From b739681b3f8b2a7a684a71ddd048b9b6b5400011 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 19 Aug 2023 05:50:01 -0500 Subject: arm64: dts: imx8mp: Fix SDMA2/3 clocks Commit 16c984524862 ("arm64: dts: imx8mp: don't initialize audio clocks from CCM node") removed the Audio clocks from the main clock node, because the intent is to force people to setup the audio PLL clocks per board instead of having a common set of rates, since not all boards may use the various audio PLL clocks in the same way. Unfortunately, with this parenting removed, the SDMA2 and SDMA3 clocks were slowed to 24MHz because the SDMA2/3 clocks are controlled via the audio_blk_ctrl which is clocked from IMX8MP_CLK_AUDIO_ROOT, and that clock is enabled by pgc_audio. Per the TRM, "The SDMA2/3 target frequency is 400MHz IPG and 400MHz AHB, always 1:1 mode, to make sure there is enough throughput for all the audio use cases." Instead of cluttering the clock node, place the clock rate and parent information into the pgc_audio node. With the parenting and clock rates restored for IMX8MP_CLK_AUDIO_AHB, and IMX8MP_CLK_AUDIO_AXI_SRC, it appears the SDMA2 and SDMA3 run at 400MHz again. Fixes: 16c984524862 ("arm64: dts: imx8mp: don't initialize audio clocks from CCM node") Signed-off-by: Adam Ford Reviewed-by: Lucas Stach Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 6f2f50e1639c..83d907294fbc 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -790,6 +790,12 @@ reg = ; clocks = <&clk IMX8MP_CLK_AUDIO_ROOT>, <&clk IMX8MP_CLK_AUDIO_AXI>; + assigned-clocks = <&clk IMX8MP_CLK_AUDIO_AHB>, + <&clk IMX8MP_CLK_AUDIO_AXI_SRC>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <400000000>, + <600000000>; }; pgc_gpu2d: power-domain@6 { -- cgit From 161af16c18f3e10d81870328928e5fff3a7d47bb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 19 Aug 2023 05:50:02 -0500 Subject: arm64: dts: imx8mp-beacon-kit: Fix audio_pll2 clock Commit 16c984524862 ("arm64: dts: imx8mp: don't initialize audio clocks from CCM node") removed the Audio clocks from the main clock node, because the intent is to force people to setup the audio PLL clocks per board instead of having a common set of rates since not all boards may use the various audio PLL clocks for audio devices. This resulted in an incorrect clock rate when attempting to playback audio, since the AUDIO_PLL2 wasn't set any longer. Fix this by setting the AUDIO_PLL2 rate inside the SAI3 node since it's the SAI3 that needs it. Fixes: 16c984524862 ("arm64: dts: imx8mp: don't initialize audio clocks from CCM node") Signed-off-by: Adam Ford Reviewed-by: Lucas Stach Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts index 06e91297fb16..acd265d8b58e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts @@ -381,9 +381,10 @@ &sai3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai3>; - assigned-clocks = <&clk IMX8MP_CLK_SAI3>; + assigned-clocks = <&clk IMX8MP_CLK_SAI3>, + <&clk IMX8MP_AUDIO_PLL2> ; assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; - assigned-clock-rates = <12288000>; + assigned-clock-rates = <12288000>, <361267200>; fsl,sai-mclk-direction-output; status = "okay"; }; -- cgit From 9d1e8275a28f51599d754ce661c91e0a689c0234 Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Mon, 14 Aug 2023 01:57:00 +0000 Subject: soc: imx8m: Enable OCOTP clock for imx8mm before reading registers Commit 836fb30949d9 ("soc: imx8m: Enable OCOTP clock before reading the register") added configuration to enable the OCOTP clock before attempting to read from the associated registers. This same kexec issue is present with the imx8m SoCs that use the imx8mm_soc_uid function (e.g. imx8mp). This requires the imx8mm_soc_uid function to configure the OCOTP clock before accessing the associated registers. This change implements the same clock enable functionality that is present in the imx8mq_soc_revision function for the imx8mm_soc_uid function. Signed-off-by: Nathan Rossi Reviewed-by: Fabio Estevam Fixes: 836fb30949d9 ("soc: imx8m: Enable OCOTP clock before reading the register") Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx8m.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 1dcd243df567..ec87d9d878f3 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -100,6 +100,7 @@ static void __init imx8mm_soc_uid(void) { void __iomem *ocotp_base; struct device_node *np; + struct clk *clk; u32 offset = of_machine_is_compatible("fsl,imx8mp") ? IMX8MP_OCOTP_UID_OFFSET : 0; @@ -109,11 +110,20 @@ static void __init imx8mm_soc_uid(void) ocotp_base = of_iomap(np, 0); WARN_ON(!ocotp_base); + clk = of_clk_get_by_name(np, NULL); + if (IS_ERR(clk)) { + WARN_ON(IS_ERR(clk)); + return; + } + + clk_prepare_enable(clk); soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH + offset); soc_uid <<= 32; soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW + offset); + clk_disable_unprepare(clk); + clk_put(clk); iounmap(ocotp_base); of_node_put(np); } -- cgit From efa97aed071e0607b15ee08ddb1b7d775b664352 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 14 Aug 2023 16:11:47 +0800 Subject: arm64: dts: imx8mm-evk: Fix hdmi@3d node The hdmi@3d node's compatible string is "adi,adv7535" instead of "adi,adv7533" or "adi,adv751*". Fix the hdmi@3d node by means of: * Use default register addresses for "cec", "edid" and "packet", because there is no need to use a non-default address map. * Add missing interrupt related properties. * Drop "adi,input-*" properties which are only valid for adv751*. * Add VDDEXT_3V3 fixed regulator * Add "*-supply" properties, since most are required. * Fix label names - s/adv7533/adv7535/. Fixes: a27335b3f1e0 ("arm64: dts: imx8mm-evk: Add HDMI support") Signed-off-by: Liu Ying Tested-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi | 32 +++++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi index e31ab8b4f54f..a882c86ec313 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi @@ -26,7 +26,7 @@ port { hdmi_connector_in: endpoint { - remote-endpoint = <&adv7533_out>; + remote-endpoint = <&adv7535_out>; }; }; }; @@ -72,6 +72,13 @@ enable-active-high; }; + reg_vddext_3v3: regulator-vddext-3v3 { + compatible = "regulator-fixed"; + regulator-name = "VDDEXT_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + backlight: backlight { compatible = "pwm-backlight"; pwms = <&pwm1 0 5000000 0>; @@ -317,15 +324,16 @@ hdmi@3d { compatible = "adi,adv7535"; - reg = <0x3d>, <0x3c>, <0x3e>, <0x3f>; - reg-names = "main", "cec", "edid", "packet"; + reg = <0x3d>; + interrupt-parent = <&gpio1>; + interrupts = <9 IRQ_TYPE_EDGE_FALLING>; adi,dsi-lanes = <4>; - - adi,input-depth = <8>; - adi,input-colorspace = "rgb"; - adi,input-clock = "1x"; - adi,input-style = <1>; - adi,input-justification = "evenly"; + avdd-supply = <&buck5_reg>; + dvdd-supply = <&buck5_reg>; + pvdd-supply = <&buck5_reg>; + a2vdd-supply = <&buck5_reg>; + v3p3-supply = <®_vddext_3v3>; + v1p2-supply = <&buck5_reg>; ports { #address-cells = <1>; @@ -334,7 +342,7 @@ port@0 { reg = <0>; - adv7533_in: endpoint { + adv7535_in: endpoint { remote-endpoint = <&dsi_out>; }; }; @@ -342,7 +350,7 @@ port@1 { reg = <1>; - adv7533_out: endpoint { + adv7535_out: endpoint { remote-endpoint = <&hdmi_connector_in>; }; }; @@ -448,7 +456,7 @@ reg = <1>; dsi_out: endpoint { - remote-endpoint = <&adv7533_in>; + remote-endpoint = <&adv7535_in>; data-lanes = <1 2 3 4>; }; }; -- cgit From 6465e260f48790807eef06b583b38ca9789b6072 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Sep 2023 14:31:13 -0700 Subject: Linux 6.6-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57698d048e2c..3de08c780c74 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 537c013b140d373d1ffe6290b841dc00e67effaa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 24 Sep 2023 08:35:53 -0700 Subject: xfs: fix reloading entire unlinked bucket lists During review of the patcheset that provided reloading of the incore iunlink list, Dave made a few suggestions, and I updated the copy in my dev tree. Unfortunately, I then got distracted by ... who even knows what ... and forgot to backport those changes from my dev tree to my release candidate branch. I then sent multiple pull requests with stale patches, and that's what was merged into -rc3. So. This patch re-adds the use of an unlocked iunlink list check to determine if we want to allocate the resources to recreate the incore list. Since lost iunlinked inodes are supposed to be rare, this change helps us avoid paying the transaction and AGF locking costs every time we open any inode. This also re-adds the shutdowns on failure, and re-applies the restructuring of the inner loop in xfs_inode_reload_unlinked_bucket, and re-adds a requested comment about the quotachecking code. Retain the original RVB tag from Dave since there's no code change from the last submission. Fixes: 68b957f64fca1 ("xfs: load uncached unlinked inodes into memory on demand") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_export.c | 16 ++++++++++++---- fs/xfs/xfs_inode.c | 48 +++++++++++++++++++++++++++++++++++------------- fs/xfs/xfs_itable.c | 2 ++ fs/xfs/xfs_qm.c | 15 ++++++++++++--- 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index f71ea786a6d2..7cd09c3a82cb 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -146,10 +146,18 @@ xfs_nfs_get_inode( return ERR_PTR(error); } - error = xfs_inode_reload_unlinked(ip); - if (error) { - xfs_irele(ip); - return ERR_PTR(error); + /* + * Reload the incore unlinked list to avoid failure in inodegc. + * Use an unlocked check here because unrecovered unlinked inodes + * should be somewhat rare. + */ + if (xfs_inode_unlinked_incomplete(ip)) { + error = xfs_inode_reload_unlinked(ip); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + xfs_irele(ip); + return ERR_PTR(error); + } } if (VFS_I(ip)->i_generation != generation) { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f94f7b374041..4d55f58d99b7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1743,6 +1743,14 @@ xfs_inactive( truncate = 1; if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { + /* + * If this inode is being inactivated during a quotacheck and + * has not yet been scanned by quotacheck, we /must/ remove + * the dquots from the inode before inactivation changes the + * block and inode counts. Most probably this is a result of + * reloading the incore iunlinked list to purge unrecovered + * unlinked inodes. + */ xfs_qm_dqdetach(ip); } else { error = xfs_qm_dqattach(ip); @@ -3641,6 +3649,16 @@ xfs_inode_reload_unlinked_bucket( if (error) return error; + /* + * We've taken ILOCK_SHARED and the AGI buffer lock to stabilize the + * incore unlinked list pointers for this inode. Check once more to + * see if we raced with anyone else to reload the unlinked list. + */ + if (!xfs_inode_unlinked_incomplete(ip)) { + foundit = true; + goto out_agibp; + } + bucket = agino % XFS_AGI_UNLINKED_BUCKETS; agi = agibp->b_addr; @@ -3655,25 +3673,27 @@ xfs_inode_reload_unlinked_bucket( while (next_agino != NULLAGINO) { struct xfs_inode *next_ip = NULL; + /* Found this caller's inode, set its backlink. */ if (next_agino == agino) { - /* Found this inode, set its backlink. */ next_ip = ip; next_ip->i_prev_unlinked = prev_agino; foundit = true; + goto next_inode; } - if (!next_ip) { - /* Inode already in memory. */ - next_ip = xfs_iunlink_lookup(pag, next_agino); - } - if (!next_ip) { - /* Inode not in memory, reload. */ - error = xfs_iunlink_reload_next(tp, agibp, prev_agino, - next_agino); - if (error) - break; - next_ip = xfs_iunlink_lookup(pag, next_agino); - } + /* Try in-memory lookup first. */ + next_ip = xfs_iunlink_lookup(pag, next_agino); + if (next_ip) + goto next_inode; + + /* Inode not in memory, try reloading it. */ + error = xfs_iunlink_reload_next(tp, agibp, prev_agino, + next_agino); + if (error) + break; + + /* Grab the reloaded inode. */ + next_ip = xfs_iunlink_lookup(pag, next_agino); if (!next_ip) { /* No incore inode at all? We reloaded it... */ ASSERT(next_ip != NULL); @@ -3681,10 +3701,12 @@ xfs_inode_reload_unlinked_bucket( break; } +next_inode: prev_agino = next_agino; next_agino = next_ip->i_next_unlinked; } +out_agibp: xfs_trans_brelse(tp, agibp); /* Should have found this inode somewhere in the iunlinked bucket. */ if (!error && !foundit) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index ccf0c4ff4490..f5377ba5967a 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -80,10 +80,12 @@ xfs_bulkstat_one_int( if (error) goto out; + /* Reload the incore unlinked list to avoid failure in inodegc. */ if (xfs_inode_unlinked_incomplete(ip)) { error = xfs_inode_reload_unlinked_bucket(tp, ip); if (error) { xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_irele(ip); return error; } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7256090c3895..086e78a6143a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1160,9 +1160,18 @@ xfs_qm_dqusage_adjust( if (error) return error; - error = xfs_inode_reload_unlinked(ip); - if (error) - goto error0; + /* + * Reload the incore unlinked list to avoid failure in inodegc. + * Use an unlocked check here because unrecovered unlinked inodes + * should be somewhat rare. + */ + if (xfs_inode_unlinked_incomplete(ip)) { + error = xfs_inode_reload_unlinked(ip); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + goto error0; + } + } ASSERT(ip->i_delayed_blks == 0); -- cgit From f09752eaf0e8f8befc26b44c4d3e15633e56d16a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 11 Sep 2023 16:45:37 -0500 Subject: arm64: dts: imx: Add imx8mm-prt8mm.dtb to build imx8mm-prt8mm.dts was not getting built. Add it to the build. Fixes: 58497d7a13ed ("arm64: dts: imx: add Protonic PRT8MM board") Signed-off-by: Rob Herring Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile index c6872b7e9471..89aee6c92576 100644 --- a/arch/arm64/boot/dts/freescale/Makefile +++ b/arch/arm64/boot/dts/freescale/Makefile @@ -66,6 +66,7 @@ dtb-$(CONFIG_ARCH_MXC) += imx8mm-mx8menlo.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-nitrogen-r2.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-phg.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-phyboard-polis-rdk.dtb +dtb-$(CONFIG_ARCH_MXC) += imx8mm-prt8mm.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-tqma8mqml-mba8mx.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-var-som-symphony.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-venice-gw71xx-0x.dtb -- cgit From f5d19bbdb5289de632cf4ac8ace809c4648389ea Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 23 Sep 2023 16:26:19 -0300 Subject: dt-bindings: i2c: mxs: Pass ref and 'unevaluatedProperties: false' Running 'make dtbs_check DT_SCHEMA_FILES=i2c-mxs.yaml' throws several schema warnings such as: imx28-m28evk.dtb: i2c@80058000: '#address-cells', '#size-cells', 'codec@a', 'eeprom@51', 'rtc@68' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/i2c/i2c-mxs.yaml# Fix these warnings by passing a reference to i2c-controller.yaml# and using 'unevaluatedProperties: false' just like the yaml bindings of other I2C controllers. Signed-off-by: Fabio Estevam Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-mxs.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml b/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml index 21ae7bce038e..171a41407241 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml @@ -9,6 +9,9 @@ title: Freescale MXS Inter IC (I2C) Controller maintainers: - Shawn Guo +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + properties: compatible: enum: @@ -37,7 +40,7 @@ required: - dmas - dma-names -additionalProperties: false +unevaluatedProperties: false examples: - | -- cgit From b13e59e74ff71a1004e0508107e91e9a84fd7388 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Sep 2023 23:54:06 +0200 Subject: i2c: mux: Avoid potential false error message in i2c_mux_add_adapter I2C_CLASS_DEPRECATED is a flag and not an actual class. There's nothing speaking against both, parent and child, having I2C_CLASS_DEPRECATED set. Therefore exclude it from the check. Signed-off-by: Heiner Kallweit Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 313904be5f3b..57ff09f18c37 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -341,7 +341,7 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, priv->adap.lock_ops = &i2c_parent_lock_ops; /* Sanity check on class */ - if (i2c_mux_parent_classes(parent) & class) + if (i2c_mux_parent_classes(parent) & class & ~I2C_CLASS_DEPRECATED) dev_err(&parent->dev, "Segment %d behind mux can't share classes with ancestors\n", chan_id); -- cgit From f87ef5723536a6545ed9c43e18b13a9faceb3c80 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 1 Sep 2023 12:58:23 +0200 Subject: KVM: s390: fix gisa destroy operation might lead to cpu stalls A GISA cannot be destroyed as long it is linked in the GIB alert list as this would break the alert list. Just waiting for its removal from the list triggered by another vm is not sufficient as it might be the only vm. The below shown cpu stall situation might occur when GIB alerts are delayed and is fixed by calling process_gib_alert_list() instead of waiting. At this time the vcpus of the vm are already destroyed and thus no vcpu can be kicked to enter the SIE again if for some reason an interrupt is pending for that vm. Additionally the IAM restore value is set to 0x00. That would be a bug introduced by incomplete device de-registration, i.e. missing kvm_s390_gisc_unregister() call. Setting this value and the IAM in the GISA to 0x00 guarantees that late interrupts don't bring the GISA back into the alert list. CPU stall caused by kvm_s390_gisa_destroy(): [ 4915.311372] rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 14-.... } 24533 jiffies s: 5269 root: 0x1/. [ 4915.311390] rcu: blocking rcu_node structures (internal RCU debug): l=1:0-15:0x4000/. [ 4915.311394] Task dump for CPU 14: [ 4915.311395] task:qemu-system-s39 state:R running task stack:0 pid:217198 ppid:1 flags:0x00000045 [ 4915.311399] Call Trace: [ 4915.311401] [<0000038003a33a10>] 0x38003a33a10 [ 4933.861321] rcu: INFO: rcu_sched self-detected stall on CPU [ 4933.861332] rcu: 14-....: (42008 ticks this GP) idle=53f4/1/0x4000000000000000 softirq=61530/61530 fqs=14031 [ 4933.861353] rcu: (t=42008 jiffies g=238109 q=100360 ncpus=18) [ 4933.861357] CPU: 14 PID: 217198 Comm: qemu-system-s39 Not tainted 6.5.0-20230816.rc6.git26.a9d17c5d8813.300.fc38.s390x #1 [ 4933.861360] Hardware name: IBM 8561 T01 703 (LPAR) [ 4933.861361] Krnl PSW : 0704e00180000000 000003ff804bfc66 (kvm_s390_gisa_destroy+0x3e/0xe0 [kvm]) [ 4933.861414] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 [ 4933.861416] Krnl GPRS: 0000000000000000 00000372000000fc 00000002134f8000 000000000d5f5900 [ 4933.861419] 00000002f5ea1d18 00000002f5ea1d18 0000000000000000 0000000000000000 [ 4933.861420] 00000002134fa890 00000002134f8958 000000000d5f5900 00000002134f8000 [ 4933.861422] 000003ffa06acf98 000003ffa06858b0 0000038003a33c20 0000038003a33bc8 [ 4933.861430] Krnl Code: 000003ff804bfc58: ec66002b007e cij %r6,0,6,000003ff804bfcae 000003ff804bfc5e: b904003a lgr %r3,%r10 #000003ff804bfc62: a7f40005 brc 15,000003ff804bfc6c >000003ff804bfc66: e330b7300204 lg %r3,10032(%r11) 000003ff804bfc6c: 58003000 l %r0,0(%r3) 000003ff804bfc70: ec03fffb6076 crj %r0,%r3,6,000003ff804bfc66 000003ff804bfc76: e320b7600271 lay %r2,10080(%r11) 000003ff804bfc7c: c0e5fffea339 brasl %r14,000003ff804942ee [ 4933.861444] Call Trace: [ 4933.861445] [<000003ff804bfc66>] kvm_s390_gisa_destroy+0x3e/0xe0 [kvm] [ 4933.861460] ([<00000002623523de>] free_unref_page+0xee/0x148) [ 4933.861507] [<000003ff804aea98>] kvm_arch_destroy_vm+0x50/0x120 [kvm] [ 4933.861521] [<000003ff8049d374>] kvm_destroy_vm+0x174/0x288 [kvm] [ 4933.861532] [<000003ff8049d4fe>] kvm_vm_release+0x36/0x48 [kvm] [ 4933.861542] [<00000002623cd04a>] __fput+0xea/0x2a8 [ 4933.861547] [<00000002620d5bf8>] task_work_run+0x88/0xf0 [ 4933.861551] [<00000002620b0aa6>] do_exit+0x2c6/0x528 [ 4933.861556] [<00000002620b0f00>] do_group_exit+0x40/0xb8 [ 4933.861557] [<00000002620b0fa6>] __s390x_sys_exit_group+0x2e/0x30 [ 4933.861559] [<0000000262d481f4>] __do_syscall+0x1d4/0x200 [ 4933.861563] [<0000000262d59028>] system_call+0x70/0x98 [ 4933.861565] Last Breaking-Event-Address: [ 4933.861566] [<0000038003a33b60>] 0x38003a33b60 Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Michael Mueller Reviewed-by: Matthew Rosato Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20230901105823.3973928-1-mimu@linux.ibm.com Message-ID: <20230901105823.3973928-1-mimu@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/interrupt.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c1b47d608a2b..efaebba5ee19 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) return 0; } -static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) -{ - return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa); -} - static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); @@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) if (!gi->origin) return; - if (gi->alert.mask) - KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", - kvm, gi->alert.mask); - while (gisa_in_alert_list(gi->origin)) - cpu_relax(); + WARN(gi->alert.mask != 0x00, + "unexpected non zero alert.mask 0x%02x", + gi->alert.mask); + gi->alert.mask = 0x00; + if (gisa_set_iam(gi->origin, gi->alert.mask)) + process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); -- cgit From 31db78a4923ef5e2008f2eed321811ca79e7f71b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2023 08:34:15 +0200 Subject: wifi: mac80211: fix potential key use-after-free When ieee80211_key_link() is called by ieee80211_gtk_rekey_add() but returns 0 due to KRACK protection (identical key reinstall), ieee80211_gtk_rekey_add() will still return a pointer into the key, in a potential use-after-free. This normally doesn't happen since it's only called by iwlwifi in case of WoWLAN rekey offload which has its own KRACK protection, but still better to fix, do that by returning an error code and converting that to success on the cfg80211 boundary only, leaving the error for bad callers of ieee80211_gtk_rekey_add(). Reported-by: Dan Carpenter Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +++ net/mac80211/key.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 45e7a5d9c7d9..e883c41a2163 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -566,6 +566,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } err = ieee80211_key_link(key, link, sta); + /* KRACK protection, shouldn't happen but just silently accept key */ + if (err == -EALREADY) + err = 0; out_unlock: mutex_unlock(&local->sta_mtx); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 13050dc9321f..84ba20c3e3dc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -905,7 +905,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ieee80211_key_free_unused(key); - ret = 0; + ret = -EALREADY; goto out; } -- cgit From d097ae01ebd48adc028aebcf760117a5317975dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2023 08:34:16 +0200 Subject: wifi: mac80211: fix potential key leak When returning from ieee80211_key_link(), the key needs to have been freed or successfully installed. This was missed in a number of error paths, fix it. Signed-off-by: Johannes Berg --- net/mac80211/key.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 84ba20c3e3dc..0665ff5e456e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -802,6 +802,9 @@ static void ieee80211_key_destroy(struct ieee80211_key *key, void ieee80211_key_free_unused(struct ieee80211_key *key) { + if (!key) + return; + WARN_ON(key->sdata || key->local); ieee80211_key_free_common(key); } @@ -854,7 +857,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * can cause warnings to appear. */ bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; - int ret = -EOPNOTSUPP; + int ret; mutex_lock(&sdata->local->key_mtx); @@ -868,8 +871,10 @@ int ieee80211_key_link(struct ieee80211_key *key, * the same cipher. Enforce the assumption for pairwise keys. */ if ((alt_key && alt_key->conf.cipher != key->conf.cipher) || - (old_key && old_key->conf.cipher != key->conf.cipher)) + (old_key && old_key->conf.cipher != key->conf.cipher)) { + ret = -EOPNOTSUPP; goto out; + } } else if (sta) { struct link_sta_info *link_sta = &sta->deflink; int link_id = key->conf.link_id; @@ -895,8 +900,10 @@ int ieee80211_key_link(struct ieee80211_key *key, /* Non-pairwise keys must also not switch the cipher on rekey */ if (!pairwise) { - if (old_key && old_key->conf.cipher != key->conf.cipher) + if (old_key && old_key->conf.cipher != key->conf.cipher) { + ret = -EOPNOTSUPP; goto out; + } } /* @@ -904,9 +911,8 @@ int ieee80211_key_link(struct ieee80211_key *key, * new version of the key to avoid nonce reuse or replay issues. */ if (ieee80211_key_identical(sdata, old_key, key)) { - ieee80211_key_free_unused(key); ret = -EALREADY; - goto out; + goto unlock; } key->local = sdata->local; @@ -930,7 +936,11 @@ int ieee80211_key_link(struct ieee80211_key *key, ieee80211_key_free(key, delay_tailroom); } + key = NULL; + out: + ieee80211_key_free_unused(key); + unlock: mutex_unlock(&sdata->local->key_mtx); return ret; -- cgit From 0914468adf92296c4cba8a2134e06e3dea150f2e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 18 Sep 2023 14:10:54 +0300 Subject: wifi: cfg80211: Fix 6GHz scan configuration When the scan request includes a non broadcast BSSID, when adding the scan parameters for 6GHz collocated scanning, do not include entries that do not match the given BSSID. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230918140607.6d31d2a96baf.I6c4e3e3075d1d1878ee41f45190fdc6b86f18708@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0cf1ce7b6934..939deecf0bbe 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -908,6 +908,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) !cfg80211_find_ssid_match(ap, request)) continue; + if (!is_broadcast_ether_addr(request->bssid) && + !ether_addr_equal(request->bssid, ap->bssid)) + continue; + if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) continue; -- cgit From 084cf2aeca97566db4fa15d55653c1cba2db83ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Sep 2023 14:10:55 +0300 Subject: wifi: mac80211: work around Cisco AP 9115 VHT MPDU length Cisco AP module 9115 with FW 17.3 has a bug and sends a too large maximum MPDU length in the association response (indicating 12k) that it cannot actually process. Work around that by taking the minimum between what's in the association response and the BSS elements (from beacon or probe response). Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230918140607.d1966a9a532e.I090225babb7cd4d1081ee9acd40e7de7e41c15ae@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh_plink.c | 2 +- net/mac80211/mlme.c | 27 +++++++++++++++++++++++++-- net/mac80211/vht.c | 16 ++++++++++++++-- 6 files changed, 44 insertions(+), 7 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e883c41a2163..0e3a1753a51c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1860,7 +1860,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - params->vht_capa, link_sta); + params->vht_capa, NULL, + link_sta); if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index e1900077bc4b..5542c93edfba 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1072,7 +1072,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, &chandef); memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - &cap_ie, + &cap_ie, NULL, &sta->deflink); if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) rates_updated |= true; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b3d00259e1d6..98ef1fe1226e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2141,6 +2141,7 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, + const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f3d5bb0a59f1..a1e526419e9d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -451,7 +451,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, changed |= IEEE80211_RC_BW_CHANGED; ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - elems->vht_cap_elem, + elems->vht_cap_elem, NULL, &sta->deflink); ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 46d46cfab6c8..0e61eb5a29d1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4202,10 +4202,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, elems->ht_cap_elem, link_sta); - if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) + if (elems->vht_cap_elem && + !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) { + const struct ieee80211_vht_cap *bss_vht_cap = NULL; + const struct cfg80211_bss_ies *ies; + + /* + * Cisco AP module 9115 with FW 17.3 has a bug and sends a + * too large maximum MPDU length in the association response + * (indicating 12k) that it cannot actually process ... + * Work around that. + */ + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + if (ies) { + const struct element *elem; + + elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, + ies->data, ies->len); + if (elem && elem->datalen >= sizeof(*bss_vht_cap)) + bss_vht_cap = (const void *)elem->data; + } + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, elems->vht_cap_elem, - link_sta); + bss_vht_cap, link_sta); + rcu_read_unlock(); + } if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && elems->he_cap) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index c1250aa47808..b3a5c3e96a72 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #include @@ -116,12 +116,14 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, + const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta) { struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; bool have_80mhz; + u32 mpdu_len; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -317,11 +319,21 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + /* + * Work around the Cisco 9115 FW 17.3 bug by taking the min of + * both reported MPDU lengths. + */ + mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK; + if (vht_cap_ie2) + mpdu_len = min_t(u32, mpdu_len, + le32_get_bits(vht_cap_ie2->vht_cap_info, + IEEE80211_VHT_CAP_MAX_MPDU_MASK)); + /* * FIXME - should the amsdu len be per link? store per link * and maintain a minimum? */ - switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { + switch (mpdu_len) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; -- cgit From 9c1b2429c18424759818e16e0767361a535529a8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 22 Sep 2023 15:22:06 +0200 Subject: accel/ivpu: Add Arrow Lake pci id Enable VPU on Arrow Lake CPUs. Reviewed-by: Krystian Pradzynski Reviewed-by: Karol Wachowski Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230922132206.812817-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 1 + drivers/accel/ivpu/ivpu_drv.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ba79f397c9e8..aa7314fdbc0f 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -634,6 +634,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) static struct pci_device_id ivpu_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) }, { } }; diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 9e8c075fe9ef..03b3d6532fb6 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -23,6 +23,7 @@ #define DRIVER_DATE "20230117" #define PCI_DEVICE_ID_MTL 0x7d1d +#define PCI_DEVICE_ID_ARL 0xad1d #define PCI_DEVICE_ID_LNL 0x643e #define IVPU_HW_37XX 37 @@ -165,6 +166,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev) { switch (ivpu_device_id(vdev)) { case PCI_DEVICE_ID_MTL: + case PCI_DEVICE_ID_ARL: return IVPU_HW_37XX; case PCI_DEVICE_ID_LNL: return IVPU_HW_40XX; -- cgit From 52bb69be6790bafbbbf57b714445704e82d4a97a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 15 Sep 2023 15:16:20 -0500 Subject: dt-bindings: ata: pata-common: Add missing additionalProperties on child nodes The PATA child node schema is missing constraints to prevent unknown properties. As none of the users of this common binding extend the child nodes with additional properties, adding "additionalProperties: false" here is sufficient. Signed-off-by: Rob Herring Acked-by: Conor Dooley Signed-off-by: Damien Le Moal --- Documentation/devicetree/bindings/ata/pata-common.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/ata/pata-common.yaml b/Documentation/devicetree/bindings/ata/pata-common.yaml index 337ddf1113c4..4e867dd4d402 100644 --- a/Documentation/devicetree/bindings/ata/pata-common.yaml +++ b/Documentation/devicetree/bindings/ata/pata-common.yaml @@ -38,6 +38,7 @@ patternProperties: ID number 0 and the slave drive will have ID number 1. The PATA port nodes will be named "ide-port". type: object + additionalProperties: false properties: reg: -- cgit From 3ef600923521616ebe192c893468ad0424de2afb Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 18 Sep 2023 22:24:50 +0200 Subject: ata: libata-scsi: ignore reserved bits for REPORT SUPPORTED OPERATION CODES For REPORT SUPPORTED OPERATION CODES command, the service action field is defined as bits 0-4 in the second byte in the CDB. Bits 5-7 in the second byte are reserved. Only look at the service action field in the second byte when determining if the MAINTENANCE IN opcode is a REPORT SUPPORTED OPERATION CODES command. This matches how we only look at the service action field in the second byte when determining if the SERVICE ACTION IN(16) opcode is a READ CAPACITY(16) command (reserved bits 5-7 in the second byte are ignored). Fixes: 7b2030942859 ("libata: Add support for SCT Write Same") Cc: stable@vger.kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d3f28b82c97b..fb73c145b49a 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4312,7 +4312,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) break; case MAINTENANCE_IN: - if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES) + if ((scsicmd[1] & 0x1f) == MI_REPORT_SUPPORTED_OPERATION_CODES) ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in); else ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); -- cgit From fbf5892df21a8ccfcb2fda0fd65bc3169c89ed28 Mon Sep 17 00:00:00 2001 From: Martin Nybo Andersen Date: Fri, 15 Sep 2023 12:15:39 +0200 Subject: kbuild: Use CRC32 and a 1MiB dictionary for XZ compressed modules Kmod is now (since kmod commit 09c9f8c5df04 ("libkmod: Use kernel decompression when available")) using the kernel decompressor, when loading compressed modules. However, the kernel XZ decompressor is XZ Embedded, which doesn't handle CRC64 and dictionaries larger than 1MiB. Use CRC32 and 1MiB dictionary when XZ compressing and installing kernel modules. Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1050582 Signed-off-by: Martin Nybo Andersen Signed-off-by: Masahiro Yamada --- scripts/Makefile.modinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 346f5ec50682..0afd75472679 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -144,7 +144,7 @@ endif quiet_cmd_gzip = GZIP $@ cmd_gzip = $(KGZIP) -n -f $< quiet_cmd_xz = XZ $@ - cmd_xz = $(XZ) --lzma2=dict=2MiB -f $< + cmd_xz = $(XZ) --check=crc32 --lzma2=dict=1MiB -f $< quiet_cmd_zstd = ZSTD $@ cmd_zstd = $(ZSTD) -T0 --rm -f -q $< -- cgit From 28d49e171676afb7df7f47798391364af9abed7f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 17 Sep 2023 21:19:59 +0200 Subject: Documentation: kbuild: explain handling optional dependencies This problem frequently comes up in randconfig testing, with drivers failing to link because of a dependency on an optional feature. The Kconfig language for this is very confusing, so try to document it in "Kconfig hints" section. Reviewed-by: Javier Martinez Canillas Reviewed-by: Sakari Ailus Reviewed-by: Nicolas Schier Signed-off-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst index 858ed5d80def..0135905c0aa3 100644 --- a/Documentation/kbuild/kconfig-language.rst +++ b/Documentation/kbuild/kconfig-language.rst @@ -573,6 +573,32 @@ above, leading to: bool "Support for foo hardware" depends on ARCH_FOO_VENDOR || COMPILE_TEST +Optional dependencies +~~~~~~~~~~~~~~~~~~~~~ + +Some drivers are able to optionally use a feature from another module +or build cleanly with that module disabled, but cause a link failure +when trying to use that loadable module from a built-in driver. + +The most common way to express this optional dependency in Kconfig logic +uses the slightly counterintuitive:: + + config FOO + tristate "Support for foo hardware" + depends on BAR || !BAR + +This means that there is either a dependency on BAR that disallows +the combination of FOO=y with BAR=m, or BAR is completely disabled. +For a more formalized approach if there are multiple drivers that have +the same dependency, a helper symbol can be used, like:: + + config FOO + tristate "Support for foo hardware" + depends on BAR_OPTIONAL + + config BAR_OPTIONAL + def_tristate BAR || !BAR + Kconfig recursive dependency limitations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- cgit From 753a4d531bc518633ea88ac0ed02b25a16823d51 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 22 Sep 2023 22:55:16 +0200 Subject: ata: libata-sata: increase PMP SRST timeout to 10s On certain SATA controllers, softreset fails after wakeup from S2RAM with the message "softreset failed (1st FIS failed)", sometimes resulting in drives not being detected again. With the increased timeout, this issue is avoided. Instead, "softreset failed (device not ready)" is now logged 1-2 times; this later failure seems to cause fewer problems however, and the drives are detected reliably once they've spun up and the probe is retried. The issue was observed with the primary SATA controller of the QNAP TS-453B, which is an "Intel Corporation Celeron/Pentium Silver Processor SATA Controller [8086:31e3] (rev 06)" integrated in the Celeron J4125 CPU, and the following drives: - Seagate IronWolf ST12000VN0008 - Seagate IronWolf ST8000NE0004 The SATA controller seems to be more relevant to this issue than the drives, as the same drives are always detected reliably on the secondary SATA controller on the same board (an ASMedia 106x) without any "softreset failed" errors even without the increased timeout. Fixes: e7d3ef13d52a ("libata: change drive ready wait after hard reset to 5s") Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index bf4913f4d7ac..84aca8c44fa3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -259,7 +259,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that -- cgit From 61304336c67358d49a989e5e0060d8c99bad6ca8 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 1 Aug 2023 02:47:51 -0400 Subject: wifi: mac80211: allow transmitting EAPOL frames with tainted key Lower layer device driver stop/wake TX by calling ieee80211_stop_queue()/ ieee80211_wake_queue() while hw scan. Sometimes hw scan and PTK rekey are running in parallel, when M4 sent from wpa_supplicant arrive while the TX queue is stopped, then the M4 will pending send, and then new key install from wpa_supplicant. After TX queue wake up by lower layer device driver, the M4 will be dropped by below call stack. When key install started, the current key flag is set KEY_FLAG_TAINTED in ieee80211_pairwise_rekey(), and then mac80211 wait key install complete by lower layer device driver. Meanwhile ieee80211_tx_h_select_key() will return TX_DROP for the M4 in step 12 below, and then ieee80211_free_txskb() called by ieee80211_tx_dequeue(), so the M4 will not send and free, then the rekey process failed becaue AP not receive M4. Please see details in steps below. There are a interval between KEY_FLAG_TAINTED set for current key flag and install key complete by lower layer device driver, the KEY_FLAG_TAINTED is set in this interval, all packet including M4 will be dropped in this interval, the interval is step 8~13 as below. issue steps: TX thread install key thread 1. stop_queue -idle- 2. sending M4 -idle- 3. M4 pending -idle- 4. -idle- starting install key from wpa_supplicant 5. -idle- =>ieee80211_key_replace() 6. -idle- =>ieee80211_pairwise_rekey() and set currently key->flags |= KEY_FLAG_TAINTED 7. -idle- =>ieee80211_key_enable_hw_accel() 8. -idle- =>drv_set_key() and waiting key install complete from lower layer device driver 9. wake_queue -waiting state- 10. re-sending M4 -waiting state- 11. =>ieee80211_tx_h_select_key() -waiting state- 12. drop M4 by KEY_FLAG_TAINTED -waiting state- 13. -idle- install key complete with success/fail success: clear flag KEY_FLAG_TAINTED fail: start disconnect Hence add check in step 11 above to allow the EAPOL send out in the interval. If lower layer device driver use the old key/cipher to encrypt the M4, then AP received/decrypt M4 correctly, after M4 send out, lower layer device driver install the new key/cipher to hardware and return success. If lower layer device driver use new key/cipher to send the M4, then AP will/should drop the M4, then it is same result with this issue, AP will/ should kick out station as well as this issue. issue log: kworker/u16:4-5238 [000] 6456.108926: stop_queue: phy1 queue:0, reason:0 wpa_supplicant-961 [003] 6456.119737: rdev_tx_control_port: wiphy_name=phy1 name=wlan0 ifindex=6 dest=ARRAY[9e, 05, 31, 20, 9b, d0] proto=36488 unencrypted=0 wpa_supplicant-961 [003] 6456.119839: rdev_return_int_cookie: phy1, returned 0, cookie: 504 wpa_supplicant-961 [003] 6456.120287: rdev_add_key: phy1, netdev:wlan0(6), key_index: 0, mode: 0, pairwise: true, mac addr: 9e:05:31:20:9b:d0 wpa_supplicant-961 [003] 6456.120453: drv_set_key: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 cipher:0xfac04, flags=0x9, keyidx=0, hw_key_idx=0 kworker/u16:9-3829 [001] 6456.168240: wake_queue: phy1 queue:0, reason:0 kworker/u16:9-3829 [001] 6456.168255: drv_wake_tx_queue: phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 ac:0 tid:7 kworker/u16:9-3829 [001] 6456.168305: cfg80211_control_port_tx_status: wdev(1), cookie: 504, ack: false wpa_supplicant-961 [003] 6459.167982: drv_return_int: phy1 - -110 issue call stack: nl80211_frame_tx_status+0x230/0x340 [cfg80211] cfg80211_control_port_tx_status+0x1c/0x28 [cfg80211] ieee80211_report_used_skb+0x374/0x3e8 [mac80211] ieee80211_free_txskb+0x24/0x40 [mac80211] ieee80211_tx_dequeue+0x644/0x954 [mac80211] ath10k_mac_tx_push_txq+0xac/0x238 [ath10k_core] ath10k_mac_op_wake_tx_queue+0xac/0xe0 [ath10k_core] drv_wake_tx_queue+0x80/0x168 [mac80211] __ieee80211_wake_txqs+0xe8/0x1c8 [mac80211] _ieee80211_wake_txqs+0xb4/0x120 [mac80211] ieee80211_wake_txqs+0x48/0x80 [mac80211] tasklet_action_common+0xa8/0x254 tasklet_action+0x2c/0x38 __do_softirq+0xdc/0x384 Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20230801064751.25803-1-quic_wgong@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7fe7280e8437..d45d4be63dd8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -665,7 +665,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED && - !ieee80211_is_deauth(hdr->frame_control))) + !ieee80211_is_deauth(hdr->frame_control)) && + tx->skb->protocol != tx->sdata->control_port_protocol) return TX_DROP; if (!skip_hw && tx->key && -- cgit From 0e4cac557531a4c93de108d9ff11329fcad482ff Mon Sep 17 00:00:00 2001 From: Ricky WU Date: Wed, 20 Sep 2023 09:11:19 +0000 Subject: misc: rtsx: Fix some platforms can not boot and move the l1ss judgment to probe commit 101bd907b424 ("misc: rtsx: judge ASPM Mode to set PETXCFG Reg") some readers no longer force #CLKREQ to low when the system need to enter ASPM. But some platform maybe not implement complete ASPM? it causes some platforms can not boot Like in the past only the platform support L1ss we release the #CLKREQ. Move the judgment (L1ss) to probe, we think read config space one time when the driver start is enough Fixes: 101bd907b424 ("misc: rtsx: judge ASPM Mode to set PETXCFG Reg") Cc: stable Reported-by: Paul Grandperrin Signed-off-by: Ricky Wu Tested-By: Jade Lovelace Link: https://lore.kernel.org/r/37b1afb997f14946a8784c73d1f9a4f5@realtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5227.c | 55 ++++-------------------------------- drivers/misc/cardreader/rts5228.c | 57 ++++++++++++-------------------------- drivers/misc/cardreader/rts5249.c | 56 +++++-------------------------------- drivers/misc/cardreader/rts5260.c | 43 +++++++++------------------- drivers/misc/cardreader/rts5261.c | 52 +++++++++------------------------- drivers/misc/cardreader/rtsx_pcr.c | 51 ++++++++++++++++++++++++++++++---- 6 files changed, 102 insertions(+), 212 deletions(-) diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index 3dae5e3a1697..cd512284bfb3 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -83,63 +83,20 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) static void rts5227_init_from_cfg(struct rtsx_pcr *pcr) { - struct pci_dev *pdev = pcr->pci; - int l1ss; - u32 lval; struct rtsx_cr_option *option = &pcr->option; - l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); - if (!l1ss) - return; - - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval); - if (CHK_PCI_PID(pcr, 0x522A)) { - if (0 == (lval & 0x0F)) - rtsx_pci_enable_oobs_polling(pcr); - else + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) rtsx_pci_disable_oobs_polling(pcr); + else + rtsx_pci_enable_oobs_polling(pcr); } - if (lval & PCI_L1SS_CTL1_ASPM_L1_1) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_2) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); - else - rtsx_clear_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, PM_L1_2_EN); - if (option->ltr_en) { - u16 val; - - pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; + if (option->ltr_enabled) rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } } - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; - } static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) @@ -195,7 +152,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) } } - if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG) + if (option->force_clkreq_0) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); else diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c index f4ab09439da7..0c7f10bcf6f1 100644 --- a/drivers/misc/cardreader/rts5228.c +++ b/drivers/misc/cardreader/rts5228.c @@ -386,59 +386,25 @@ static void rts5228_process_ocp(struct rtsx_pcr *pcr) static void rts5228_init_from_cfg(struct rtsx_pcr *pcr) { - struct pci_dev *pdev = pcr->pci; - int l1ss; - u32 lval; struct rtsx_cr_option *option = &pcr->option; - l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); - if (!l1ss) - return; - - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval); - - if (0 == (lval & 0x0F)) - rtsx_pci_enable_oobs_polling(pcr); - else + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) rtsx_pci_disable_oobs_polling(pcr); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_1) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_2) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); else - rtsx_clear_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + rtsx_pci_enable_oobs_polling(pcr); rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); - if (option->ltr_en) { - u16 val; - pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; + if (option->ltr_en) { + if (option->ltr_enabled) rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } } } static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) { + struct rtsx_cr_option *option = &pcr->option; rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1, CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); @@ -469,6 +435,17 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) else rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); if (pcr->rtd3_en) { diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index 47ab72a43256..6c81040e18be 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -86,64 +86,22 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) { - struct pci_dev *pdev = pcr->pci; - int l1ss; struct rtsx_cr_option *option = &(pcr->option); - u32 lval; - - l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); - if (!l1ss) - return; - - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval); if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { - if (0 == (lval & 0x0F)) - rtsx_pci_enable_oobs_polling(pcr); - else + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) rtsx_pci_disable_oobs_polling(pcr); + else + rtsx_pci_enable_oobs_polling(pcr); } - - if (lval & PCI_L1SS_CTL1_ASPM_L1_1) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_2) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); - if (option->ltr_en) { - u16 val; - - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; + if (option->ltr_enabled) rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } } } -static int rts5249_init_from_hw(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &(pcr->option); - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; - - return 0; -} - static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) { /* Set relink_time to 0 */ @@ -276,7 +234,6 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) struct rtsx_cr_option *option = &(pcr->option); rts5249_init_from_cfg(pcr); - rts5249_init_from_hw(pcr); rtsx_pci_init_cmd(pcr); @@ -327,11 +284,12 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) } } + /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced * to drive low, and we forcibly request clock. */ - if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG) + if (option->force_clkreq_0) rtsx_pci_write_register(pcr, PETXCFG, FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); else diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 79b18f6f73a8..d2d3a6ccb8f7 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -480,47 +480,19 @@ static void rts5260_pwr_saving_setting(struct rtsx_pcr *pcr) static void rts5260_init_from_cfg(struct rtsx_pcr *pcr) { - struct pci_dev *pdev = pcr->pci; - int l1ss; struct rtsx_cr_option *option = &pcr->option; - u32 lval; - - l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); - if (!l1ss) - return; - - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_1) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_2) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); rts5260_pwr_saving_setting(pcr); if (option->ltr_en) { - u16 val; - - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; + if (option->ltr_enabled) rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } } } static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) { + struct rtsx_cr_option *option = &pcr->option; /* Set mcu_cnt to 7 to ensure data can be sampled properly */ rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07); @@ -539,6 +511,17 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) rts5260_init_hw(pcr); + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); return 0; diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c index 94af6bf8a25a..67252512a132 100644 --- a/drivers/misc/cardreader/rts5261.c +++ b/drivers/misc/cardreader/rts5261.c @@ -454,54 +454,17 @@ static void rts5261_init_from_hw(struct rtsx_pcr *pcr) static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) { - struct pci_dev *pdev = pcr->pci; - int l1ss; - u32 lval; struct rtsx_cr_option *option = &pcr->option; - l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); - if (!l1ss) - return; - - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, &lval); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_1) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_ASPM_L1_2) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); - else - rtsx_clear_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); - else - rtsx_clear_dev_flag(pcr, PM_L1_2_EN); - - rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); if (option->ltr_en) { - u16 val; - - pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; + if (option->ltr_enabled) rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } } } static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) { + struct rtsx_cr_option *option = &pcr->option; u32 val; rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, @@ -547,6 +510,17 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) else rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); if (pcr->rtd3_en) { diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index a3f4b52bb159..a30751ad3733 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1326,11 +1326,8 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) return err; } - if (pcr->aspm_mode == ASPM_MODE_REG) { + if (pcr->aspm_mode == ASPM_MODE_REG) rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30); - rtsx_pci_write_register(pcr, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); - } /* No CD interrupt if probing driver with card inserted. * So we need to initialize pcr->card_exist here. @@ -1345,7 +1342,9 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) { - int err; + struct rtsx_cr_option *option = &(pcr->option); + int err, l1ss; + u32 lval; u16 cfg_val; u8 val; @@ -1430,6 +1429,48 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) pcr->aspm_enabled = true; } + l1ss = pci_find_ext_capability(pcr->pci, PCI_EXT_CAP_ID_L1SS); + if (l1ss) { + pci_read_config_dword(pcr->pci, l1ss + PCI_L1SS_CTL1, &lval); + + if (lval & PCI_L1SS_CTL1_ASPM_L1_1) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & PCI_L1SS_CTL1_ASPM_L1_2) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cfg_val); + if (cfg_val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + } else { + option->ltr_enabled = false; + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; + } else { + option->ltr_enabled = false; + option->force_clkreq_0 = true; + } + if (pcr->ops->fetch_vendor_settings) pcr->ops->fetch_vendor_settings(pcr); -- cgit From 599522d9d2e19d6240e4312577f1c5f3ffca22f6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 14 Sep 2023 19:58:40 +0530 Subject: perf/x86/amd: Do not WARN() on every IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zen 4 systems running buggy microcode can hit a WARN_ON() in the PMI handler, as shown below, several times while perf runs. A simple `perf top` run is enough to render the system unusable: WARNING: CPU: 18 PID: 20608 at arch/x86/events/amd/core.c:944 amd_pmu_v2_handle_irq+0x1be/0x2b0 This happens because the Performance Counter Global Status Register (PerfCntGlobalStatus) has one or more bits set which are considered reserved according to the "AMD64 Architecture Programmer’s Manual, Volume 2: System Programming, 24593": https://www.amd.com/system/files/TechDocs/24593.pdf To make this less intrusive, warn just once if any reserved bit is set and prompt the user to update the microcode. Also sanitize the value to what the code is handling, so that the overflow events continue to be handled for the number of counters that are known to be sane. Going forward, the following microcode patch levels are recommended for Zen 4 processors in order to avoid such issues with reserved bits: Family=0x19 Model=0x11 Stepping=0x01: Patch=0x0a10113e Family=0x19 Model=0x11 Stepping=0x02: Patch=0x0a10123e Family=0x19 Model=0xa0 Stepping=0x01: Patch=0x0aa00116 Family=0x19 Model=0xa0 Stepping=0x02: Patch=0x0aa00212 Commit f2eb058afc57 ("linux-firmware: Update AMD cpu microcode") from the linux-firmware tree has binaries that meet the minimum required patch levels. [ sandipan: - add message to prompt users to update microcode - rework commit message and call out required microcode levels ] Fixes: 7685665c390d ("perf/x86/amd/core: Add PerfMonV2 overflow handling") Reported-by: Jirka Hladky Signed-off-by: Breno Leitao Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/3540f985652f41041e54ee82aa53e7dbd55739ae.1694696888.git.sandipan.das@amd.com/ --- arch/x86/events/amd/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index ed626bfa1eed..e24976593a29 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -886,7 +886,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) struct hw_perf_event *hwc; struct perf_event *event; int handled = 0, idx; - u64 status, mask; + u64 reserved, status, mask; bool pmu_enabled; /* @@ -911,6 +911,14 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) status &= ~GLOBAL_STATUS_LBRS_FROZEN; } + reserved = status & ~amd_pmu_global_cntr_mask; + if (reserved) + pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n", + reserved); + + /* Clear any reserved bits set by buggy microcode */ + status &= amd_pmu_global_cntr_mask; + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; -- cgit From b07eba71a512eb196cbcc29765c29c8c29b11b59 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 19 Aug 2023 16:14:43 +0800 Subject: iommu/mediatek: Fix share pgtable for iova over 4GB In mt8192/mt8186, there is only one MM IOMMU that supports 16GB iova space, which is shared by display, vcodec and camera. These two SoC use one pgtable and have not the flag SHARE_PGTABLE, we should also keep share pgtable for this case. In mtk_iommu_domain_finalise, MM IOMMU always share pgtable, thus remove the flag SHARE_PGTABLE checking. Infra IOMMU always uses independent pgtable. Fixes: cf69ef46dbd9 ("iommu/mediatek: Fix two IOMMU share pagetable issue") Reported-by: Laura Nao Closes: https://lore.kernel.org/linux-iommu/20230818154156.314742-1-laura.nao@collabora.com/ Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Tested-by: Laura Nao Link: https://lore.kernel.org/r/20230819081443.8333-1-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 640275873a27..fab6c347ce57 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -262,7 +262,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; - struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */ + struct mtk_iommu_domain *share_dom; struct regmap *pericfg; struct mutex mutex; /* Protect m4u_group/m4u_dom above */ @@ -643,8 +643,8 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, struct mtk_iommu_domain *share_dom = data->share_dom; const struct mtk_iommu_iova_region *region; - /* Always use share domain in sharing pgtable case */ - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) { + /* Share pgtable when 2 MM IOMMU share the pgtable or one IOMMU use multiple iova ranges */ + if (share_dom) { dom->iop = share_dom->iop; dom->cfg = share_dom->cfg; dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap; @@ -677,8 +677,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) - data->share_dom = dom; + data->share_dom = dom; update_iova_region: /* Update the iova region for this domain */ -- cgit From c7bd8a1f45bada7725d11266df7fd5cb549b3098 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 22 Sep 2023 23:55:23 +0900 Subject: iommu/apple-dart: Handle DMA_FQ domains in attach_dev() Commit a4fdd9762272 ("iommu: Use flush queue capability") hid the IOMMU_DOMAIN_DMA_FQ domain type from domain allocation. A check was introduced in iommu_dma_init_domain() to fall back if not supported, but this check runs too late: by that point, devices have been attached to the IOMMU, and apple-dart's attach_dev() callback does not expect IOMMU_DOMAIN_DMA_FQ domains. Change the logic so the IOMMU_DOMAIN_DMA codepath is the default, instead of explicitly enumerating all types. Fixes an apple-dart regression in v6.5. Cc: regressions@lists.linux.dev Cc: stable@vger.kernel.org Suggested-by: Robin Murphy Fixes: a4fdd9762272 ("iommu: Use flush queue capability") Signed-off-by: Hector Martin Reviewed-by: Neal Gompa Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230922-iommu-type-regression-v2-1-689b2ba9b673@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 2082081402d3..0b8927508427 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -671,8 +671,7 @@ static int apple_dart_attach_dev(struct iommu_domain *domain, return ret; switch (domain->type) { - case IOMMU_DOMAIN_DMA: - case IOMMU_DOMAIN_UNMANAGED: + default: ret = apple_dart_domain_add_streams(dart_domain, cfg); if (ret) return ret; -- cgit From 2d1b3bbc3dd56fc8364350eb93e5d67a05cb2c23 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Sep 2023 00:21:35 -0600 Subject: ovl: disable IOCB_DIO_CALLER_COMP overlayfs copies the kiocb flags when it sets up a new kiocb to handle a write, but it doesn't properly support dealing with the deferred caller completions of the kiocb. This means it doesn't get the final write completion value, and hence will complete the write with '0' as the result. We could support the caller completions in overlayfs, but for now let's just disable them in the generated write kiocb. Reported-by: Zorro Lang Link: https://lore.kernel.org/io-uring/20230924142754.ejwsjen5pvyc32l4@dell-per750-06-vm-08.rhts.eng.pek2.redhat.com/ Fixes: 8c052fb3002e ("iomap: support IOCB_DIO_CALLER_COMP") Signed-off-by: Jens Axboe Message-Id: <71897125-e570-46ce-946a-d4729725e28f@kernel.dk> Signed-off-by: Christian Brauner --- fs/overlayfs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 3b4cc633d763..ca18db79daf4 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -393,6 +393,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); + /* + * Overlayfs doesn't support deferred completions, don't copy + * this property in case it is set by the issuer. + */ + ifl &= ~IOCB_DIO_CALLER_COMP; + old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { file_start_write(real.file); -- cgit From ef8f8f04a0b25e8f294b24350e8463a8d6a9ba0b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Sep 2023 09:06:56 +0200 Subject: MIPS: Alchemy: only build mmc support helpers if au1xmmc is enabled While commit d4a5c59a955b ("mmc: au1xmmc: force non-modular build and remove symbol_get usage") to be built in, it can still build a kernel without MMC support and thuse no mmc_detect_change symbol at all. Add ifdefs to build the mmc support code in the alchemy arch code conditional on mmc support. Fixes: d4a5c59a955b ("mmc: au1xmmc: force non-modular build and remove symbol_get usage") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/devboards/db1000.c | 4 ++++ arch/mips/alchemy/devboards/db1200.c | 6 ++++++ arch/mips/alchemy/devboards/db1300.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c index 012da042d0a4..7b9f91db227f 100644 --- a/arch/mips/alchemy/devboards/db1000.c +++ b/arch/mips/alchemy/devboards/db1000.c @@ -164,6 +164,7 @@ static struct platform_device db1x00_audio_dev = { /******************************************************************************/ +#ifdef CONFIG_MMC_AU1X static irqreturn_t db1100_mmc_cd(int irq, void *ptr) { mmc_detect_change(ptr, msecs_to_jiffies(500)); @@ -369,6 +370,7 @@ static struct platform_device db1100_mmc1_dev = { .num_resources = ARRAY_SIZE(au1100_mmc1_res), .resource = au1100_mmc1_res, }; +#endif /* CONFIG_MMC_AU1X */ /******************************************************************************/ @@ -440,8 +442,10 @@ static struct platform_device *db1x00_devs[] = { static struct platform_device *db1100_devs[] = { &au1100_lcd_device, +#ifdef CONFIG_MMC_AU1X &db1100_mmc0_dev, &db1100_mmc1_dev, +#endif }; int __init db1000_dev_setup(void) diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index 76080c71a2a7..f521874ebb07 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -326,6 +326,7 @@ static struct platform_device db1200_ide_dev = { /**********************************************************************/ +#ifdef CONFIG_MMC_AU1X /* SD carddetects: they're supposed to be edge-triggered, but ack * doesn't seem to work (CPLD Rev 2). Instead, the screaming one * is disabled and its counterpart enabled. The 200ms timeout is @@ -584,6 +585,7 @@ static struct platform_device pb1200_mmc1_dev = { .num_resources = ARRAY_SIZE(au1200_mmc1_res), .resource = au1200_mmc1_res, }; +#endif /* CONFIG_MMC_AU1X */ /**********************************************************************/ @@ -751,7 +753,9 @@ static struct platform_device db1200_audiodma_dev = { static struct platform_device *db1200_devs[] __initdata = { NULL, /* PSC0, selected by S6.8 */ &db1200_ide_dev, +#ifdef CONFIG_MMC_AU1X &db1200_mmc0_dev, +#endif &au1200_lcd_dev, &db1200_eth_dev, &db1200_nand_dev, @@ -762,7 +766,9 @@ static struct platform_device *db1200_devs[] __initdata = { }; static struct platform_device *pb1200_devs[] __initdata = { +#ifdef CONFIG_MMC_AU1X &pb1200_mmc1_dev, +#endif }; /* Some peripheral base addresses differ on the PB1200 */ diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c index ff61901329c6..d377e043b49f 100644 --- a/arch/mips/alchemy/devboards/db1300.c +++ b/arch/mips/alchemy/devboards/db1300.c @@ -450,6 +450,7 @@ static struct platform_device db1300_ide_dev = { /**********************************************************************/ +#ifdef CONFIG_MMC_AU1X static irqreturn_t db1300_mmc_cd(int irq, void *ptr) { disable_irq_nosync(irq); @@ -632,6 +633,7 @@ static struct platform_device db1300_sd0_dev = { .resource = au1300_sd0_res, .num_resources = ARRAY_SIZE(au1300_sd0_res), }; +#endif /* CONFIG_MMC_AU1X */ /**********************************************************************/ @@ -767,8 +769,10 @@ static struct platform_device *db1300_dev[] __initdata = { &db1300_5waysw_dev, &db1300_nand_dev, &db1300_ide_dev, +#ifdef CONFIG_MMC_AU1X &db1300_sd0_dev, &db1300_sd1_dev, +#endif &db1300_lcd_dev, &db1300_ac97_dev, &db1300_i2s_dev, -- cgit From f4dcf06bc6e0161920b700ba3966411d716a321b Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 25 Sep 2023 16:08:44 +0800 Subject: ACPI: video: Fix NULL pointer dereference in acpi_video_bus_add() acpi_video_bus_add_notify_handler() could free video->input and set it to NULL on failure, but this failure would be missed in its caller acpi_video_bus_add(). As a result, when an error happens in acpi_dev_install_notify_handler(), acpi_video_bus_add() would call acpi_video_bus_remove_notify_handler(), where a potential NULL pointer video->input is dereferenced in input_unregister_device(). Fix this by adding a return value check and adjusting the following error handling code. Fixes: 6f7016819766 ("ACPI: video: Install Notify() handler directly") Signed-off-by: Dinghao Liu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 948e31f7ce6e..b411948594ff 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2057,7 +2057,9 @@ static int acpi_video_bus_add(struct acpi_device *device) !auto_detect) acpi_video_bus_register_backlight(video); - acpi_video_bus_add_notify_handler(video); + error = acpi_video_bus_add_notify_handler(video); + if (error) + goto err_del; error = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY, acpi_video_bus_notify); @@ -2067,10 +2069,11 @@ static int acpi_video_bus_add(struct acpi_device *device) return 0; err_remove: + acpi_video_bus_remove_notify_handler(video); +err_del: mutex_lock(&video_list_lock); list_del(&video->entry); mutex_unlock(&video_list_lock); - acpi_video_bus_remove_notify_handler(video); acpi_video_bus_unregister_backlight(video); err_put_video: acpi_video_bus_put_devices(video); -- cgit From 5c8a033f5674ae62d5aa2ebbdb9980b89380c34f Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 29 Aug 2023 19:16:19 +0200 Subject: dt-bindings: ASoC: rockchip: Add compatible for RK3128 spdif Add compatible for RK3128's S/PDIF. Signed-off-by: Alex Bee Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230829171647.187787-4-knaerzche@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/rockchip-spdif.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index 4f51b2fa82db..c3c989ef2a2c 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml @@ -26,6 +26,7 @@ properties: - const: rockchip,rk3568-spdif - items: - enum: + - rockchip,rk3128-spdif - rockchip,rk3188-spdif - rockchip,rk3288-spdif - rockchip,rk3308-spdif -- cgit From 197c53c8ecb34f2cd5922f4bdcffa8f701a134eb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 19 Sep 2023 17:42:13 +0800 Subject: ASoC: fsl_sai: Don't disable bitclock for i.MX8MP On i.MX8MP, the BCE and TERE bit are binding with mclk enablement, if BCE and TERE are cleared the MCLK also be disabled on output pin, that cause the external codec (wm8960) in wrong state. Codec (wm8960) is using the mclk to generate PLL clock, if mclk is disabled before disabling PLL, the codec (wm8960) won't generate bclk and frameclk when sysclk switch to MCLK source in next test case. The test case: $aplay -r44100 test1.wav (PLL source) $aplay -r48000 test2.wav (MCLK source) aplay: pcm_write:2127: write error: Input/output error Fixes: 269f399dc19f ("ASoC: fsl_sai: Disable bit clock with transmitter") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1695116533-23287-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 1e4020fae05a..8a9a30dd31e2 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -710,10 +710,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) { unsigned int ofs = sai->soc_data->reg_offset; bool tx = dir == TX; - u32 xcsr, count = 100; + u32 xcsr, count = 100, mask; + + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output) + mask = FSL_SAI_CSR_TERE; + else + mask = FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE; regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), - FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0); + mask, 0); /* TERE will remain set till the end of current frame */ do { -- cgit From 3b4e5194138b4576e7b703edcd85ffe8783df798 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 24 Sep 2023 15:39:04 -0300 Subject: dt-bindings: spi: fsl-imx-cspi: Document missing entries The imx25, imx50, imx51 and imx53 SPIs are compatible with the imx35. Document them accordingly. Signed-off-by: Fabio Estevam Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230924183904.752415-1-festevam@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml index 2f593c7225e5..14cac0e6e0a1 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml @@ -22,6 +22,13 @@ properties: - const: fsl,imx35-cspi - const: fsl,imx51-ecspi - const: fsl,imx53-ecspi + - items: + - enum: + - fsl,imx25-cspi + - fsl,imx50-cspi + - fsl,imx51-cspi + - fsl,imx53-cspi + - const: fsl,imx35-cspi - items: - const: fsl,imx8mp-ecspi - const: fsl,imx6ul-ecspi -- cgit From 493c71926c20309226b6d73f6b661a9813de5f0b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Sep 2023 13:08:52 +0200 Subject: ntfs3: put resources during ntfs_fill_super() During ntfs_fill_super() some resources are allocated that we need to cleanup in ->put_super() such as additional inodes. When ntfs_fill_super() fails these resources need to be cleaned up as well. Reported-by: syzbot+2751da923b5eb8307b0b@syzkaller.appspotmail.com Fixes: 78a06688a4d4 ("ntfs3: drop inode references in ntfs_put_super()") Signed-off-by: Christian Brauner --- fs/ntfs3/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cfec5e0c7f66..5661a363005e 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1562,6 +1562,7 @@ load_root: put_inode_out: iput(inode); out: + ntfs3_put_sbi(sbi); kfree(boot2); return err; } -- cgit From 03dbab3bba5f009d053635c729d1244f2c8bad38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 13 Sep 2023 09:33:12 -0400 Subject: overlayfs: set ctime when setting mtime and atime Nathan reported that he was seeing the new warning in setattr_copy_mgtime pop when starting podman containers. Overlayfs is trying to set the atime and mtime via notify_change without also setting the ctime. POSIX states that when the atime and mtime are updated via utimes() that we must also update the ctime to the current time. The situation with overlayfs copy-up is analogies, so add ATTR_CTIME to the bitmask. notify_change will fill in the value. Reported-by: Nathan Chancellor Signed-off-by: Jeff Layton Tested-by: Nathan Chancellor Acked-by: Christian Brauner Acked-by: Amir Goldstein Message-Id: <20230913-ctime-v1-1-c6bc509cbc27@kernel.org> Signed-off-by: Christian Brauner --- fs/overlayfs/copy_up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index bae404a1bad4..557e46bc4361 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -337,7 +337,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry, { struct iattr attr = { .ia_valid = - ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME, .ia_atime = stat->atime, .ia_mtime = stat->mtime, }; -- cgit From c153a4edff6ab01370fcac8e46f9c89cca1060c2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 20 Sep 2023 11:09:10 -0700 Subject: pinctrl: avoid unsafe code pattern in find_pinctrl() The code in find_pinctrl() takes a mutex and traverses a list of pinctrl structures. Later the caller bumps up reference count on the found structure. Such pattern is not safe as pinctrl that was found may get deleted before the caller gets around to increasing the reference count. Fix this by taking the reference count in find_pinctrl(), while it still holds the mutex. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/ZQs1RgTKg6VJqmPs@google.com Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e9dc9638120a..e2f7519bef04 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1022,17 +1022,20 @@ static int add_setting(struct pinctrl *p, struct pinctrl_dev *pctldev, static struct pinctrl *find_pinctrl(struct device *dev) { - struct pinctrl *p; + struct pinctrl *entry, *p = NULL; mutex_lock(&pinctrl_list_mutex); - list_for_each_entry(p, &pinctrl_list, node) - if (p->dev == dev) { - mutex_unlock(&pinctrl_list_mutex); - return p; + + list_for_each_entry(entry, &pinctrl_list, node) { + if (entry->dev == dev) { + p = entry; + kref_get(&p->users); + break; } + } mutex_unlock(&pinctrl_list_mutex); - return NULL; + return p; } static void pinctrl_free(struct pinctrl *p, bool inlist); @@ -1140,7 +1143,6 @@ struct pinctrl *pinctrl_get(struct device *dev) p = find_pinctrl(dev); if (p) { dev_dbg(dev, "obtain a copy of previously claimed pinctrl\n"); - kref_get(&p->users); return p; } -- cgit From 59df44bfb0ca4c3ee1f1c3c5d0ee8e314844799e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 25 Sep 2023 20:04:17 +0800 Subject: iommu/vt-d: Avoid memory allocation in iommu_suspend() The iommu_suspend() syscore suspend callback is invoked with IRQ disabled. Allocating memory with the GFP_KERNEL flag may re-enable IRQs during the suspend callback, which can cause intermittent suspend/hibernation problems with the following kernel traces: Calling iommu_suspend+0x0/0x1d0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 15 at kernel/time/timekeeping.c:868 ktime_get+0x9b/0xb0 ... CPU: 0 PID: 15 Comm: rcu_preempt Tainted: G U E 6.3-intel #r1 RIP: 0010:ktime_get+0x9b/0xb0 ... Call Trace: tick_sched_timer+0x22/0x90 ? __pfx_tick_sched_timer+0x10/0x10 __hrtimer_run_queues+0x111/0x2b0 hrtimer_interrupt+0xfa/0x230 __sysvec_apic_timer_interrupt+0x63/0x140 sysvec_apic_timer_interrupt+0x7b/0xa0 asm_sysvec_apic_timer_interrupt+0x1f/0x30 ... ------------[ cut here ]------------ Interrupts enabled after iommu_suspend+0x0/0x1d0 WARNING: CPU: 0 PID: 27420 at drivers/base/syscore.c:68 syscore_suspend+0x147/0x270 CPU: 0 PID: 27420 Comm: rtcwake Tainted: G U W E 6.3-intel #r1 RIP: 0010:syscore_suspend+0x147/0x270 ... Call Trace: hibernation_snapshot+0x25b/0x670 hibernate+0xcd/0x390 state_store+0xcf/0xe0 kobj_attr_store+0x13/0x30 sysfs_kf_write+0x3f/0x50 kernfs_fop_write_iter+0x128/0x200 vfs_write+0x1fd/0x3c0 ksys_write+0x6f/0xf0 __x64_sys_write+0x1d/0x30 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Given that only 4 words memory is needed, avoid the memory allocation in iommu_suspend(). CC: stable@kernel.org Fixes: 33e07157105e ("iommu/vt-d: Avoid GFP_ATOMIC where it is not needed") Signed-off-by: Zhang Rui Tested-by: Ooi, Chin Hao Link: https://lore.kernel.org/r/20230921093956.234692-1-rui.zhang@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230925120417.55977-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 16 ---------------- drivers/iommu/intel/iommu.h | 2 +- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5db283c17e0d..3685ba90ec88 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2998,13 +2998,6 @@ static int iommu_suspend(void) struct intel_iommu *iommu = NULL; unsigned long flag; - for_each_active_iommu(iommu, drhd) { - iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32), - GFP_KERNEL); - if (!iommu->iommu_state) - goto nomem; - } - iommu_flush_all(); for_each_active_iommu(iommu, drhd) { @@ -3024,12 +3017,6 @@ static int iommu_suspend(void) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } return 0; - -nomem: - for_each_active_iommu(iommu, drhd) - kfree(iommu->iommu_state); - - return -ENOMEM; } static void iommu_resume(void) @@ -3061,9 +3048,6 @@ static void iommu_resume(void) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } - - for_each_active_iommu(iommu, drhd) - kfree(iommu->iommu_state); } static struct syscore_ops iommu_syscore_ops = { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c18fb699c87a..7dac94f62b4e 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -681,7 +681,7 @@ struct intel_iommu { struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; struct q_inval *qi; /* Queued invalidation info */ - u32 *iommu_state; /* Store iommu states between suspend and resume.*/ + u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ -- cgit From 381c043233e66b1c160ef235675e65cf6c580e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Sep 2023 08:54:45 -0700 Subject: iomap: add a workaround for racy i_size updates on block devices A szybot reproducer that does write I/O while truncating the size of a block device can end up in clean_bdev_aliases, which tries to clean the bdev aliases that it uses. This is because iomap_to_bh automatically sets the BH_New flag when outside of i_size. For block devices updates to i_size are racy and we can hit this case in a tiny race window, leading to the eventual clean_bdev_aliases call. Fix this by erroring out of > i_size I/O on block devices. Reported-by: syzbot+1fa947e7f09e136925b8@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Tested-by: syzbot+1fa947e7f09e136925b8@syzkaller.appspotmail.com Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/buffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index a6785cd07081..12e9a71c693d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2058,8 +2058,17 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, fallthrough; case IOMAP_MAPPED: if ((iomap->flags & IOMAP_F_NEW) || - offset >= i_size_read(inode)) + offset >= i_size_read(inode)) { + /* + * This can happen if truncating the block device races + * with the check in the caller as i_size updates on + * block devices aren't synchronized by i_rwsem for + * block devices. + */ + if (S_ISBLK(inode->i_mode)) + return -EIO; set_buffer_new(bh); + } bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; set_buffer_mapped(bh); -- cgit From 9dda1178479aa0a73fe0eaabfe2d9a1c603cfeed Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 19 Sep 2023 18:41:01 +0100 Subject: firmware: arm_ffa: Don't set the memory region attributes for MEM_LEND As per the FF-A specification: section "Usage of other memory region attributes", in a transaction to donate memory or lend memory to a single borrower, if the receiver is a PE or Proxy endpoint, the owner must not specify the attributes and the relayer will return INVALID_PARAMETERS if the attributes are set. Let us not set the memory region attributes for MEM_LEND. Fixes: 82a8daaecfd9 ("firmware: arm_ffa: Add support for MEM_LEND") Reported-by: Joao Alves Reported-by: Olivier Deprez Link: https://lore.kernel.org/r/20230919-ffa_v1-1_notif-v2-13-6f3a3ca3923c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 2109cd178ff7..121f4fc903cd 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -397,6 +397,19 @@ static u32 ffa_get_num_pages_sg(struct scatterlist *sg) return num_pages; } +static u8 ffa_memory_attributes_get(u32 func_id) +{ + /* + * For the memory lend or donate operation, if the receiver is a PE or + * a proxy endpoint, the owner/sender must not specify the attributes + */ + if (func_id == FFA_FN_NATIVE(MEM_LEND) || + func_id == FFA_MEM_LEND) + return 0; + + return FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | FFA_MEM_INNER_SHAREABLE; +} + static int ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, struct ffa_mem_ops_args *args) @@ -413,8 +426,7 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, mem_region->tag = args->tag; mem_region->flags = args->flags; mem_region->sender_id = drv_info->vm_id; - mem_region->attributes = FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | - FFA_MEM_INNER_SHAREABLE; + mem_region->attributes = ffa_memory_attributes_get(func_id); ep_mem_access = &mem_region->ep_mem_access[0]; for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { -- cgit From 33908660e814203e996f6e775d033c5c32fcf9a7 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Sat, 26 Aug 2023 15:16:53 +0800 Subject: ACPI: NFIT: Fix incorrect calculation of idt size acpi_nfit_interleave's field 'line_offset' is switched to flexible array [1], but sizeof_idt() still calculates the size in the form of 1-element array. Therefore, fix incorrect calculation in sizeof_idt(). [1] https://lore.kernel.org/lkml/2652195.BddDVKsqQX@kreacher/ Fixes: 2a5ab99847bd ("ACPICA: struct acpi_nfit_interleave: Replace 1-element array with flexible array") Cc: stable@vger.kernel.org # v6.4+ Signed-off-by: Yu Liao Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20230826071654.564372-1-liaoyu15@huawei.com Signed-off-by: Dave Jiang --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f0e6738ae3c9..f96bf32cd368 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -855,7 +855,7 @@ static size_t sizeof_idt(struct acpi_nfit_interleave *idt) { if (idt->header.length < sizeof(*idt)) return 0; - return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1); + return sizeof(*idt) + sizeof(u32) * idt->line_count; } static bool add_idt(struct acpi_nfit_desc *acpi_desc, -- cgit From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 4 May 2023 14:00:42 +0200 Subject: KVM: x86/pmu: Truncate counter value to allowed width on write Performance counters are defined to have width less than 64 bits. The vPMU code maintains the counters in u64 variables but assumes the value to fit within the defined width. However, for Intel non-full-width counters (MSR_IA32_PERFCTRx) the value receieved from the guest is truncated to 32 bits and then sign-extended to full 64 bits. If a negative value is set, it's sign-extended to 64 bits, but then in kvm_pmu_incr_counter() it's incremented, truncated, and compared to the previous value for overflow detection. That previous value is not truncated, so it always evaluates bigger than the truncated new one, and a PMI is injected. If the PMI handler writes a negative counter value itself, the vCPU never quits the PMI loop. Turns out that Linux PMI handler actually does write the counter with the value just read with RDPMC, so when no full-width support is exposed via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to a negative value, it locks up. This has been observed in the field, for example, when the guest configures atop to use perfevents and runs two instances of it simultaneously. To address the problem, maintain the invariant that the counter value always fits in the defined bit width, by truncating the received value in the respective set_msr methods. For better readability, factor the out into a helper function, pmc_write_counter(), shared by vmx and svm parts. Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de Tested-by: Like Xu [sean: tweak changelog, s/set/write in the helper] Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.h | 6 ++++++ arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7d9ba301c090..1d64113de488 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } +static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val) +{ + pmc->counter += val - pmc_read_counter(pmc); + pmc->counter &= pmc_bitmask(pmc); +} + static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cef5a3d0abd0..373ff6a6687b 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); return 0; } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f2efa0bf7ae8..820d3e1f6b4f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { -- cgit From a16eb25b09c02a54c1c1b449d4b6cfa2cf3f013a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:47 +0000 Subject: KVM: x86: Mask LVTPC when handling a PMI Per the SDM, "When the local APIC handles a performance-monitoring counters interrupt, it automatically sets the mask flag in the LVT performance counter register." Add this behavior to KVM's local APIC emulation. Failure to mask the LVTPC entry results in spurious PMIs, e.g. when running Linux as a guest, PMI handlers that do a "late_ack" spew a large number of "dazed and confused" spurious NMI warnings. Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests") Cc: stable@vger.kernel.org Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-3-mizhang@google.com [sean: massage changelog, correct Fixes] Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index dcd60b39e794..3e977dbbf993 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_lapic_get_reg(apic, lvt_type); int vector, mode, trig_mode; + int r; if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode, - NULL); + + r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); + if (r && lvt_type == APIC_LVTPC) + kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); + return r; } return 0; } -- cgit From 73554b29bd70546c1a9efc9c160641ef1b849358 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:46 +0000 Subject: KVM: x86/pmu: Synthesize at most one PMI per VM-exit When the irq_work callback, kvm_pmi_trigger_fn(), is invoked during a VM-exit that also invokes __kvm_perf_overflow() as a result of instruction emulation, kvm_pmu_deliver_pmi() will be called twice before the next VM-entry. Calling kvm_pmu_deliver_pmi() twice is unlikely to be problematic now that KVM sets the LVTPC mask bit when delivering a PMI. But using IRQ work to trigger the PMI is still broken, albeit very theoretically. E.g. if the self-IPI to trigger IRQ work is be delayed long enough for the vCPU to be migrated to a different pCPU, then it's possible for kvm_pmi_trigger_fn() to race with the kvm_pmu_deliver_pmi() from KVM_REQ_PMI and still generate two PMIs. KVM could set the mask bit using an atomic operation, but that'd just be piling on unnecessary code to workaround what is effectively a hack. The *only* reason KVM uses IRQ work is to ensure the PMI is treated as a wake event, e.g. if the vCPU just executed HLT. Remove the irq_work callback for synthesizing a PMI, and all of the logic for invoking it. Instead, to prevent a vcpu from leaving C0 with a PMI pending, add a check for KVM_REQ_PMI to kvm_vcpu_has_events(). Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Tested-by: Dapeng Mi Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-2-mizhang@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/pmu.c | 27 +-------------------------- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17715cb8731d..70d139406bc8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -528,7 +528,6 @@ struct kvm_pmu { u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; - struct irq_work irq_work; /* * Overlay the bitmap with a 64-bit atomic so that all bits can be diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index edb89b51b383..9ae07db6f0f6 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static void kvm_pmi_trigger_fn(struct irq_work *irq_work) -{ - struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - kvm_pmu_deliver_pmi(vcpu); -} - static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - if (!pmc->intr || skip_pmi) - return; - - /* - * Inject PMI. If vcpu was in a guest mode during NMI PMI - * can be ejected on a guest mode re-entry. Otherwise we can't - * be sure that vcpu wasn't executing hlt instruction at the - * time of vmexit and is not going to re-enter guest mode until - * woken up. So we should wake it, but this is impossible from - * NMI context. Do it from irq work instead. - */ - if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu)) - irq_work_queue(&pmc_to_pmu(pmc)->irq_work); - else + if (pmc->intr && !skip_pmi) kvm_make_request(KVM_REQ_PMI, pmc->vcpu); } @@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) void kvm_pmu_reset(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - - irq_work_sync(&pmu->irq_work); static_call(kvm_x86_pmu_reset)(vcpu); } @@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); static_call(kvm_x86_pmu_init)(vcpu); - init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); pmu->event_count = 0; pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..42a4e8f5e89a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12843,6 +12843,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; #endif + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || kvm_guest_apic_has_interrupt(vcpu))) -- cgit From a578a25339aca38e23bb5af6e3fc6c2c51f0215c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Sep 2023 17:52:32 -0700 Subject: block: fix kernel-doc for disk_force_media_change() Drop one function parameter's kernel-doc comment since the parameter was removed. This prevents a kernel-doc warning: block/disk-events.c:300: warning: Excess function parameter 'events' description in 'disk_force_media_change' Fixes: ab6860f62bfe ("block: simplify the disk_force_media_change interface") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: lore.kernel.org/r/202309060957.vfl0mUur-lkp@intel.com Cc: Christoph Hellwig Cc: Jens Axboe Cc: linux-block@vger.kernel.org Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230926005232.23666-1-rdunlap@infradead.org Signed-off-by: Jens Axboe --- block/disk-events.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/disk-events.c b/block/disk-events.c index 422db8292d09..13c3372c465a 100644 --- a/block/disk-events.c +++ b/block/disk-events.c @@ -290,7 +290,6 @@ EXPORT_SYMBOL(disk_check_media_change); /** * disk_force_media_change - force a media change event * @disk: the disk which will raise the event - * @events: the events to raise * * Should be called when the media changes for @disk. Generates a uevent * and attempts to free all dentries and inodes and invalidates all block -- cgit From 334bf33eec5701a1e4e967bcb7cc8611a998334b Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 25 Sep 2023 17:18:56 +0200 Subject: wifi: cfg80211: avoid leaking stack data into trace If the structure is not initialized then boolean types might be copied into the tracing data without being initialised. This causes data from the stack to leak into the trace and also triggers a UBSAN failure which can easily be avoided here. Signed-off-by: Benjamin Berg Link: https://lore.kernel.org/r/20230925171855.a9271ef53b05.I8180bae663984c91a3e036b87f36a640ba409817@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7a88361b3414..931a03f4549c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8501,7 +8501,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct mesh_config cfg; + struct mesh_config cfg = {}; u32 mask; int err; -- cgit From aaba3cd33fc9593a858beeee419c0e6671ee9551 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 25 Sep 2023 17:30:29 +0200 Subject: wifi: mac80211: Create resources for disabled links When associating to an MLD AP, links may be disabled. Create all resources associated with a disabled link so that we can later enable it without having to create these resources on the fly. Fixes: 6d543b34dbcf ("wifi: mac80211: Support disabled links during association") Signed-off-by: Benjamin Berg Link: https://lore.kernel.org/r/20230925173028.f9afdb26f6c7.I4e6e199aaefc1bf017362d64f3869645fa6830b5@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0e61eb5a29d1..0c9198997482 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5130,9 +5130,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, continue; valid_links |= BIT(link_id); - if (assoc_data->link[link_id].disabled) { + if (assoc_data->link[link_id].disabled) dormant_links |= BIT(link_id); - } else if (link_id != assoc_data->assoc_link_id) { + + if (link_id != assoc_data->assoc_link_id) { err = ieee80211_sta_allocate_link(sta, link_id); if (err) goto out_err; @@ -5147,7 +5148,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link; struct link_sta_info *link_sta; - if (!cbss || assoc_data->link[link_id].disabled) + if (!cbss) continue; link = sdata_dereference(sdata->link[link_id], sdata); -- cgit From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 17 Sep 2023 15:07:40 +0200 Subject: rbd: move rbd_dev_refresh() definition Move rbd_dev_refresh() definition further down to avoid having to move struct parent_image_info definition in the next commit. This spares some forward declarations too. Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 68 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3de11f077144..5da001f1fe94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -7043,6 +7008,39 @@ err_out_format: return ret; } +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + u64 mapping_size; + int ret; + + down_write(&rbd_dev->header_rwsem); + mapping_size = rbd_dev->mapping.size; + + ret = rbd_dev_header_info(rbd_dev); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out; + } + + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; + +out: + up_write(&rbd_dev->header_rwsem); + if (!ret && mapping_size != rbd_dev->mapping.size) + rbd_dev_update_size(rbd_dev); + + return ret; +} + static ssize_t do_rbd_add(const char *buf, size_t count) { struct rbd_device *rbd_dev = NULL; -- cgit From 510a7330c82a7754d5df0117a8589e8a539067c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 19 Sep 2023 20:41:47 +0200 Subject: rbd: decouple header read-in from updating rbd_dev->header Make rbd_dev_header_info() populate a passed struct rbd_image_header instead of rbd_dev->header and introduce rbd_dev_update_header() for updating mutable fields in rbd_dev->header upon refresh. The initial read-in of both mutable and immutable fields in rbd_dev_image_probe() passes in rbd_dev->header so no update step is required there. rbd_init_layout() is now called directly from rbd_dev_image_probe() instead of individually in format 1 and format 2 implementations. Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 206 +++++++++++++++++++++++++++++----------------------- 1 file changed, 114 insertions(+), 92 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5da001f1fe94..6ed5520ef303 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -993,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1068,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -4857,7 +4862,9 @@ out_req: * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4905,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -5468,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5496,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5556,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5837,14 +5832,14 @@ out_err: return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5856,27 +5851,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6068,7 +6069,8 @@ out_err: return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6129,9 +6131,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6180,38 +6180,42 @@ out: return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6699,60 +6703,49 @@ out: */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. */ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); + if (ret) + return ret; } - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); return 0; - -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; } /* @@ -6947,13 +6940,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + rbd_init_layout(rbd_dev); + /* * If this image is the one being mapped, we have pool name and * id, image name and id, and snap name - need to fill snap id. @@ -7008,15 +7003,39 @@ err_out_format: return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + rbd_dev->header.image_size = header->image_size; + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { + struct rbd_image_header header = { 0 }; u64 mapping_size; int ret; down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &header, false); if (ret) goto out; @@ -7030,6 +7049,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } + rbd_dev_update_header(rbd_dev, &header); + rbd_assert(!rbd_is_snap(rbd_dev)); rbd_dev->mapping.size = rbd_dev->header.image_size; @@ -7038,6 +7059,7 @@ out: if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + rbd_image_header_cleanup(&header); return ret; } -- cgit From c10311776f0a8ddea2276df96e255625b07045a8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Sep 2023 18:38:26 +0200 Subject: rbd: decouple parent info read-in from updating rbd_dev Unlike header read-in, parent info read-in is already decoupled in get_parent_info(), but it's buried in rbd_dev_v2_parent_info() along with the processing logic. Separate the initial read-in and update read-in logic into rbd_dev_setup_parent() and rbd_dev_update_parent() respectively and have rbd_dev_v2_parent_info() just populate struct parent_image_info (i.e. what get_parent_info() did). Some existing QoI issues, like flatten of a standalone clone being disregarded on refresh, remain. Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 142 +++++++++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 62 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6ed5520ef303..d62a0298c890 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5594,6 +5594,14 @@ struct parent_image_info { u64 overlap; }; +static void rbd_parent_info_cleanup(struct parent_image_info *pii) +{ + kfree(pii->pool_ns); + kfree(pii->image_id); + + memset(pii, 0, sizeof(*pii)); +} + /* * The caller is responsible for @pii. */ @@ -5663,6 +5671,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev, if (pii->has_overlap) ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: @@ -5701,14 +5712,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev, pii->has_overlap = true; ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, + pii->has_overlap, pii->overlap); return 0; e_inval: return -EINVAL; } -static int get_parent_info(struct rbd_device *rbd_dev, - struct parent_image_info *pii) +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev, + struct parent_image_info *pii) { struct page *req_page, *reply_page; void *p; @@ -5736,7 +5750,7 @@ static int get_parent_info(struct rbd_device *rbd_dev, return ret; } -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +static int rbd_dev_setup_parent(struct rbd_device *rbd_dev) { struct rbd_spec *parent_spec; struct parent_image_info pii = { 0 }; @@ -5746,37 +5760,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (!parent_spec) return -ENOMEM; - ret = get_parent_info(rbd_dev, &pii); + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); if (ret) goto out_err; - dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", - __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id, - pii.has_overlap, pii.overlap); - - if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) { - /* - * Either the parent never existed, or we have - * record of it but the image got flattened so it no - * longer has a parent. When the parent of a - * layered image disappears we immediately set the - * overlap to 0. The effect of this is that all new - * requests will be treated as if the image had no - * parent. - * - * If !pii.has_overlap, the parent image spec is not - * applicable. It's there to avoid duplication in each - * snapshot record. - */ - if (rbd_dev->parent_overlap) { - rbd_dev->parent_overlap = 0; - rbd_dev_parent_put(rbd_dev); - pr_info("%s: clone image has been flattened\n", - rbd_dev->disk->disk_name); - } - + if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) goto out; /* No parent? No problem. */ - } /* The ceph file layout needs to fit pool id in 32 bits */ @@ -5788,46 +5777,34 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } /* - * The parent won't change (except when the clone is - * flattened, already handled that). So we only need to - * record the parent spec we have not already done so. + * The parent won't change except when the clone is flattened, + * so we only need to record the parent image spec once. */ - if (!rbd_dev->parent_spec) { - parent_spec->pool_id = pii.pool_id; - if (pii.pool_ns && *pii.pool_ns) { - parent_spec->pool_ns = pii.pool_ns; - pii.pool_ns = NULL; - } - parent_spec->image_id = pii.image_id; - pii.image_id = NULL; - parent_spec->snap_id = pii.snap_id; - - rbd_dev->parent_spec = parent_spec; - parent_spec = NULL; /* rbd_dev now owns this */ + parent_spec->pool_id = pii.pool_id; + if (pii.pool_ns && *pii.pool_ns) { + parent_spec->pool_ns = pii.pool_ns; + pii.pool_ns = NULL; } + parent_spec->image_id = pii.image_id; + pii.image_id = NULL; + parent_spec->snap_id = pii.snap_id; + + rbd_assert(!rbd_dev->parent_spec); + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ /* - * We always update the parent overlap. If it's zero we issue - * a warning, as we will proceed as if there was no parent. + * Record the parent overlap. If it's zero, issue a warning as + * we will proceed as if there is no parent. */ - if (!pii.overlap) { - if (parent_spec) { - /* refresh, careful to warn just once */ - if (rbd_dev->parent_overlap) - rbd_warn(rbd_dev, - "clone now standalone (overlap became 0)"); - } else { - /* initial probe */ - rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); - } - } + if (!pii.overlap) + rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); rbd_dev->parent_overlap = pii.overlap; out: ret = 0; out_err: - kfree(pii.pool_ns); - kfree(pii.image_id); + rbd_parent_info_cleanup(&pii); rbd_spec_put(parent_spec); return ret; } @@ -6977,7 +6954,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) } if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { - ret = rbd_dev_v2_parent_info(rbd_dev); + ret = rbd_dev_setup_parent(rbd_dev); if (ret) goto err_out_probe; } @@ -7026,9 +7003,47 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev, } } +static void rbd_dev_update_parent(struct rbd_device *rbd_dev, + struct parent_image_info *pii) +{ + if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { + /* + * Either the parent never existed, or we have + * record of it but the image got flattened so it no + * longer has a parent. When the parent of a + * layered image disappears we immediately set the + * overlap to 0. The effect of this is that all new + * requests will be treated as if the image had no + * parent. + * + * If !pii.has_overlap, the parent image spec is not + * applicable. It's there to avoid duplication in each + * snapshot record. + */ + if (rbd_dev->parent_overlap) { + rbd_dev->parent_overlap = 0; + rbd_dev_parent_put(rbd_dev); + pr_info("%s: clone has been flattened\n", + rbd_dev->disk->disk_name); + } + } else { + rbd_assert(rbd_dev->parent_spec); + + /* + * Update the parent overlap. If it became zero, issue + * a warning as we will proceed as if there is no parent. + */ + if (!pii->overlap && rbd_dev->parent_overlap) + rbd_warn(rbd_dev, + "clone has become standalone (overlap 0)"); + rbd_dev->parent_overlap = pii->overlap; + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { struct rbd_image_header header = { 0 }; + struct parent_image_info pii = { 0 }; u64 mapping_size; int ret; @@ -7044,12 +7059,14 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) * mapped image getting flattened. */ if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); + ret = rbd_dev_v2_parent_info(rbd_dev, &pii); if (ret) goto out; } rbd_dev_update_header(rbd_dev, &header); + if (rbd_dev->parent) + rbd_dev_update_parent(rbd_dev, &pii); rbd_assert(!rbd_is_snap(rbd_dev)); rbd_dev->mapping.size = rbd_dev->header.image_size; @@ -7059,6 +7076,7 @@ out: if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + rbd_parent_info_cleanup(&pii); rbd_image_header_cleanup(&header); return ret; } -- cgit From 0b207d02bd9ab8dcc31b262ca9f60dbc1822500d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Sep 2023 19:01:03 +0200 Subject: rbd: take header_rwsem in rbd_dev_refresh() only when updating rbd_dev_refresh() has been holding header_rwsem across header and parent info read-in unnecessarily for ages. With commit 870611e4877e ("rbd: get snapshot context after exclusive lock is ensured to be held"), the potential for deadlocks became much more real owning to a) header_rwsem now nesting inside lock_rwsem and b) rw_semaphores not allowing new readers after a writer is registered. For example, assuming that I/O request 1, I/O request 2 and header read-in request all target the same OSD: 1. I/O request 1 comes in and gets submitted 2. watch error occurs 3. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and releases lock_rwsem 4. after reestablishing the watch, rbd_reregister_watch() calls rbd_dev_refresh() which takes header_rwsem for write and submits a header read-in request 5. I/O request 2 comes in: after taking lock_rwsem for read in __rbd_img_handle_request(), it blocks trying to take header_rwsem for read in rbd_img_object_requests() 6. another watch error occurs 7. rbd_watch_errcb() blocks trying to take lock_rwsem for write 8. I/O request 1 completion is received by the messenger but can't be processed because lock_rwsem won't be granted anymore 9. header read-in request completion can't be received, let alone processed, because the messenger is stranded Change rbd_dev_refresh() to take header_rwsem only for actually updating rbd_dev->header. Header and parent info read-in don't need any locking. Cc: stable@vger.kernel.org # 0b035401c570: rbd: move rbd_dev_refresh() definition Cc: stable@vger.kernel.org # 510a7330c82a: rbd: decouple header read-in from updating rbd_dev->header Cc: stable@vger.kernel.org # c10311776f0a: rbd: decouple parent info read-in from updating rbd_dev Cc: stable@vger.kernel.org Fixes: 870611e4877e ("rbd: get snapshot context after exclusive lock is ensured to be held") Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d62a0298c890..a999b698b131 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6986,7 +6986,14 @@ static void rbd_dev_update_header(struct rbd_device *rbd_dev, rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ - rbd_dev->header.image_size = header->image_size; + if (rbd_dev->header.image_size != header->image_size) { + rbd_dev->header.image_size = header->image_size; + + if (!rbd_is_snap(rbd_dev)) { + rbd_dev->mapping.size = header->image_size; + rbd_dev_update_size(rbd_dev); + } + } ceph_put_snap_context(rbd_dev->header.snapc); rbd_dev->header.snapc = header->snapc; @@ -7044,11 +7051,9 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) { struct rbd_image_header header = { 0 }; struct parent_image_info pii = { 0 }; - u64 mapping_size; int ret; - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; + dout("%s rbd_dev %p\n", __func__, rbd_dev); ret = rbd_dev_header_info(rbd_dev, &header, false); if (ret) @@ -7064,18 +7069,13 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } + down_write(&rbd_dev->header_rwsem); rbd_dev_update_header(rbd_dev, &header); if (rbd_dev->parent) rbd_dev_update_parent(rbd_dev, &pii); - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); +out: rbd_parent_info_cleanup(&pii); rbd_image_header_cleanup(&header); return ret; -- cgit From 2b21207afd06714986a3d22442ed4860ba4f9ced Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 20 Sep 2023 17:43:12 +0800 Subject: ASoC: fsl-asoc-card: use integer type for fll_id and pll_id As the pll_id and pll_id can be zero (WM8960_SYSCLK_AUTO) with the commit 2bbc2df46e67 ("ASoC: wm8960: Make automatic the default clocking mode") Then the machine driver will skip to call set_sysclk() and set_pll() for codec, when the sysclk rate is different with what wm8960 read at probe, the output sound frequency is wrong. So change the fll_id and pll_id initial value, still keep machine driver's behavior same as before. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1695202992-24864-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl-asoc-card.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 76b5bfc288fd..bab7d34cf585 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -52,8 +52,8 @@ struct codec_priv { unsigned long mclk_freq; unsigned long free_freq; u32 mclk_id; - u32 fll_id; - u32 pll_id; + int fll_id; + int pll_id; }; /** @@ -206,7 +206,7 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, } /* Specific configuration for PLL */ - if (codec_priv->pll_id && codec_priv->fll_id) { + if (codec_priv->pll_id >= 0 && codec_priv->fll_id >= 0) { if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE) pll_out = priv->sample_rate * 384; else @@ -248,7 +248,7 @@ static int fsl_asoc_card_hw_free(struct snd_pcm_substream *substream) priv->streams &= ~BIT(substream->stream); - if (!priv->streams && codec_priv->pll_id && codec_priv->fll_id) { + if (!priv->streams && codec_priv->pll_id >= 0 && codec_priv->fll_id >= 0) { /* Force freq to be free_freq to avoid error message in codec */ ret = snd_soc_dai_set_sysclk(asoc_rtd_to_codec(rtd, 0), codec_priv->mclk_id, @@ -621,6 +621,10 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.dapm_routes = audio_map; priv->card.num_dapm_routes = ARRAY_SIZE(audio_map); priv->card.driver_name = DRIVER_NAME; + + priv->codec_priv.fll_id = -1; + priv->codec_priv.pll_id = -1; + /* Diversify the card configurations */ if (of_device_is_compatible(np, "fsl,imx-audio-cs42888")) { codec_dai_name = "cs42888"; -- cgit From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 20 Sep 2023 13:01:54 +0300 Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys After the change to use dynamic cache structure, new cache entries can be added and the mkey allocation can no longer assume that all mkeys created for the cache have access_flags equal to zero. Example of a flow that exposes the issue: A user registers MR with RO on a HCA that cannot UMR RO and the mkey is created outside of the cache. When the user deregisters the MR, a new cache entry is created to store mkeys with RO. Later, the user registers 2 MRs with RO. The first MR is reused from the new cache entry. When we try to get the second mkey from the cache we see the entry is empty so we go to the MR cache mkey allocation flow which would have allocated a mkey with no access flags, resulting the user getting a MR without RO. Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") Reviewed-by: Edward Srouji Signed-off-by: Michael Guralnik Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e345ef380f1..10da9e81fa62 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -301,7 +301,8 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) { - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); + set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, + ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); -- cgit From 2fad8f06a582cd431d398a0b3f9be21d069603ab Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Wed, 20 Sep 2023 13:01:55 +0300 Subject: RDMA/mlx5: Fix mutex unlocking on error flow for steering anchor creation The mutex was not unlocked on some of the error flows. Moved the unlock location to include all the error flow scenarios. Fixes: e1f4a52ac171 ("RDMA/mlx5: Create an indirect flow table for steering anchor") Reviewed-by: Mark Bloch Signed-off-by: Hamdan Igbaria Link: https://lore.kernel.org/r/1244a69d783da997c0af0b827c622eb00495492e.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 1e419e080b53..520034acf73a 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2470,8 +2470,8 @@ destroy_res: mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: put_flow_table(dev, ft_prio, true); - mutex_unlock(&dev->flow_db->lock); free_obj: + mutex_unlock(&dev->flow_db->lock); kfree(obj); return err; -- cgit From dab994bcc609a172bfdab15a0d4cb7e50e8b5458 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 20 Sep 2023 13:01:56 +0300 Subject: RDMA/mlx5: Fix NULL string error checkpath is complaining about NULL string, change it to 'Unknown'. Fixes: 37aa5c36aa70 ("IB/mlx5: Add UARs write-combining and non-cached mapping") Signed-off-by: Shay Drory Link: https://lore.kernel.org/r/8638e5c14fadbde5fa9961874feae917073af920.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aed5cdea50e6..555629b798b9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2084,7 +2084,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) case MLX5_IB_MMAP_DEVICE_MEM: return "Device Memory"; default: - return NULL; + return "Unknown"; } } -- cgit From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 12 Sep 2023 13:07:45 +0300 Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup Fix the deadlock by refactoring the MR cache cleanup flow to flush the workqueue without holding the rb_lock. This adds a race between cache cleanup and creation of new entries which we solve by denied creation of new entries after cache cleanup started. Lockdep: WARNING: possible circular locking dependency detected [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted [ 2785.339778 ] ------------------------------------------------------ [ 2785.340848 ] devlink/53872 is trying to acquire lock: [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900 [ 2785.343403 ] [ 2785.343403 ] but task is already holding lock: [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] [ 2785.346273 ] [ 2785.346273 ] which lock already depends on the new lock. [ 2785.346273 ] [ 2785.347720 ] [ 2785.347720 ] the existing dependency chain (in reverse order) is: [ 2785.349003 ] [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}: [ 2785.350160 ] __mutex_lock+0x14c/0x15c0 [ 2785.350962 ] delayed_cache_work_func+0x2d1/0x610 [mlx5_ib] [ 2785.352044 ] process_one_work+0x7c2/0x1310 [ 2785.352879 ] worker_thread+0x59d/0xec0 [ 2785.353636 ] kthread+0x28f/0x330 [ 2785.354370 ] ret_from_fork+0x1f/0x30 [ 2785.355135 ] [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}: [ 2785.356515 ] __lock_acquire+0x2d8a/0x5fe0 [ 2785.357349 ] lock_acquire+0x1c1/0x540 [ 2785.358121 ] __flush_work+0xe8/0x900 [ 2785.358852 ] __cancel_work_timer+0x2c7/0x3f0 [ 2785.359711 ] mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib] [ 2785.360781 ] mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib] [ 2785.361969 ] __mlx5_ib_remove+0x68/0x120 [mlx5_ib] [ 2785.362960 ] mlx5r_remove+0x63/0x80 [mlx5_ib] [ 2785.363870 ] auxiliary_bus_remove+0x52/0x70 [ 2785.364715 ] device_release_driver_internal+0x3c1/0x600 [ 2785.365695 ] bus_remove_device+0x2a5/0x560 [ 2785.366525 ] device_del+0x492/0xb80 [ 2785.367276 ] mlx5_detach_device+0x1a9/0x360 [mlx5_core] [ 2785.368615 ] mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core] [ 2785.369934 ] mlx5_devlink_reload_down+0x292/0x580 [mlx5_core] [ 2785.371292 ] devlink_reload+0x439/0x590 [ 2785.372075 ] devlink_nl_cmd_reload+0xaef/0xff0 [ 2785.372973 ] genl_family_rcv_msg_doit.isra.0+0x1bd/0x290 [ 2785.374011 ] genl_rcv_msg+0x3ca/0x6c0 [ 2785.374798 ] netlink_rcv_skb+0x12c/0x360 [ 2785.375612 ] genl_rcv+0x24/0x40 [ 2785.376295 ] netlink_unicast+0x438/0x710 [ 2785.377121 ] netlink_sendmsg+0x7a1/0xca0 [ 2785.377926 ] sock_sendmsg+0xc5/0x190 [ 2785.378668 ] __sys_sendto+0x1bc/0x290 [ 2785.379440 ] __x64_sys_sendto+0xdc/0x1b0 [ 2785.380255 ] do_syscall_64+0x3d/0x90 [ 2785.381031 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 2785.381967 ] [ 2785.381967 ] other info that might help us debug this: [ 2785.381967 ] [ 2785.383448 ] Possible unsafe locking scenario: [ 2785.383448 ] [ 2785.384544 ] CPU0 CPU1 [ 2785.385383 ] ---- ---- [ 2785.386193 ] lock(&dev->cache.rb_lock); [ 2785.386940 ] lock((work_completion)(&(&ent->dwork)->work)); [ 2785.388327 ] lock(&dev->cache.rb_lock); [ 2785.389425 ] lock((work_completion)(&(&ent->dwork)->work)); [ 2785.390414 ] [ 2785.390414 ] *** DEADLOCK *** [ 2785.390414 ] [ 2785.391579 ] 6 locks held by devlink/53872: [ 2785.392341 ] #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 [ 2785.393630 ] #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0 [ 2785.395324 ] #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core] [ 2785.397322 ] #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core] [ 2785.399231 ] #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600 [ 2785.400864 ] #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree") Signed-off-by: Shay Drory Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 16713baf0d06..d1ff98aad162 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -798,6 +798,7 @@ struct mlx5_mkey_cache { struct dentry *fs_root; unsigned long last_add; struct delayed_work remove_ent_dwork; + u8 disable: 1; }; struct mlx5_ib_port_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 10da9e81fa62..433f96459246 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1025,19 +1025,27 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) if (!dev->cache.wq) return; - cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); mutex_lock(&dev->cache.rb_lock); + dev->cache.disable = true; for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ent->disabled = true; xa_unlock_irq(&ent->mkeys); - cancel_delayed_work_sync(&ent->dwork); } + mutex_unlock(&dev->cache.rb_lock); + + /* + * After all entries are disabled and will not reschedule on WQ, + * flush it and all async commands. + */ + flush_workqueue(dev->cache.wq); mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); + /* At this point all entries are disabled and have no concurrent work. */ + mutex_lock(&dev->cache.rb_lock); node = rb_first(root); while (node) { ent = rb_entry(node, struct mlx5_cache_ent, node); @@ -1822,6 +1830,10 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, } mutex_lock(&cache->rb_lock); + if (cache->disable) { + mutex_unlock(&cache->rb_lock); + return 0; + } ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); if (ent) { if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -- cgit From 534c673b4e0277cd5925ebe89bd2689373352921 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Fri, 22 Sep 2023 15:34:06 -0700 Subject: MAINTAINERS: aspeed: Update git tree URL The description for joel/aspeed.git on git.kernel.org currently says: Old Aspeed tree. Please see joel/bmc.git Let's update MAINTAINERS accordingly. Signed-off-by: Zev Weiss Acked-by: Joel Stanley Link: https://lore.kernel.org/r/20230922223405.24717-2-zev@bewilderbeest.net Signed-off-by: Joel Stanley --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..8bf29812f1fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1968,7 +1968,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) S: Supported Q: https://patchwork.ozlabs.org/project/linux-aspeed/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joel/aspeed.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc.git F: Documentation/devicetree/bindings/arm/aspeed/ F: arch/arm/boot/dts/aspeed/ F: arch/arm/mach-aspeed/ -- cgit From 9c888dbf2164e320e63f3ebfd85a99486bee9c3a Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 25 Sep 2023 12:36:47 +0930 Subject: MAINTAINERS: aspeed: Update Andrew's email address I've changed employers, have company email that deals with patch-based workflows without too much of a headache, and am trying to steer some content out of my personal mail. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20230925030647.40283-1-andrew@codeconstruct.com.au Signed-off-by: Joel Stanley --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8bf29812f1fb..d52634553ddc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1963,7 +1963,7 @@ F: drivers/irqchip/irq-aspeed-i2c-ic.c ARM/ASPEED MACHINE SUPPORT M: Joel Stanley -R: Andrew Jeffery +R: Andrew Jeffery L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) S: Supported @@ -3058,7 +3058,7 @@ F: Documentation/devicetree/bindings/peci/peci-aspeed.yaml F: drivers/peci/controller/peci-aspeed.c ASPEED PINCTRL DRIVERS -M: Andrew Jeffery +M: Andrew Jeffery L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) L: openbmc@lists.ozlabs.org (moderated for non-subscribers) L: linux-gpio@vger.kernel.org @@ -3075,7 +3075,7 @@ F: drivers/irqchip/irq-aspeed-scu-ic.c F: include/dt-bindings/interrupt-controller/aspeed-scu-ic.h ASPEED SD/MMC DRIVER -M: Andrew Jeffery +M: Andrew Jeffery L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) L: openbmc@lists.ozlabs.org (moderated for non-subscribers) L: linux-mmc@vger.kernel.org -- cgit From 1bbac8d6af085408885675c1e29b2581250be124 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 25 Aug 2023 15:55:02 +0200 Subject: dt-bindings: mmc: sdhci-msm: correct minimum number of clocks In the TXT binding before conversion, the "xo" clock was listed as optional. Conversion kept it optional in "clock-names", but not in "clocks". This fixes dbts_check warnings like: qcom-sdx65-mtp.dtb: mmc@8804000: clocks: [[13, 59], [13, 58]] is too short Cc: Fixes: a45537723f4b ("dt-bindings: mmc: sdhci-msm: Convert bindings to yaml") Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230825135503.282135-1-krzysztof.kozlowski@linaro.org Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-msm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml index 80141eb7fc6b..10f34aa8ba8a 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml @@ -69,7 +69,7 @@ properties: maxItems: 4 clocks: - minItems: 3 + minItems: 2 items: - description: Main peripheral bus clock, PCLK/HCLK - AHB Bus clock - description: SDC MMC clock, MCLK -- cgit From 32a9cdb8869dc111a0c96cf8e1762be9684af15b Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 30 Aug 2023 17:39:22 +0800 Subject: mmc: core: sdio: hold retuning if sdio in 1-bit mode tuning only support in 4-bit mode or 8 bit mode, so in 1-bit mode, need to hold retuning. Find this issue when use manual tuning method on imx93. When system resume back, SDIO WIFI try to switch back to 4 bit mode, first will trigger retuning, and all tuning command failed. Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230830093922.3095850-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index f64b9ac76a5c..5914516df2f7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1089,8 +1089,14 @@ static int mmc_sdio_resume(struct mmc_host *host) } err = mmc_sdio_reinit_card(host); } else if (mmc_card_wake_sdio_irq(host)) { - /* We may have switched to 1-bit mode during suspend */ + /* + * We may have switched to 1-bit mode during suspend, + * need to hold retuning, because tuning only supprt + * 4-bit mode or 8 bit mode. + */ + mmc_retune_hold_now(host); err = sdio_enable_4bit_bus(host->card); + mmc_retune_release(host); } if (err) -- cgit From 1202d617e3d04c8d27a14ef30784a698c48170b3 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Thu, 31 Aug 2023 16:00:56 +0000 Subject: mmc: sdhci-pci-gli: fix LPM negotiation so x86/S0ix SoCs can suspend To improve the r/w performance of GL9763E, the current driver inhibits LPM negotiation while the device is active. This prevents a large number of SoCs from suspending, notably x86 systems which commonly use S0ix as the suspend mechanism - for example, Intel Alder Lake and Raptor Lake processors. Failure description: 1. Userspace initiates s2idle suspend (e.g. via writing to /sys/power/state) 2. This switches the runtime_pm device state to active, which disables LPM negotiation, then calls the "regular" suspend callback 3. With LPM negotiation disabled, the bus cannot enter low-power state 4. On a large number of SoCs, if the bus not in a low-power state, S0ix cannot be entered, which in turn prevents the SoC from entering suspend. Fix by re-enabling LPM negotiation in the device's suspend callback. Suggested-by: Stanislaw Kardach Fixes: f9e5b33934ce ("mmc: host: Improve I/O read/write performance for GL9763E") Cc: stable@vger.kernel.org Signed-off-by: Sven van Ashbrook Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20230831160055.v3.1.I7ed1ca09797be2dd76ca914c57d88b32d24dac88@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 104 +++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index ae8c307b7aa7..109d4b010f97 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1144,42 +1144,6 @@ static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) return value; } -#ifdef CONFIG_PM_SLEEP -static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - - pci_free_irq_vectors(slot->chip->pdev); - gli_pcie_enable_msi(slot); - - return sdhci_pci_resume_host(chip); -} - -static int sdhci_cqhci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = sdhci_pci_gli_resume(chip); - if (ret) - return ret; - - return cqhci_resume(slot->host->mmc); -} - -static int sdhci_cqhci_gli_suspend(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = cqhci_suspend(slot->host->mmc); - if (ret) - return ret; - - return sdhci_suspend_host(slot->host); -} -#endif - static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc, struct mmc_ios *ios) { @@ -1420,6 +1384,70 @@ static int gl9763e_runtime_resume(struct sdhci_pci_chip *chip) } #endif +#ifdef CONFIG_PM_SLEEP +static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + + pci_free_irq_vectors(slot->chip->pdev); + gli_pcie_enable_msi(slot); + + return sdhci_pci_resume_host(chip); +} + +static int gl9763e_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + ret = sdhci_pci_gli_resume(chip); + if (ret) + return ret; + + ret = cqhci_resume(slot->host->mmc); + if (ret) + return ret; + + /* + * Disable LPM negotiation to bring device back in sync + * with its runtime_pm state. + */ + gl9763e_set_low_power_negotiation(slot, false); + + return 0; +} + +static int gl9763e_suspend(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + /* + * Certain SoCs can suspend only with the bus in low- + * power state, notably x86 SoCs when using S0ix. + * Re-enable LPM negotiation to allow entering L1 state + * and entering system suspend. + */ + gl9763e_set_low_power_negotiation(slot, true); + + ret = cqhci_suspend(slot->host->mmc); + if (ret) + goto err_suspend; + + ret = sdhci_suspend_host(slot->host); + if (ret) + goto err_suspend_host; + + return 0; + +err_suspend_host: + cqhci_resume(slot->host->mmc); +err_suspend: + gl9763e_set_low_power_negotiation(slot, false); + return ret; +} +#endif + static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; @@ -1527,8 +1555,8 @@ const struct sdhci_pci_fixes sdhci_gl9763e = { .probe_slot = gli_probe_slot_gl9763e, .ops = &sdhci_gl9763e_ops, #ifdef CONFIG_PM_SLEEP - .resume = sdhci_cqhci_gli_resume, - .suspend = sdhci_cqhci_gli_suspend, + .resume = gl9763e_resume, + .suspend = gl9763e_suspend, #endif #ifdef CONFIG_PM .runtime_suspend = gl9763e_runtime_suspend, -- cgit From 168054ca5cf783e07518681fad0904de8dcc6b17 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Sep 2023 12:54:50 +0300 Subject: mmc: sdhci-sprd: Fix error code in sdhci_sprd_tuning() Return an error code if sdhci_sprd_get_best_clk_sample() fails. Currently, it returns success. Fixes: d83d251bf3c2 ("mmc: sdhci-sprd: Add SD HS mode online tuning") Signed-off-by: Dan Carpenter Reviewed-by: Wenchao Chen Reviewed-by: Baolin Wang Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/a8af0a08-8405-43cc-bd83-85ff25f572ca@moroto.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 649ae075e229..6b84ba27e6ab 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -644,6 +644,7 @@ static int sdhci_sprd_tuning(struct mmc_host *mmc, struct mmc_card *card, best_clk_sample = sdhci_sprd_get_best_clk_sample(mmc, value); if (best_clk_sample < 0) { dev_err(mmc_dev(host->mmc), "all tuning phase fail!\n"); + err = best_clk_sample; goto out; } -- cgit From f19c5a73e6f78d69efce66cfdce31148c76a61a6 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 13 Sep 2023 13:29:21 +0200 Subject: mmc: core: Fix error propagation for some ioctl commands Userspace has currently no way of checking the internal R1 response error bits for some commands. This is a problem for some commands, like RPMB for example. Typically, we may detect that the busy completion has successfully ended, while in fact the card did not complete the requested operation. To fix the problem, let's always poll with CMD13 for these commands and during the polling, let's also aggregate the R1 response bits. Before completing the ioctl request, let's propagate the R1 response bits too. Reviewed-by: Avri Altman Co-developed-by: Christian Loehle Signed-off-by: Christian Loehle Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230913112921.553019-1-ulf.hansson@linaro.org --- drivers/mmc/core/block.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index b5b414a71e0b..3a8f27c3e310 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -179,6 +179,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_queue *mq); static void mmc_blk_hsq_req_done(struct mmc_request *mrq); static int mmc_spi_err_check(struct mmc_card *card); +static int mmc_blk_busy_cb(void *cb_data, bool *busy); static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) { @@ -470,7 +471,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, struct mmc_data data = {}; struct mmc_request mrq = {}; struct scatterlist sg; - bool r1b_resp, use_r1b_resp = false; + bool r1b_resp; unsigned int busy_timeout_ms; int err; unsigned int target_part; @@ -551,8 +552,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, busy_timeout_ms = idata->ic.cmd_timeout_ms ? : MMC_BLK_TIMEOUT_MS; r1b_resp = (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B; if (r1b_resp) - use_r1b_resp = mmc_prepare_busy_cmd(card->host, &cmd, - busy_timeout_ms); + mmc_prepare_busy_cmd(card->host, &cmd, busy_timeout_ms); mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); @@ -605,19 +605,28 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->ic.postsleep_min_us) usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); - /* No need to poll when using HW busy detection. */ - if ((card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) && use_r1b_resp) - return 0; - if (mmc_host_is_spi(card->host)) { if (idata->ic.write_flag || r1b_resp || cmd.flags & MMC_RSP_SPI_BUSY) return mmc_spi_err_check(card); return err; } - /* Ensure RPMB/R1B command has completed by polling with CMD13. */ - if (idata->rpmb || r1b_resp) - err = mmc_poll_for_busy(card, busy_timeout_ms, false, - MMC_BUSY_IO); + + /* + * Ensure RPMB, writes and R1B responses are completed by polling with + * CMD13. Note that, usually we don't need to poll when using HW busy + * detection, but here it's needed since some commands may indicate the + * error through the R1 status bits. + */ + if (idata->rpmb || idata->ic.write_flag || r1b_resp) { + struct mmc_blk_busy_data cb_data = { + .card = card, + }; + + err = __mmc_poll_for_busy(card->host, 0, busy_timeout_ms, + &mmc_blk_busy_cb, &cb_data); + + idata->ic.response[0] = cb_data.status; + } return err; } -- cgit From c7bb120c1c66672b657e95d0942c989b8275aeb3 Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Fri, 22 Sep 2023 17:53:48 +0800 Subject: mmc: mtk-sd: Use readl_poll_timeout_atomic in msdc_reset_hw Use atomic readl_poll_timeout_atomic, because msdc_reset_hw may be invoked in IRQ handler in the following context: msdc_irq() -> msdc_cmd_done() -> msdc_reset_hw() The following kernel BUG stack trace can be observed on Genio 1200 EVK after initializing MSDC1 hardware during kernel boot: [ 1.187441] BUG: scheduling while atomic: swapper/0/0/0x00010002 [ 1.189157] Modules linked in: [ 1.204633] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.15.42-mtk+modified #1 [ 1.205713] Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT) [ 1.206484] Call trace: [ 1.206796] dump_backtrace+0x0/0x1ac [ 1.207266] show_stack+0x24/0x30 [ 1.207692] dump_stack_lvl+0x68/0x84 [ 1.208162] dump_stack+0x1c/0x38 [ 1.208587] __schedule_bug+0x68/0x80 [ 1.209056] __schedule+0x6ec/0x7c0 [ 1.209502] schedule+0x7c/0x110 [ 1.209915] schedule_hrtimeout_range_clock+0xc4/0x1f0 [ 1.210569] schedule_hrtimeout_range+0x20/0x30 [ 1.211148] usleep_range_state+0x84/0xc0 [ 1.211661] msdc_reset_hw+0xc8/0x1b0 [ 1.212134] msdc_cmd_done.isra.0+0x4ac/0x5f0 [ 1.212693] msdc_irq+0x104/0x2d4 [ 1.213121] __handle_irq_event_percpu+0x68/0x280 [ 1.213725] handle_irq_event+0x70/0x15c [ 1.214230] handle_fasteoi_irq+0xb0/0x1a4 [ 1.214755] handle_domain_irq+0x6c/0x9c [ 1.215260] gic_handle_irq+0xc4/0x180 [ 1.215741] call_on_irq_stack+0x2c/0x54 [ 1.216245] do_interrupt_handler+0x5c/0x70 [ 1.216782] el1_interrupt+0x30/0x80 [ 1.217242] el1h_64_irq_handler+0x1c/0x2c [ 1.217769] el1h_64_irq+0x78/0x7c [ 1.218206] cpuidle_enter_state+0xc8/0x600 [ 1.218744] cpuidle_enter+0x44/0x5c [ 1.219205] do_idle+0x224/0x2d0 [ 1.219624] cpu_startup_entry+0x30/0x80 [ 1.220129] rest_init+0x108/0x134 [ 1.220568] arch_call_rest_init+0x1c/0x28 [ 1.221094] start_kernel+0x6c0/0x700 [ 1.221564] __primary_switched+0xc0/0xc8 Fixes: ffaea6ebfe9c ("mmc: mtk-sd: Use readl_poll_timeout instead of open-coded polling") Signed-off-by: Pablo Sun Reviewed-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230922095348.22182-1-pablo.sun@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 5392200cfdf7..97f7c3d4be6e 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -669,11 +669,11 @@ static void msdc_reset_hw(struct msdc_host *host) u32 val; sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_RST); - readl_poll_timeout(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); sdr_set_bits(host->base + MSDC_FIFOCS, MSDC_FIFOCS_CLR); - readl_poll_timeout(host->base + MSDC_FIFOCS, val, - !(val & MSDC_FIFOCS_CLR), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_FIFOCS, val, + !(val & MSDC_FIFOCS_CLR), 0, 0); val = readl(host->base + MSDC_INT); writel(val, host->base + MSDC_INT); -- cgit From 8adb4e647a83cb5928c05dae95b010224aea0705 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 19 Sep 2023 00:50:26 +0200 Subject: regulator/core: regulator_register: set device->class earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fixing a memory leak in commit d3c731564e09 ("regulator: plug of_node leak in regulator_register()'s error path") it moved the device_initialize() call earlier, but did not move the `dev->class` initialization. The bug was spotted and fixed by reverting part of the commit (in commit 5f4b204b6b81 "regulator: core: fix kobject release warning and memory leak in regulator_register()") but introducing a different bug: now early error paths use `kfree(dev)` instead of `put_device()` for an already initialized `struct device`. Move the missing assignments to just after `device_initialize()`. Fixes: d3c731564e09 ("regulator: plug of_node leak in regulator_register()'s error path") Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/b5b19cb458c40c9d02f3d5a7bd1ba7d97ba17279.1695077303.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d8e1caaf207e..2820badc7a12 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5542,6 +5542,8 @@ regulator_register(struct device *dev, goto rinse; } device_initialize(&rdev->dev); + dev_set_drvdata(&rdev->dev, rdev); + rdev->dev.class = ®ulator_class; spin_lock_init(&rdev->err_lock); /* @@ -5603,11 +5605,9 @@ regulator_register(struct device *dev, rdev->supply_name = regulator_desc->supply_name; /* register with sysfs */ - rdev->dev.class = ®ulator_class; rdev->dev.parent = config->dev; dev_set_name(&rdev->dev, "regulator.%lu", (unsigned long) atomic_inc_return(®ulator_no)); - dev_set_drvdata(&rdev->dev, rdev); /* set regulator constraints */ if (init_data) -- cgit From 6e800968f6a715c0661716d2ec5e1f56ed9f9c08 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 19 Sep 2023 00:50:27 +0200 Subject: regulator/core: Revert "fix kobject release warning and memory leak in regulator_register()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5f4b204b6b8153923d5be8002c5f7082985d153f. Since rdev->dev now has a release() callback, the proper way of freeing the initialized device can be restored. Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/d7f469f3f7b1f0e1d52f9a7ede3f3c5703382090.1695077303.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 2820badc7a12..3137e40fcd3e 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5724,15 +5724,11 @@ wash: mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); - put_device(&rdev->dev); - rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); - if (rdev && rdev->dev.of_node) - of_node_put(rdev->dev.of_node); - kfree(rdev); kfree(config); + put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); -- cgit From 863a8eb3f27098b42772f668e3977ff4cae10b04 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 19 Sep 2023 20:48:55 +0100 Subject: i915: Limit the length of an sg list to the requested length The folio conversion changed the behaviour of shmem_sg_alloc_table() to put the entire length of the last folio into the sg list, even if the sg list should have been shorter. gen8_ggtt_insert_entries() relied on the list being the right length and would overrun the end of the page tables. Other functions may also have been affected. Clamp the length of the last entry in the sg list to be the expected length. Signed-off-by: Matthew Wilcox (Oracle) Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch") Cc: stable@vger.kernel.org # 6.5.x Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9256 Link: https://lore.kernel.org/lkml/6287208.lOV4Wx5bFT@natalenko.name/ Reported-by: Oleksandr Natalenko Tested-by: Oleksandr Natalenko Reviewed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230919194855.347582-1-willy@infradead.org (cherry picked from commit 26a8e32e6d77900819c0c730fbfb393692dbbeea) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 8f1633c3fb93..73a4a4eb29e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, st->nents = 0; for (i = 0; i < page_count; i++) { struct folio *folio; + unsigned long nr_pages; const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, 0, @@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, } } while (1); + nr_pages = min_t(unsigned long, + folio_nr_pages(folio), page_count - i); if (!i || sg->length >= max_segment || folio_pfn(folio) != next_pfn) { @@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, sg = sg_next(sg); st->nents++; - sg_set_folio(sg, folio, folio_size(folio), 0); + sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); } else { /* XXX: could overflow? */ - sg->length += folio_size(folio); + sg->length += nr_pages * PAGE_SIZE; } - next_pfn = folio_pfn(folio) + folio_nr_pages(folio); - i += folio_nr_pages(folio) - 1; + next_pfn = folio_pfn(folio) + nr_pages; + i += nr_pages - 1; /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL); -- cgit From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001 From: Javier Pello Date: Sat, 2 Sep 2023 17:10:39 +0200 Subject: drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top There is an assertion in ggtt_reserve_guc_top that the global GTT is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation fails and the driver fails to initialise the device: i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC i915 0000:00:02.0: Device initialization failed (-28) i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details. i915: probe of 0000:00:02.0 failed with error -28 Make the reservation at the top of the available space, whatever that is, instead of assuming that the top will be GUC_GGTT_TOP. Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080 Signed-off-by: Javier Pello Reviewed-by: Daniele Ceraolo Spurio Cc: Fernando Pacheco Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697dbcf62d6d8@otheo.eu (cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n"); -- cgit From 907ef0398c938be8232b77c61cfcf50fbfd95554 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 25 Sep 2023 12:21:17 -0700 Subject: i915/guc: Get runtime pm in busyness worker only if already active Ideally the busyness worker should take a gt pm wakeref because the worker only needs to be active while gt is awake. However, the gt_park path cancels the worker synchronously and this complicates the flow if the worker is also running at the same time. The cancel waits for the worker and when the worker releases the wakeref, that would call gt_park and would lead to a deadlock. The resolution is to take the global pm wakeref if runtime pm is already active. If not, we don't need to update the busyness stats as the stats would already be updated when the gt was parked. Note: - We do not requeue the worker if we cannot take a reference to runtime pm since intel_guc_busyness_unpark would requeue the worker in the resume path. - If the gt was parked longer than time taken for GT timestamp to roll over, we ignore those rollovers since we don't care about tracking the exact GT time. We only care about roll overs when the gt is active and running workloads. - There is a window of time between gt_park and runtime suspend, where the worker may run. This is acceptable since the worker will not find any new data to update busyness. v2: (Daniele) - Edit commit message and code comment - Use runtime pm in the worker - Put runtime pm after enabling the worker - Use Link tag and add Fixes tag v3: (Daniele) - Reword commit and comments and add details Link: https://gitlab.freedesktop.org/drm/intel/-/issues/7077 Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230925192117.2497058-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit e2f99b79d4c594cdf7ab449e338d4947f5ea8903) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 38 +++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b5b7f2fe8c78..dc7b40e06e38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1432,6 +1432,36 @@ static void guc_timestamp_ping(struct work_struct *wrk) unsigned long index; int srcu, ret; + /* + * Ideally the busyness worker should take a gt pm wakeref because the + * worker only needs to be active while gt is awake. However, the + * gt_park path cancels the worker synchronously and this complicates + * the flow if the worker is also running at the same time. The cancel + * waits for the worker and when the worker releases the wakeref, that + * would call gt_park and would lead to a deadlock. + * + * The resolution is to take the global pm wakeref if runtime pm is + * already active. If not, we don't need to update the busyness stats as + * the stats would already be updated when the gt was parked. + * + * Note: + * - We do not requeue the worker if we cannot take a reference to runtime + * pm since intel_guc_busyness_unpark would requeue the worker in the + * resume path. + * + * - If the gt was parked longer than time taken for GT timestamp to roll + * over, we ignore those rollovers since we don't care about tracking + * the exact GT time. We only care about roll overs when the gt is + * active and running workloads. + * + * - There is a window of time between gt_park and runtime suspend, + * where the worker may run. This is acceptable since the worker will + * not find any new data to update busyness. + */ + wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); + if (!wakeref) + return; + /* * Synchronize with gt reset to make sure the worker does not * corrupt the engine/guc stats. NB: can't actually block waiting @@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) */ ret = intel_gt_reset_trylock(gt, &srcu); if (ret) - return; + goto err_trylock; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) - __update_guc_busyness_stats(guc); + __update_guc_busyness_stats(guc); /* adjust context stats for overflow */ xa_for_each(&guc->context_lookup, index, ce) @@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) intel_gt_reset_unlock(gt, srcu); guc_enable_busyness_worker(guc); + +err_trylock: + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } static int guc_action_enable_usage_stats(struct intel_guc *guc) -- cgit From be210c6d3597faf330cb9af33b9f1591d7b2a983 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Thu, 21 Sep 2023 06:45:05 +0000 Subject: ima: Finish deprecation of IMA_TRUSTED_KEYRING Kconfig The removal of IMA_TRUSTED_KEYRING made IMA_LOAD_X509 and IMA_BLACKLIST_KEYRING unavailable because the latter two depend on the former. Since IMA_TRUSTED_KEYRING was deprecated in favor of INTEGRITY_TRUSTED_KEYRING use it as a dependency for the two Kconfigs affected by the deprecation. Fixes: 5087fd9e80e5 ("ima: Remove deprecated IMA_TRUSTED_KEYRING Kconfig") Signed-off-by: Oleksandr Tymoshenko Reviewed-by: Nayna Jain Signed-off-by: Mimi Zohar --- security/integrity/ima/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index ecddc807c536..4e559bd1fd41 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -269,7 +269,7 @@ config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY config IMA_BLACKLIST_KEYRING bool "Create IMA machine owner blacklist keyrings (EXPERIMENTAL)" depends on SYSTEM_TRUSTED_KEYRING - depends on IMA_TRUSTED_KEYRING + depends on INTEGRITY_TRUSTED_KEYRING default n help This option creates an IMA blacklist keyring, which contains all @@ -279,7 +279,7 @@ config IMA_BLACKLIST_KEYRING config IMA_LOAD_X509 bool "Load X509 certificate onto the '.ima' trusted keyring" - depends on IMA_TRUSTED_KEYRING + depends on INTEGRITY_TRUSTED_KEYRING default n help File signature verification is based on the public keys -- cgit From 6f874fa021dfc7bf37f4f37da3a5aaa41fe9c39c Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Wed, 27 Sep 2023 02:03:23 +0800 Subject: selftests: Fix wrong TARGET in kselftest top level Makefile The 'uevents' subdirectory does not exist in tools/testing/selftests/ and adding 'uevents' to the TARGETS list results in the following error: make[1]: Entering directory 'xx/tools/testing/selftests/uevents' make[1]: *** No targets specified and no makefile found. Stop. make[1]: Leaving directory 'xx/tools/testing/selftests/uevents' What actually exists in tools/testing/selftests/ is the 'uevent' subdirectory. Signed-off-by: Juntong Deng Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 42806add0114..1a21d6beebc6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -92,7 +92,7 @@ endif TARGETS += tmpfs TARGETS += tpm2 TARGETS += tty -TARGETS += uevents +TARGETS += uevent TARGETS += user TARGETS += user_events TARGETS += vDSO -- cgit From 69390f3528f5a06e2ba291c2e6c20e62dc49c3e4 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 26 Sep 2023 03:49:03 +0200 Subject: MAINTAINERS: update nouveau maintainers Since I will continue to work on Nouveau consistently, also beyond my former and still ongoing VM_BIND/EXEC work, add myself to the list of Nouveau maintainers. Signed-off-by: Danilo Krummrich Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230926014913.7721-1-dakr@redhat.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b19995690904..67ce91c8778a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6647,6 +6647,7 @@ F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS M: Karol Herbst M: Lyude Paul +M: Danilo Krummrich L: dri-devel@lists.freedesktop.org L: nouveau@lists.freedesktop.org S: Supported -- cgit From 48a3adcf47888019f953c57a7290036744635912 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Wed, 20 Sep 2023 12:23:49 +0000 Subject: perf pmu: Fix perf stat output with correct scale and unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perf_pmu__parse_* functions for the sysfs files of pmu event’s scale, unit, per-pkg and snapshot were updated in commit 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs"). However, the paths for these sysfs files were incorrect. This resulted in perf stat reporting values with wrong scaling and missing units. This is fixed by correcting the paths for these sysfs files. Before this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 351,217,188,864 power/energy-pkg/ 2.004127961 seconds time elapsed After this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 80.58 Joules power/energy-pkg/ 2.004009749 seconds time elapsed Fixes: 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs") Signed-off-by: Wyes Karny Reviewed-by: Ian Rogers Cc: ravi.bangoria@amd.com Cc: sandipan.das@amd.com Cc: james.clark@arm.com Cc: kan.liang@linux.intel.com Link: https://lore.kernel.org/r/20230920122349.418673-1-wyes.karny@amd.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8de6f39abd1b..d515ba8a0e16 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -295,7 +295,7 @@ static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *al len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.scale", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -330,7 +330,7 @@ static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *ali len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.unit", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -364,7 +364,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.per-pkg", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -385,7 +385,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.snapshot", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) -- cgit From b0873eead1d1eadf13b5c80ad5d8f88b91e4910a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 25 Sep 2023 14:11:32 +0200 Subject: accel/ivpu: Do not use wait event interruptible If we receive signal when waiting for IPC message response in ivpu_ipc_receive() we return error and continue to operate. Then the driver can send another IPC messages and re-use occupied slot of the message still processed by the firmware. This can result in corrupting firmware memory and following FW crash with messages: [ 3698.569719] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_send_receive_internal(): IPC receive failed: type 0x1103, ret -512 [ 3698.569747] intel_vpu 0000:00:0b.0: [drm] ivpu_jsm_unregister_db(): Failed to unregister doorbell 3: -512 [ 3698.569756] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_tx_prepare(): IPC message vpu:0x88980000 not released by firmware [ 3698.569763] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_tx_prepare(): JSM message vpu:0x88980040 not released by firmware [ 3698.570234] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_send_receive_internal(): IPC receive failed: type 0x110e, ret -512 [ 3698.570318] intel_vpu 0000:00:0b.0: [drm] *ERROR* ivpu_mmu_dump_event(): MMU EVTQ: 0x10 (Translation fault) SSID: 0 SID: 3, e[2] 00000000, e[3] 00000208, in addr: 0x88988000, fetch addr: 0x0 To fix the issue don't use interruptible variant of wait event to allow firmware to finish IPC processing. Fixes: 5d7422cfb498 ("accel/ivpu: Add IPC driver and JSM messages") Reviewed-by: Karol Wachowski Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-2-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_ipc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index fa0af59e39ab..295c0d7b5039 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -209,10 +209,10 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_rx_msg *rx_msg; int wait_ret, ret = 0; - wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq, - (IS_KTHREAD() && kthread_should_stop()) || - !list_empty(&cons->rx_msg_list), - msecs_to_jiffies(timeout_ms)); + wait_ret = wait_event_timeout(cons->rx_msg_wq, + (IS_KTHREAD() && kthread_should_stop()) || + !list_empty(&cons->rx_msg_list), + msecs_to_jiffies(timeout_ms)); if (IS_KTHREAD() && kthread_should_stop()) return -EINTR; @@ -220,9 +220,6 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, if (wait_ret == 0) return -ETIMEDOUT; - if (wait_ret < 0) - return -ERESTARTSYS; - spin_lock_irq(&cons->rx_msg_lock); rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link); if (!rx_msg) { -- cgit From 002652555022728c42b5517c6c11265b8c3ab827 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 25 Sep 2023 14:11:33 +0200 Subject: accel/ivpu: Don't flood dmesg with VPU ready message Use ivpu_dbg() to print the VPU ready message so it doesn't pollute the dmesg. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-3-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index aa7314fdbc0f..467a60235370 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -327,7 +327,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) } if (!ret) - ivpu_info(vdev, "VPU ready message received successfully\n"); + ivpu_dbg(vdev, PM, "VPU ready message received successfully\n"); else ivpu_hw_diagnose_failure(vdev); -- cgit From 6c3f2f90ccad024806f72c49740742df4ded3727 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 25 Sep 2023 14:11:34 +0200 Subject: accel/ivpu/40xx: Ensure clock resource ownership Ack before Power-Up We need to wait for the CLOCK_RESOURCE_OWN_ACK bit to be set after configuring the workpoint. This step ensures that the VPU microcontroller clock is actively toggling and ready for operation. Previously, we relied solely on the READY bit in the VPU_STATUS register, which indicated the completion of the workpoint download. However, this approach was insufficient, as the READY bit could be set while the device was still running on a sideband clock until the PLL locked. To guarantee that the PLL is locked and the device is running on the main clock source, we now wait for the CLOCK_RESOURCE_OWN_ACK before proceeding with the remainder of the power-up sequence. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-4-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 14 ++++++++++++++ drivers/accel/ivpu/ivpu_hw_40xx_reg.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 00c5dbbe6847..f4a251a58ca4 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -196,6 +196,14 @@ static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); } +static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev) +{ + if (ivpu_is_simics(vdev)) + return 0; + + return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); +} + static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) { struct ivpu_hw_info *hw = vdev->hw; @@ -556,6 +564,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) { int ret; + ret = ivpu_wait_for_clock_own_resource_ack(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n"); + return ret; + } + ivpu_boot_pwr_island_trickle_drive(vdev, true); ivpu_boot_pwr_island_drive(vdev, true); diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index 5139cfe88532..ff4a5d4f5821 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -70,6 +70,8 @@ #define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) #define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) #define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2) +#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6) +#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7) #define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11) #define VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12) -- cgit From ec3e3adc6d53b0f4a9afc8f903fbf851341e0193 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 25 Sep 2023 14:11:35 +0200 Subject: accel/ivpu/40xx: Disable frequency change interrupt Do not enable frequency change interrupt on 40xx as it might lead to an interrupt storm in current design. FREQ_CHANGE interrupt is triggered on D0I2 entry which will cause KMD to check VPU interrupt sources by reading VPUIP registers. Access to those registers will toggle necessary clocks and trigger another FREQ_CHANGE interrupt possibly ending in an infinite loop. FREQ_CHANGE interrupt has only debug purposes and can be permanently disabled. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-5-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index f4a251a58ca4..87b1085d44cf 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -57,8 +57,7 @@ #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ - (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ +#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \ (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \ (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \ -- cgit From 09bb81cf243d151dd1c02fcd727a4604829d9927 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 25 Sep 2023 14:11:36 +0200 Subject: accel/ivpu/40xx: Fix missing VPUIP interrupts Move sequence of masking and unmasking global interrupts from buttress interrupt handler to generic one that handles both VPUIP and BTRS interrupts. Unmasking global interrupts will re-trigger MSI for any pending interrupts. Lack of this sequence can randomly cause to miss any VPUIP interrupt that comes after reading VPU_40XX_HOST_SS_ICB_STATUS_0 and before clearing all active interrupt sources. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-6-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 87b1085d44cf..8bdb59a45da6 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -1059,9 +1059,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) if (status == 0) return IRQ_NONE; - /* Disable global interrupt before handling local buttress interrupts */ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) ivpu_dbg(vdev, IRQ, "FREQ_CHANGE"); @@ -1109,9 +1106,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) /* This must be done after interrupts are cleared at the source. */ REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); - /* Re-enable global interrupt */ - REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - if (schedule_recovery) ivpu_pm_schedule_recovery(vdev); @@ -1123,9 +1117,14 @@ static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr) struct ivpu_device *vdev = ptr; irqreturn_t ret = IRQ_NONE; + REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); + ret |= ivpu_hw_40xx_irqv_handler(vdev, irq); ret |= ivpu_hw_40xx_irqb_handler(vdev, irq); + /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ + REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); + if (ret & IRQ_WAKE_THREAD) return IRQ_WAKE_THREAD; -- cgit From 645d694559cab36fe6a57c717efcfa27d9321396 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Tue, 26 Sep 2023 14:09:43 +0200 Subject: accel/ivpu: Use cached buffers for FW loading Create buffers with cache coherency on the CPU side (write-back) while disabling snooping on the VPU side. These buffers require an explicit cache flush after each CPU-side modification. Configuring pages as write-combined may introduce significant delays, potentially taking hundreds of milliseconds for 64 MB buffers. Added internal DRM_IVPU_BO_NOSNOOP mask which disables snooping on the VPU side. Allocate FW runtime memory buffer (64 MB) as cached with snooping-disabled. This fixes random long FW loading times and boot params memory corruption on warmboot (due to missed wmb). Fixes: 02d5b0aacd05 ("accel/ivpu: Implement firmware parsing and booting") Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230926120943.GD846747@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 8 +++++--- drivers/accel/ivpu/ivpu_gem.h | 5 +++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 9827ea4d7b83..0191cf8e5964 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -220,7 +220,8 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) if (ret) return ret; - fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); + fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_NOSNOOP); if (!fw->mem) { ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); return -ENOMEM; @@ -330,7 +331,7 @@ int ivpu_fw_load(struct ivpu_device *vdev) memset(start, 0, size); } - wmb(); /* Flush WC buffers after writing fw->mem */ + clflush_cache_range(fw->mem->kvaddr, fw->mem->base.size); return 0; } @@ -432,6 +433,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params if (!ivpu_fw_is_cold_boot(vdev)) { boot_params->save_restore_ret_address = 0; vdev->pm->is_warmboot = true; + clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K); return; } @@ -493,7 +495,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); - wmb(); /* Flush WC buffers after writing bootparams */ + clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K); ivpu_fw_boot_params_print(vdev, boot_params); } diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 6b0ceda5f253..f4130586ff1b 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -8,6 +8,8 @@ #include #include +#define DRM_IVPU_BO_NOSNOOP 0x10000000 + struct dma_buf; struct ivpu_bo_ops; struct ivpu_file_priv; @@ -83,6 +85,9 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) { + if (bo->flags & DRM_IVPU_BO_NOSNOOP) + return false; + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } -- cgit From 9e8bc2dda5a7a8e2babc9975f4b11c9a6196e490 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Tue, 26 Sep 2023 10:29:14 +0000 Subject: gpio: timberdale: Fix potential deadlock on &tgpio->lock As timbgpio_irq_enable()/timbgpio_irq_disable() callback could be executed under irq context, it could introduce double locks on &tgpio->lock if it preempts other execution units requiring the same locks. timbgpio_gpio_set() --> timbgpio_update_bit() --> spin_lock(&tgpio->lock) --> timbgpio_irq_disable() --> spin_lock_irqsave(&tgpio->lock) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. To prevent the potential deadlock, the patch uses spin_lock_irqsave() on &tgpio->lock inside timbgpio_gpio_set() to prevent the possible deadlock scenario. Signed-off-by: Chengfeng Ye Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-timberdale.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index bbd9e9191199..fad979797486 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -43,9 +43,10 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index, unsigned offset, bool enabled) { struct timbgpio *tgpio = gpiochip_get_data(gpio); + unsigned long flags; u32 reg; - spin_lock(&tgpio->lock); + spin_lock_irqsave(&tgpio->lock, flags); reg = ioread32(tgpio->membase + offset); if (enabled) @@ -54,7 +55,7 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index, reg &= ~(1 << index); iowrite32(reg, tgpio->membase + offset); - spin_unlock(&tgpio->lock); + spin_unlock_irqrestore(&tgpio->lock, flags); return 0; } -- cgit From 26d9e5640d2130ee16df7b1fb6a908f460ab004c Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Thu, 21 Sep 2023 20:25:27 +0800 Subject: gpio: pmic-eic-sprd: Add can_sleep flag for PMIC EIC chip The drivers uses a mutex and I2C bus access in its PMIC EIC chip get implementation. This means these functions can sleep and the PMIC EIC chip should set the can_sleep property to true. This will ensure that a warning is printed when trying to get the value from a context that potentially can't sleep. Fixes: 348f3cde84ab ("gpio: Add Spreadtrum PMIC EIC driver support") Signed-off-by: Wenhua Lin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pmic-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c index 2b9b7be9b8fd..01c0fd0a9d8c 100644 --- a/drivers/gpio/gpio-pmic-eic-sprd.c +++ b/drivers/gpio/gpio-pmic-eic-sprd.c @@ -352,6 +352,7 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev) pmic_eic->chip.set_config = sprd_pmic_eic_set_config; pmic_eic->chip.set = sprd_pmic_eic_set; pmic_eic->chip.get = sprd_pmic_eic_get; + pmic_eic->chip.can_sleep = true; irq = &pmic_eic->chip.irq; gpio_irq_chip_set_chip(irq, &pmic_eic_irq_chip); -- cgit From 1943feecf80e73ecc03ce40271f29c6cea142bac Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 27 Sep 2023 16:19:13 +0800 Subject: LoongArch: numa: Fix high_memory calculation For 64bit kernel without HIGHMEM, high_memory is the virtual address of the highest physical address in the system. But __va(get_num_physpages() << PAGE_SHIFT) is not what we want for high_memory because there may be holes in the physical address space. On the other hand, max_low_pfn is calculated from memblock_end_of_DRAM(), which is exactly corresponding to the highest physical address, so use it for high_memory calculation. Cc: Fixes: d4b6f1562a3c3284adce ("LoongArch: Add Non-Uniform Memory Access (NUMA) support") Signed-off-by: Chong Qiao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index c7d33c489e04..6e65ff12d5c7 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -436,7 +436,7 @@ void __init paging_init(void) void __init mem_init(void) { - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); } -- cgit From 2761498876adebff77a43574639005b29e912c43 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 27 Sep 2023 16:19:13 +0800 Subject: LoongArch: Define relocation types for ABI v2.10 The relocation types from 101 to 109 are used by GNU binutils >= 2.41, add their definitions to use them in later patches. Link: https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=include/elf/loongarch.h#l230 Cc: Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/elf.h | 9 +++++++++ arch/loongarch/kernel/module.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index 7af0cebf28d7..b9a4ab54285c 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -111,6 +111,15 @@ #define R_LARCH_TLS_GD_HI20 98 #define R_LARCH_32_PCREL 99 #define R_LARCH_RELAX 100 +#define R_LARCH_DELETE 101 +#define R_LARCH_ALIGN 102 +#define R_LARCH_PCREL20_S2 103 +#define R_LARCH_CFA 104 +#define R_LARCH_ADD6 105 +#define R_LARCH_SUB6 106 +#define R_LARCH_ADD_ULEB128 107 +#define R_LARCH_SUB_ULEB128 108 +#define R_LARCH_64_PCREL 109 #ifndef ELF_ARCH diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index b8b86088b2dd..c3f9d2f5d840 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -382,7 +382,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v, /* The handlers for known reloc types */ static reloc_rela_handler reloc_rela_handlers[] = { - [R_LARCH_NONE ... R_LARCH_RELAX] = apply_r_larch_error, + [R_LARCH_NONE ... R_LARCH_64_PCREL] = apply_r_larch_error, [R_LARCH_NONE] = apply_r_larch_none, [R_LARCH_32] = apply_r_larch_32, -- cgit From c1c2ce2d3bf903c50f3da7346d394127ffcc93ac Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 27 Sep 2023 16:19:13 +0800 Subject: LoongArch: Add support for 32_PCREL relocation type When build and update kernel with the latest upstream binutils and loongson3_defconfig, module loader fails with: kmod: zsmalloc: Unsupport relocation type 99, please add its support. kmod: fuse: Unsupport relocation type 99, please add its support. kmod: ipmi_msghandler: Unsupport relocation type 99, please add its support. kmod: ipmi_msghandler: Unsupport relocation type 99, please add its support. kmod: pstore: Unsupport relocation type 99, please add its support. kmod: drm_display_helper: Unsupport relocation type 99, please add its support. kmod: drm_display_helper: Unsupport relocation type 99, please add its support. kmod: drm_display_helper: Unsupport relocation type 99, please add its support. kmod: fuse: Unsupport relocation type 99, please add its support. kmod: fat: Unsupport relocation type 99, please add its support. This is because the latest upstream binutils replaces a pair of ADD32 and SUB32 with 32_PCREL, so add support for 32_PCREL relocation type. Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=ecb802d02eeb Cc: Co-developed-by: Youling Tang Signed-off-by: Youling Tang Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/module.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index c3f9d2f5d840..9e10c4400743 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -367,6 +367,15 @@ static int apply_r_larch_got_pc(struct module *mod, return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type); } +static int apply_r_larch_32_pcrel(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + ptrdiff_t offset = (void *)v - (void *)location; + + *(u32 *)location = offset; + return 0; +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -396,6 +405,7 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, + [R_LARCH_32_PCREL] = apply_r_larch_32_pcrel, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- cgit From b1dc55a3d6a86cc2c1ae664ad7280bff4c0fc28f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 27 Sep 2023 16:19:13 +0800 Subject: LoongArch: Add support for 64_PCREL relocation type When build and update kernel with the latest upstream binutils and loongson3_defconfig, module loader fails with: kmod: zsmalloc: Unknown relocation type 109 kmod: fuse: Unknown relocation type 109 kmod: fuse: Unknown relocation type 109 kmod: radeon: Unknown relocation type 109 kmod: nf_tables: Unknown relocation type 109 kmod: nf_tables: Unknown relocation type 109 This is because the latest upstream binutils replaces a pair of ADD64 and SUB64 with 64_PCREL, so add support for 64_PCREL relocation type. Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=ecb802d02eeb Cc: Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/module.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 9e10c4400743..b13b2858fe39 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -376,6 +376,15 @@ static int apply_r_larch_32_pcrel(struct module *mod, u32 *location, Elf_Addr v, return 0; } +static int apply_r_larch_64_pcrel(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + ptrdiff_t offset = (void *)v - (void *)location; + + *(u64 *)location = offset; + return 0; +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -406,6 +415,7 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, [R_LARCH_32_PCREL] = apply_r_larch_32_pcrel, + [R_LARCH_64_PCREL] = apply_r_larch_64_pcrel, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- cgit From 8be586f78dfd2aebbcd06d855e2d6f23402bbc58 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Sep 2023 10:51:57 +0200 Subject: Revert "pinctrl: tegra: Add support to display pin function" This reverts commit d1cd5b51bc9152dc2b63c5f843590272d6694d50. It was reported that some I2C3 functions stop working after this patch, and it is just debug help so let's revert it and investigate. Link: https://lore.kernel.org/linux-gpio/20230925183049.10a40546@booty/ Signed-off-by: Linus Walleij --- drivers/pinctrl/tegra/pinctrl-tegra.c | 19 ++----------------- drivers/pinctrl/tegra/pinctrl-tegra.h | 2 -- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c index cfeda5b3e048..734c71ef005b 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.c +++ b/drivers/pinctrl/tegra/pinctrl-tegra.c @@ -96,7 +96,6 @@ static const struct cfg_param { {"nvidia,slew-rate-falling", TEGRA_PINCONF_PARAM_SLEW_RATE_FALLING}, {"nvidia,slew-rate-rising", TEGRA_PINCONF_PARAM_SLEW_RATE_RISING}, {"nvidia,drive-type", TEGRA_PINCONF_PARAM_DRIVE_TYPE}, - {"nvidia,function", TEGRA_PINCONF_PARAM_FUNCTION}, }; static int tegra_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, @@ -471,12 +470,6 @@ static int tegra_pinconf_reg(struct tegra_pmx *pmx, *bit = g->drvtype_bit; *width = 2; break; - case TEGRA_PINCONF_PARAM_FUNCTION: - *bank = g->mux_bank; - *reg = g->mux_reg; - *bit = g->mux_bit; - *width = 2; - break; default: dev_err(pmx->dev, "Invalid config param %04x\n", param); return -ENOTSUPP; @@ -640,16 +633,8 @@ static void tegra_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, val >>= bit; val &= (1 << width) - 1; - if (cfg_params[i].param == TEGRA_PINCONF_PARAM_FUNCTION) { - u8 idx = pmx->soc->groups[group].funcs[val]; - - seq_printf(s, "\n\t%s=%s", - strip_prefix(cfg_params[i].property), - pmx->functions[idx].name); - } else { - seq_printf(s, "\n\t%s=%u", - strip_prefix(cfg_params[i].property), val); - } + seq_printf(s, "\n\t%s=%u", + strip_prefix(cfg_params[i].property), val); } } diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.h b/drivers/pinctrl/tegra/pinctrl-tegra.h index e728efeaa4de..b3289bdf727d 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.h +++ b/drivers/pinctrl/tegra/pinctrl-tegra.h @@ -54,8 +54,6 @@ enum tegra_pinconf_param { TEGRA_PINCONF_PARAM_SLEW_RATE_RISING, /* argument: Integer, range is HW-dependant */ TEGRA_PINCONF_PARAM_DRIVE_TYPE, - /* argument: pinmux settings */ - TEGRA_PINCONF_PARAM_FUNCTION, }; enum tegra_pinconf_pull { -- cgit From 59a98f4f1e10902f94610d4cf99de86322016464 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Sep 2023 17:35:48 -0700 Subject: ARM: uniphier: fix cache kernel-doc warnings Fix kernel-doc warning(s) as reported by lkp: arch/arm/mm/cache-uniphier.c:72: warning: cannot understand function prototype: 'struct uniphier_cache_data ' cache-uniphier.c:82: warning: Function parameter or member 'way_ctrl_base' not described in 'uniphier_cache_data' Fixes: e7ecbc057bc5 ("ARM: uniphier: add outer cache support") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Masahiro Yamada Cc: Olof Johansson Cc: Arnd Bergmann Cc: soc@kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: Kunihiko Hayashi Cc: Masami Hiramatsu Link: lore.kernel.org/r/202309260130.Uvwh8ceE-lkp@intel.com # fixes only one item Link: https://lore.kernel.org/r/20230926003548.22066-1-rdunlap@infradead.org Signed-off-by: Arnd Bergmann --- arch/arm/mm/cache-uniphier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c index ff2881458504..84a2f17ff32d 100644 --- a/arch/arm/mm/cache-uniphier.c +++ b/arch/arm/mm/cache-uniphier.c @@ -58,11 +58,13 @@ ((op & UNIPHIER_SSCOQM_S_MASK) == UNIPHIER_SSCOQM_S_RANGE) /** - * uniphier_cache_data - UniPhier outer cache specific data + * struct uniphier_cache_data - UniPhier outer cache specific data * * @ctrl_base: virtual base address of control registers * @rev_base: virtual base address of revision registers * @op_base: virtual base address of operation registers + * @way_ctrl_base: virtual address of the way control registers for this + * SoC revision * @way_mask: each bit specifies if the way is present * @nsets: number of associativity sets * @line_size: line size in bytes -- cgit From 7d3e4e9d3bde9c8bd8914d47ddaa90e0d0ffbcab Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Thu, 21 Sep 2023 17:57:22 +0300 Subject: arm64: defconfig: remove CONFIG_COMMON_CLK_NPCM8XX=y There is no code for this config option and enabling it in defconfig causes warnings from tools which are detecting unused and obsolete kernel config flags since the flag will be completely missing from effective build config after "make olddefconfig". Fixes yocto kernel recipe build time warning: WARNING: [kernel config]: This BSP contains fragments with warnings: ... [INFO]: the following symbols were not found in the active configuration: - CONFIG_COMMON_CLK_NPCM8XX The flag was added with commit 45472f1e5348c7b755b4912f2f529ec81cea044b v5.19-rc4-15-g45472f1e5348 so 6.1 and 6.4 stable kernel trees are affected. Fixes: 45472f1e5348c7b755b4912f2f529ec81cea044b ("arm64: defconfig: Add Nuvoton NPCM family support") Cc: stable@kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Bjorn Andersson Cc: Krzysztof Kozlowski Cc: Konrad Dybcio Cc: Neil Armstrong Cc: Tomer Maimon Cc: Bruce Ashfield Cc: Jon Mason Cc: Jon Mason Cc: Ross Burton Cc: Arnd Bergmann Signed-off-by: Mikko Rapeli Signed-off-by: Arnd Bergmann --- arch/arm64/configs/defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5315789f4868..24531891c7be 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1175,7 +1175,6 @@ CONFIG_COMMON_CLK_S2MPS11=y CONFIG_COMMON_CLK_PWM=y CONFIG_COMMON_CLK_RS9_PCIE=y CONFIG_COMMON_CLK_VC5=y -CONFIG_COMMON_CLK_NPCM8XX=y CONFIG_COMMON_CLK_BD718XX=m CONFIG_CLK_RASPBERRYPI=m CONFIG_CLK_IMX8MM=y -- cgit From 8e4a28f9796114ee83a20223a319436d4a100bfa Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 31 Aug 2023 19:43:11 +0800 Subject: soc: loongson: loongson_pm2: Add dependency for INPUT Since commit 67694c076bd7 ("soc: loongson2_pm: add power management support"), the Loongson-2K PM driver was added, but it didn't update the Kconfig entry for the INPUT dependency, leading to build errors, so update the Kconfig entry to depend on INPUT. /opt/crosstool/gcc-13.2.0-nolibc/loongarch64-linux/bin/loongarch64-linux-ld: drivers/soc/loongson/loongson2_pm.o: in function `loongson2_power_button_init': /work/lnx/next/linux-next-20230825/LOONG64/../drivers/soc/loongson/loongson2_pm.c:101:(.text+0x350): undefined reference to `input_allocate_device' /opt/crosstool/gcc-13.2.0-nolibc/loongarch64-linux/bin/loongarch64-linux-ld: /work/lnx/next/linux-next-20230825/LOONG64/../drivers/soc/loongson/loongson2_pm.c:109:(.text+0x3dc): undefined reference to `input_set_capability' /opt/crosstool/gcc-13.2.0-nolibc/loongarch64-linux/bin/loongarch64-linux-ld: /work/lnx/next/linux-next-20230825/LOONG64/../drivers/soc/loongson/loongson2_pm.c:111:(.text+0x3e4): undefined reference to `input_register_device' /opt/crosstool/gcc-13.2.0-nolibc/loongarch64-linux/bin/loongarch64-linux-ld: /work/lnx/next/linux-next-20230825/LOONG64/../drivers/soc/loongson/loongson2_pm.c:125:(.text+0x3fc): undefined reference to `input_free_device' /opt/crosstool/gcc-13.2.0-nolibc/loongarch64-linux/bin/loongarch64-linux-ld: drivers/soc/loongson/loongson2_pm.o: in function `input_report_key': /work/lnx/next/linux-next-20230825/LOONG64/../include/linux/input.h:425:(.text+0x58c): undefined reference to `input_event' Reported-by: Randy Dunlap Signed-off-by: Binbin Zhou Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/loongson/Kconfig b/drivers/soc/loongson/Kconfig index 314e13bb3e01..368344943a93 100644 --- a/drivers/soc/loongson/Kconfig +++ b/drivers/soc/loongson/Kconfig @@ -20,6 +20,7 @@ config LOONGSON2_GUTS config LOONGSON2_PM bool "Loongson-2 SoC Power Management Controller Driver" depends on LOONGARCH && OF + depends on INPUT=y help The Loongson-2's power management controller was ACPI, supports ACPI S2Idle (Suspend To Idle), ACPI S3 (Suspend To RAM), ACPI S4 (Suspend To -- cgit From 380054cb050b071f37ab9ac9823b785c29db0c13 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 31 Aug 2023 19:43:12 +0800 Subject: dt-bindings: soc: loongson,ls2k-pmc: Use fallbacks for ls2k-pmc compatible The Loongson-2K series chips (ls2k0500/ls2k1000/ls2k2000) share the same PM system controller, using ls2k0500 compatible as fallback for the others. Signed-off-by: Binbin Zhou Acked-by: Conor Dooley Signed-off-by: Arnd Bergmann --- .../bindings/soc/loongson/loongson,ls2k-pmc.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml b/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml index da2dcfeebf12..c45f5e7fc0e6 100644 --- a/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml +++ b/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml @@ -11,11 +11,16 @@ maintainers: properties: compatible: - items: - - enum: - - loongson,ls2k0500-pmc - - loongson,ls2k1000-pmc - - const: syscon + oneOf: + - items: + - const: loongson,ls2k0500-pmc + - const: syscon + - items: + - enum: + - loongson,ls2k1000-pmc + - loongson,ls2k2000-pmc + - const: loongson,ls2k0500-pmc + - const: syscon reg: maxItems: 1 @@ -44,7 +49,7 @@ examples: #include power-management@1fe27000 { - compatible = "loongson,ls2k1000-pmc", "syscon"; + compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x1fe27000 0x58>; interrupt-parent = <&liointc1>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; -- cgit From e26e788a2a0be414397ced9cc8e462e6baa497c6 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 31 Aug 2023 19:43:13 +0800 Subject: soc: loongson: loongson_pm2: Drop useless of_device_id compatible Now, "loongson,ls2k0500-pmc" is used as fallback compatible, so the ls2k1000 compatible in the driver can be dropped directly. Signed-off-by: Binbin Zhou Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/loongson2_pm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/loongson/loongson2_pm.c b/drivers/soc/loongson/loongson2_pm.c index 796add6e8b63..5ffb77afd9eb 100644 --- a/drivers/soc/loongson/loongson2_pm.c +++ b/drivers/soc/loongson/loongson2_pm.c @@ -197,7 +197,6 @@ static int loongson2_pm_probe(struct platform_device *pdev) static const struct of_device_id loongson2_pm_match[] = { { .compatible = "loongson,ls2k0500-pmc", }, - { .compatible = "loongson,ls2k1000-pmc", }, {}, }; -- cgit From 8c4102f20a968ef466ed6b63930a546f57966ca1 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 31 Aug 2023 19:43:24 +0800 Subject: dt-bindings: soc: loongson,ls2k-pmc: Allow syscon-reboot/syscon-poweroff as child The reboot and poweroff features are actually part of the Power Management Unit system controller, thus allow them as its children, instead of specifying as separate device nodes with syscon phandle. Without it, the reboot/poweroff feature becomes unavailable. Signed-off-by: Binbin Zhou Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- .../bindings/soc/loongson/loongson,ls2k-pmc.yaml | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml b/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml index c45f5e7fc0e6..510f6cb0f084 100644 --- a/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml +++ b/Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml @@ -37,6 +37,18 @@ properties: addition, the PM need according to it to indicate that current SoC whether support Suspend To RAM. + syscon-poweroff: + $ref: /schemas/power/reset/syscon-poweroff.yaml# + type: object + description: + Node for power off method + + syscon-reboot: + $ref: /schemas/power/reset/syscon-reboot.yaml# + type: object + description: + Node for reboot method + required: - compatible - reg @@ -54,4 +66,18 @@ examples: interrupt-parent = <&liointc1>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; loongson,suspend-address = <0x0 0x1c000500>; + + syscon-reboot { + compatible = "syscon-reboot"; + offset = <0x30>; + mask = <0x1>; + }; + + syscon-poweroff { + compatible = "syscon-poweroff"; + regmap = <&pmc>; + offset = <0x14>; + mask = <0x3c00>; + value = <0x3c00>; + }; }; -- cgit From a2fd542287d02d35d61839a09d4b18ccc4b2ff0e Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 31 Aug 2023 19:43:25 +0800 Subject: soc: loongson: loongson_pm2: Populate children syscon nodes The syscon poweroff and reboot nodes logically belong to the Power Management Unit so populate possible children. Without it, the reboot/poweroff feature becomes unavailable. Signed-off-by: Binbin Zhou Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/loongson2_pm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/soc/loongson/loongson2_pm.c b/drivers/soc/loongson/loongson2_pm.c index 5ffb77afd9eb..b8e5e1e3528a 100644 --- a/drivers/soc/loongson/loongson2_pm.c +++ b/drivers/soc/loongson/loongson2_pm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -192,6 +193,11 @@ static int loongson2_pm_probe(struct platform_device *pdev) if (loongson_sysconf.suspend_addr) suspend_set_ops(&loongson2_suspend_ops); + /* Populate children */ + retval = devm_of_platform_populate(dev); + if (retval) + dev_err(dev, "Error populating children, reboot and poweroff might not work properly\n"); + return 0; } -- cgit From daacef89cd1bb7e345539db10e979e1b78451591 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Fri, 1 Sep 2023 14:25:48 +0800 Subject: soc: loongson: loongson2_guts: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Dongliang Mu Signed-off-by: Binbin Zhou Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/loongson2_guts.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/soc/loongson/loongson2_guts.c b/drivers/soc/loongson/loongson2_guts.c index bace4bc8e03b..d97c77a9a4a2 100644 --- a/drivers/soc/loongson/loongson2_guts.c +++ b/drivers/soc/loongson/loongson2_guts.c @@ -94,7 +94,6 @@ static int loongson2_guts_probe(struct platform_device *pdev) { struct device_node *root, *np = pdev->dev.of_node; struct device *dev = &pdev->dev; - struct resource *res; const struct loongson2_soc_die_attr *soc_die; const char *machine; u32 svr; @@ -106,8 +105,7 @@ static int loongson2_guts_probe(struct platform_device *pdev) guts->little_endian = of_property_read_bool(np, "little-endian"); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - guts->regs = ioremap(res->start, res->end - res->start + 1); + guts->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(guts->regs)) return PTR_ERR(guts->regs); -- cgit From a776cc49718cc5230aa83a0389002ed92bfc76d7 Mon Sep 17 00:00:00 2001 From: Mingtong Bao Date: Fri, 1 Sep 2023 14:25:49 +0800 Subject: soc: loongson: loongson2_guts: Remove unneeded semicolon No functional modification involved. ./drivers/soc/loongson/loongson2_guts.c:73:2-3: Unneeded semicolon. Reviewed-by: Huacai Chen Signed-off-by: Mingtong Bao Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/loongson2_guts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/loongson/loongson2_guts.c b/drivers/soc/loongson/loongson2_guts.c index d97c77a9a4a2..9a469779eea7 100644 --- a/drivers/soc/loongson/loongson2_guts.c +++ b/drivers/soc/loongson/loongson2_guts.c @@ -70,7 +70,7 @@ static const struct loongson2_soc_die_attr *loongson2_soc_die_match( if (matches->svr == (svr & matches->mask)) return matches; matches++; - }; + } return NULL; } -- cgit From 7e1fe5d9e7eae67e218f878195d1d348d01f9af7 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 27 Sep 2023 12:44:10 +0530 Subject: ASoC: SOF: amd: fix for firmware reload failure after playback Setting ACP ACLK as clock source when ACP enters D0 state causing firmware load failure as mentioned in below scenario. - Load snd_sof_amd_rembrandt - Play or Record audio - Stop audio - Unload snd_sof_amd_rembrandt - Reload snd_sof_amd_rembrandt If acp_clkmux_sel register field is set, then clock source will be set to ACP ACLK when ACP enters D0 state. During stream stop, if there is no active stream is running then acp firmware will set the ACP ACLK value to zero. When driver is reloaded and clock source is selected as ACP ACLK, as ACP ACLK is programmed to zero, firmware loading will fail. For RMB platform, remove the clock mux selection field so that ACP will use internal clock source when ACP enters D0 state. Fixes: 41cb85bc4b52 ("ASoC: SOF: amd: Add support for Rembrandt plaform.") Reported-by: coolstar Closes: https://github.com/thesofproject/sof/issues/8137 Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20230927071412.2416250-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/pci-rmb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/sof/amd/pci-rmb.c b/sound/soc/sof/amd/pci-rmb.c index 9935e457b467..a7ae76efc2dd 100644 --- a/sound/soc/sof/amd/pci-rmb.c +++ b/sound/soc/sof/amd/pci-rmb.c @@ -35,7 +35,6 @@ static const struct sof_amd_acp_desc rembrandt_chip_info = { .dsp_intr_base = ACP6X_DSP_SW_INTR_BASE, .sram_pte_offset = ACP6X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP6X_AXI2DAGB_SEM_0, - .acp_clkmux_sel = ACP6X_CLKMUX_SEL, .fusion_dsp_offset = ACP6X_DSP_FUSION_RUNSTALL, .probe_reg_offset = ACP6X_FUTURE_REG_ACLK_0, }; -- cgit From 2d5780bbef8dbe6375d481cbea212606a80e4453 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 26 Sep 2023 20:55:56 +0200 Subject: swiotlb: fix the check whether a device has used software IO TLB When CONFIG_SWIOTLB_DYNAMIC=y, devices which do not use the software IO TLB can avoid swiotlb lookup. A flag is added by commit 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it"), the flag is correctly set, but it is then never checked. Add the actual check here. Note that this code is an alternative to the default pool check, not an additional check, because: 1. swiotlb_find_pool() also searches the default pool; 2. if dma_uses_io_tlb is false, the default swiotlb pool is not used. Tested in a KVM guest against a QEMU RAM-backed SATA disk over virtio and *not* using software IO TLB, this patch increases IOPS by approx 2% for 4-way parallel I/O. The write memory barrier in swiotlb_dyn_alloc() is not needed, because a newly allocated pool must always be observed by swiotlb_find_slots() before an address from that pool is passed to is_swiotlb_buffer(). Correctness was verified using the following litmus test: C swiotlb-new-pool (* * Result: Never * * Check that a newly allocated pool is always visible when the * corresponding swiotlb buffer is visible. *) { mem_pools = default; } P0(int **mem_pools, int *pool) { /* add_mem_pool() */ WRITE_ONCE(*pool, 999); rcu_assign_pointer(*mem_pools, pool); } P1(int **mem_pools, int *flag, int *buf) { /* swiotlb_find_slots() */ int *r0; int r1; rcu_read_lock(); r0 = READ_ONCE(*mem_pools); r1 = READ_ONCE(*r0); rcu_read_unlock(); if (r1) { WRITE_ONCE(*flag, 1); smp_mb(); } /* device driver (presumed) */ WRITE_ONCE(*buf, r1); } P2(int **mem_pools, int *flag, int *buf) { /* device driver (presumed) */ int r0 = READ_ONCE(*buf); /* is_swiotlb_buffer() */ int r1; int *r2; int r3; smp_rmb(); r1 = READ_ONCE(*flag); if (r1) { /* swiotlb_find_pool() */ rcu_read_lock(); r2 = READ_ONCE(*mem_pools); r3 = READ_ONCE(*r2); rcu_read_unlock(); } } exists (2:r0<>0 /\ 2:r3=0) (* Not found. *) Fixes: 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it") Reported-by: Jonathan Corbet Closes: https://lore.kernel.org/linux-iommu/87a5uz3ob8.fsf@meer.lwn.net/ Signed-off-by: Petr Tesarik Reviewed-by: Catalin Marinas Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 23 ++++++++++++++++------- kernel/dma/swiotlb.c | 26 ++++++++++++++++++++------ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b4536626f8ff..ecde0312dd52 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,14 +172,23 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { - /* Pairs with smp_wmb() in swiotlb_find_slots() and - * swiotlb_dyn_alloc(), which modify the RCU lists. - */ - smp_rmb(); - return swiotlb_find_pool(dev, paddr); - } +#ifdef CONFIG_SWIOTLB_DYNAMIC + /* + * All SWIOTLB buffer addresses must have been returned by + * swiotlb_tbl_map_single() and passed to a device driver. + * If a SWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in swiotlb_find_slots(). + */ + smp_rmb(); + return READ_ONCE(dev->dma_uses_io_tlb) && + swiotlb_find_pool(dev, paddr); +#else return paddr >= mem->defpool.start && paddr < mem->defpool.end; +#endif } static inline bool is_swiotlb_force_bounce(struct device *dev) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 85dd94323b98..01637677736f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -728,9 +728,6 @@ static void swiotlb_dyn_alloc(struct work_struct *work) } add_mem_pool(mem, pool); - - /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ - smp_wmb(); } /** @@ -1151,9 +1148,26 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); found: - dev->dma_uses_io_tlb = true; - /* Pairs with smp_rmb() in is_swiotlb_buffer() */ - smp_wmb(); + WRITE_ONCE(dev->dma_uses_io_tlb, true); + + /* + * The general barrier orders reads and writes against a presumed store + * of the SWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in is_swiotlb_buffer(). + */ + smp_mb(); *retpool = pool; return index; -- cgit From 22061bfc57fe08c77141dc876b4af75603c4d61d Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 26 Sep 2023 16:55:50 +0300 Subject: wifi: iwlwifi: mvm: Fix incorrect usage of scan API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The support for using link ID in the scan request API was only added in version 16. However, the code wrongly enabled this API usage also for older versions. Fix it. Reported-by: Antoine Beaupré Fixes: e98b23d0d7b8 ("wifi: iwlwifi: mvm: Add support for SCAN API version 16") Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230926165546.086e635fbbe6.Ia660f35ca0b1079f2c2ea92fd8d14d8101a89d03@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index c1d9ce753468..3cbe2c0b8d6b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2342,7 +2342,7 @@ iwl_mvm_scan_umac_fill_general_p_v12(struct iwl_mvm *mvm, if (gen_flags & IWL_UMAC_SCAN_GEN_FLAGS_V2_FRAGMENTED_LMAC2) gp->num_of_fragments[SCAN_HB_LMAC_IDX] = IWL_SCAN_NUM_OF_FRAGS; - if (version < 12) { + if (version < 16) { gp->scan_start_mac_or_link_id = scan_vif->id; } else { struct iwl_mvm_vif_link_info *link_info; -- cgit From 84ee19bffc9306128cd0f1c650e89767079efeff Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 27 Sep 2023 10:15:00 +0300 Subject: mmc: core: Capture correct oemid-bits for eMMC cards The OEMID is an 8-bit binary number rather than 16-bit as the current code parses for. The OEMID occupies bits [111:104] in the CID register, see the eMMC spec JESD84-B51 paragraph 7.2.3. It seems that the 16-bit comes from the legacy MMC specs (v3.31 and before). Let's fix the parsing by simply move to use 8-bit instead of 16-bit. This means we ignore the impact on some of those old MMC cards that may be out there, but on the other hand this shouldn't be a problem as the OEMID seems not be an important feature for these cards. Signed-off-by: Avri Altman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230927071500.1791882-1-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 89cd48fcec79..4a4bab9aa726 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); -- cgit From 5abb5c3cd4b38ec32c38a852c83ea04255cecf25 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 25 Sep 2023 16:38:44 +0100 Subject: riscv: errata: andes: Makefile: Fix randconfig build issue Compile the andes errata with cflags set to " -mcmodel=medany" when CONFIG_RISCV_ALTERNATIVE_EARLY is enabled. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309111311.8tcq3KVc-lkp@intel.com/ Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20230925153844.26820-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/andes/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile index 2d644e19caef..6278c389b801 100644 --- a/arch/riscv/errata/andes/Makefile +++ b/arch/riscv/errata/andes/Makefile @@ -1 +1,5 @@ +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_errata.o := -mcmodel=medany +endif + obj-y += errata.o -- cgit From 1a6a464774947920dcedcf7409be62495c7cedd0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:06 +0200 Subject: timers: Tag (hr)timer softirq as hotplug safe Specific stress involving frequent CPU-hotplug operations, such as running rcutorture for example, may trigger the following message: NOHZ tick-stop error: local softirq work is pending, handler #02!!!" This happens in the CPU-down hotplug process, after CPUHP_AP_SMPBOOT_THREADS whose teardown callback parks ksoftirqd, and before the target CPU shuts down through CPUHP_AP_IDLE_DEAD. In this fragile intermediate state, softirqs waiting for threaded handling may be forever ignored and eventually reported by the idle task as in the above example. However some vectors are known to be safe as long as the corresponding subsystems have teardown callbacks handling the migration of their events. The above error message reports pending timers softirq although this vector can be considered as hotplug safe because the CPUHP_TIMERS_PREPARE teardown callback performs the necessary migration of timers after the death of the CPU. Hrtimers also have a similar hotplug handling. Therefore this error message, as far as (hr-)timers are concerned, can be considered spurious and the relevant softirq vectors can be marked as hotplug safe. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230912104406.312185-6-frederic@kernel.org --- include/linux/interrupt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..4a1dc88ddbff 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -569,8 +569,12 @@ enum * 2) rcu_report_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + * + * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ + BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. -- cgit From a154f5f643c6ecddd44847217a7a3845b4350003 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 18 Sep 2023 15:58:48 -0700 Subject: scsi: target: core: Fix deadlock due to recursive locking The following call trace shows a deadlock issue due to recursive locking of mutex "device_mutex". First lock acquire is in target_for_each_device() and second in target_free_device(). PID: 148266 TASK: ffff8be21ffb5d00 CPU: 10 COMMAND: "iscsi_ttx" #0 [ffffa2bfc9ec3b18] __schedule at ffffffffa8060e7f #1 [ffffa2bfc9ec3ba0] schedule at ffffffffa8061224 #2 [ffffa2bfc9ec3bb8] schedule_preempt_disabled at ffffffffa80615ee #3 [ffffa2bfc9ec3bc8] __mutex_lock at ffffffffa8062fd7 #4 [ffffa2bfc9ec3c40] __mutex_lock_slowpath at ffffffffa80631d3 #5 [ffffa2bfc9ec3c50] mutex_lock at ffffffffa806320c #6 [ffffa2bfc9ec3c68] target_free_device at ffffffffc0935998 [target_core_mod] #7 [ffffa2bfc9ec3c90] target_core_dev_release at ffffffffc092f975 [target_core_mod] #8 [ffffa2bfc9ec3ca0] config_item_put at ffffffffa79d250f #9 [ffffa2bfc9ec3cd0] config_item_put at ffffffffa79d2583 #10 [ffffa2bfc9ec3ce0] target_devices_idr_iter at ffffffffc0933f3a [target_core_mod] #11 [ffffa2bfc9ec3d00] idr_for_each at ffffffffa803f6fc #12 [ffffa2bfc9ec3d60] target_for_each_device at ffffffffc0935670 [target_core_mod] #13 [ffffa2bfc9ec3d98] transport_deregister_session at ffffffffc0946408 [target_core_mod] #14 [ffffa2bfc9ec3dc8] iscsit_close_session at ffffffffc09a44a6 [iscsi_target_mod] #15 [ffffa2bfc9ec3df0] iscsit_close_connection at ffffffffc09a4a88 [iscsi_target_mod] #16 [ffffa2bfc9ec3df8] finish_task_switch at ffffffffa76e5d07 #17 [ffffa2bfc9ec3e78] iscsit_take_action_for_connection_exit at ffffffffc0991c23 [iscsi_target_mod] #18 [ffffa2bfc9ec3ea0] iscsi_target_tx_thread at ffffffffc09a403b [iscsi_target_mod] #19 [ffffa2bfc9ec3f08] kthread at ffffffffa76d8080 #20 [ffffa2bfc9ec3f50] ret_from_fork at ffffffffa8200364 Fixes: 36d4cb460bcb ("scsi: target: Avoid that EXTENDED COPY commands trigger lock inversion") Signed-off-by: Junxiao Bi Link: https://lore.kernel.org/r/20230918225848.66463-1-junxiao.bi@oracle.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index b7ac60f4a219..b6523d4b9259 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -843,7 +843,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) EXPORT_SYMBOL(target_to_linux_sector); struct devices_idr_iter { - struct config_item *prev_item; int (*fn)(struct se_device *dev, void *data); void *data; }; @@ -853,11 +852,9 @@ static int target_devices_idr_iter(int id, void *p, void *data) { struct devices_idr_iter *iter = data; struct se_device *dev = p; + struct config_item *item; int ret; - config_item_put(iter->prev_item); - iter->prev_item = NULL; - /* * We add the device early to the idr, so it can be used * by backend modules during configuration. We do not want @@ -867,12 +864,13 @@ static int target_devices_idr_iter(int id, void *p, void *data) if (!target_dev_configured(dev)) return 0; - iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item); - if (!iter->prev_item) + item = config_item_get_unless_zero(&dev->dev_group.cg_item); + if (!item) return 0; mutex_unlock(&device_mutex); ret = iter->fn(dev, iter->data); + config_item_put(item); mutex_lock(&device_mutex); return ret; @@ -895,7 +893,6 @@ int target_for_each_device(int (*fn)(struct se_device *dev, void *data), mutex_lock(&device_mutex); ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter); mutex_unlock(&device_mutex); - config_item_put(iter.prev_item); return ret; } -- cgit From 514f0c400bde6b62405467daaf2a0a86bcf7794b Mon Sep 17 00:00:00 2001 From: Karan Tilak Kumar Date: Tue, 19 Sep 2023 11:24:36 -0700 Subject: scsi: fnic: Fix sg_reset success path sg_reset performs a target or LUN reset. Since the command is issued by the user, it does not come into the driver with a tag or a queue id. Fix the fnic driver to create an io_req and use a SCSI command tag. Fix the ITMF path to special case the sg_reset response. Reviewed-by: Sesidhar Baddela Reviewed-by: Arulprabhu Ponnusamy Tested-by: Karan Tilak Kumar Signed-off-by: Karan Tilak Kumar Link: https://lore.kernel.org/r/20230919182436.6895-1-kartilak@cisco.com Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 4 ++- drivers/scsi/fnic/fnic_io.h | 2 ++ drivers/scsi/fnic/fnic_main.c | 2 ++ drivers/scsi/fnic/fnic_scsi.c | 64 +++++++++++++++++++++++++++++++------------ 4 files changed, 54 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 93c68931a593..22cef283b2b9 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -27,7 +27,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.6.0.56" +#define DRV_VERSION "1.6.0.57" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " @@ -237,6 +237,8 @@ struct fnic { unsigned int cq_count; struct mutex sgreset_mutex; + spinlock_t sgreset_lock; /* lock for sgreset */ + struct scsi_cmnd *sgreset_sc; struct dentry *fnic_stats_debugfs_host; struct dentry *fnic_stats_debugfs_file; struct dentry *fnic_reset_debugfs_file; diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h index f4c8769df312..5895ead20e14 100644 --- a/drivers/scsi/fnic/fnic_io.h +++ b/drivers/scsi/fnic/fnic_io.h @@ -52,6 +52,8 @@ struct fnic_io_req { unsigned long start_time; /* in jiffies */ struct completion *abts_done; /* completion for abts */ struct completion *dr_done; /* completion for device reset */ + unsigned int tag; + struct scsi_cmnd *sc; /* midlayer's cmd pointer */ }; enum fnic_port_speeds { diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 984bc5fc55e2..f27f9319e0b2 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -754,6 +754,8 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < FNIC_IO_LOCKS; i++) spin_lock_init(&fnic->io_req_lock[i]); + spin_lock_init(&fnic->sgreset_lock); + err = -ENOMEM; fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache); if (!fnic->io_req_pool) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 9761b2c9db48..416d81954819 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1047,9 +1047,9 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, { u8 type; u8 hdr_status; - struct fcpio_tag tag; + struct fcpio_tag ftag; u32 id; - struct scsi_cmnd *sc; + struct scsi_cmnd *sc = NULL; struct fnic_io_req *io_req; struct fnic_stats *fnic_stats = &fnic->fnic_stats; struct abort_stats *abts_stats = &fnic->fnic_stats.abts_stats; @@ -1058,27 +1058,43 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, unsigned long flags; spinlock_t *io_lock; unsigned long start_time; + unsigned int tag; - fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); - fcpio_tag_id_dec(&tag, &id); + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &ftag); + fcpio_tag_id_dec(&ftag, &id); - if ((id & FNIC_TAG_MASK) >= fnic->fnic_max_tag_id) { + tag = id & FNIC_TAG_MASK; + if (tag == fnic->fnic_max_tag_id) { + if (!(id & FNIC_TAG_DEV_RST)) { + shost_printk(KERN_ERR, fnic->lport->host, + "Tag out of range id 0x%x hdr status = %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); + return; + } + } else if (tag > fnic->fnic_max_tag_id) { shost_printk(KERN_ERR, fnic->lport->host, - "Tag out of range tag %x hdr status = %s\n", - id, fnic_fcpio_status_to_str(hdr_status)); + "Tag out of range tag 0x%x hdr status = %s\n", + tag, fnic_fcpio_status_to_str(hdr_status)); return; } - sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); + if ((tag == fnic->fnic_max_tag_id) && (id & FNIC_TAG_DEV_RST)) { + sc = fnic->sgreset_sc; + io_lock = &fnic->sgreset_lock; + } else { + sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); + io_lock = fnic_io_lock_hash(fnic, sc); + } + WARN_ON_ONCE(!sc); if (!sc) { atomic64_inc(&fnic_stats->io_stats.sc_null); shost_printk(KERN_ERR, fnic->lport->host, "itmf_cmpl sc is null - hdr status = %s tag = 0x%x\n", - fnic_fcpio_status_to_str(hdr_status), id); + fnic_fcpio_status_to_str(hdr_status), tag); return; } - io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); io_req = fnic_priv(sc)->io_req; WARN_ON_ONCE(!io_req); @@ -1089,7 +1105,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, shost_printk(KERN_ERR, fnic->lport->host, "itmf_cmpl io_req is null - " "hdr status = %s tag = 0x%x sc 0x%p\n", - fnic_fcpio_status_to_str(hdr_status), id, sc); + fnic_fcpio_status_to_str(hdr_status), tag, sc); return; } start_time = io_req->start_time; @@ -1938,6 +1954,10 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, struct scsi_lun fc_lun; int ret = 0; unsigned long intr_flags; + unsigned int tag = scsi_cmd_to_rq(sc)->tag; + + if (tag == SCSI_NO_TAG) + tag = io_req->tag; spin_lock_irqsave(host->host_lock, intr_flags); if (unlikely(fnic_chk_state_flags_locked(fnic, @@ -1964,7 +1984,8 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, /* fill in the lun info */ int_to_scsilun(sc->device->lun, &fc_lun); - fnic_queue_wq_copy_desc_itmf(wq, scsi_cmd_to_rq(sc)->tag | FNIC_TAG_DEV_RST, + tag |= FNIC_TAG_DEV_RST; + fnic_queue_wq_copy_desc_itmf(wq, tag, 0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG, fc_lun.scsi_lun, io_req->port_id, fnic->config.ra_tov, fnic->config.ed_tov); @@ -2146,8 +2167,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, .ret = SUCCESS, }; - if (new_sc) - iter_data.lr_sc = lr_sc; + iter_data.lr_sc = lr_sc; scsi_host_busy_iter(fnic->lport->host, fnic_pending_aborts_iter, &iter_data); @@ -2230,8 +2250,14 @@ int fnic_device_reset(struct scsi_cmnd *sc) mutex_lock(&fnic->sgreset_mutex); tag = fnic->fnic_max_tag_id; new_sc = 1; - } - io_lock = fnic_io_lock_hash(fnic, sc); + fnic->sgreset_sc = sc; + io_lock = &fnic->sgreset_lock; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "fcid: 0x%x lun: 0x%llx flags: 0x%x tag: 0x%x Issuing sgreset\n", + rport->port_id, sc->device->lun, fnic_priv(sc)->flags, tag); + } else + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); io_req = fnic_priv(sc)->io_req; @@ -2247,6 +2273,8 @@ int fnic_device_reset(struct scsi_cmnd *sc) } memset(io_req, 0, sizeof(*io_req)); io_req->port_id = rport->port_id; + io_req->tag = tag; + io_req->sc = sc; fnic_priv(sc)->io_req = io_req; } io_req->dr_done = &tm_done; @@ -2400,8 +2428,10 @@ fnic_device_reset_end: (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), fnic_flags_and_state(sc)); - if (new_sc) + if (new_sc) { + fnic->sgreset_sc = NULL; mutex_unlock(&fnic->sgreset_mutex); + } FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Returning from device reset %s\n", -- cgit From 1a8196a93e493c0a50b800cb09cef60b124eee15 Mon Sep 17 00:00:00 2001 From: Charles Kearney Date: Wed, 20 Sep 2023 21:53:39 +0000 Subject: spi: spi-gxp: BUG: Correct spi write return value Bug fix to correct return value of gxp_spi_write function to zero. Completion of succesful operation should return zero. Fixes: 730bc8ba5e9e spi: spi-gxp: Add support for HPE GXP SoCs Signed-off-by: Charles Kearney Link: https://lore.kernel.org/r/20230920215339.4125856-2-charles.kearney@hpe.com Signed-off-by: Mark Brown --- drivers/spi/spi-gxp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-gxp.c b/drivers/spi/spi-gxp.c index fd2fac236bbd..3aff5a166c94 100644 --- a/drivers/spi/spi-gxp.c +++ b/drivers/spi/spi-gxp.c @@ -194,7 +194,7 @@ static ssize_t gxp_spi_write(struct gxp_spi_chip *chip, const struct spi_mem_op return ret; } - return write_len; + return 0; } static int do_gxp_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) -- cgit From b481f644d9174670b385c3a699617052cd2a79d3 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 23 Sep 2023 18:37:23 +0800 Subject: scsi: zfcp: Fix a double put in zfcp_port_enqueue() When device_register() fails, zfcp_port_release() will be called after put_device(). As a result, zfcp_ccw_adapter_put() will be called twice: one in zfcp_port_release() and one in the error path after device_register(). So the reference on the adapter object is doubly put, which may lead to a premature free. Fix this by adjusting the error tag after device_register(). Fixes: f3450c7b9172 ("[SCSI] zfcp: Replace local reference counting with common kref") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20230923103723.10320-1-dinghao.liu@zju.edu.cn Acked-by: Benjamin Block Cc: stable@vger.kernel.org # v2.6.33+ Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_aux.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index df782646e856..ab2f35bc294d 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -518,12 +518,12 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (port) { put_device(&port->dev); retval = -EEXIST; - goto err_out; + goto err_put; } port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL); if (!port) - goto err_out; + goto err_put; rwlock_init(&port->unit_list_lock); INIT_LIST_HEAD(&port->unit_list); @@ -546,7 +546,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); - goto err_out; + goto err_put; } retval = -EINVAL; @@ -563,7 +563,8 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, return port; -err_out: +err_put: zfcp_ccw_adapter_put(adapter); +err_out: return ERR_PTR(retval); } -- cgit From 91e326563ee34509c35267808a4b1b3ea3db62a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Sep 2023 09:22:14 +0200 Subject: ima: rework CONFIG_IMA dependency block Changing the direct dependencies of IMA_BLACKLIST_KEYRING and IMA_LOAD_X509 caused them to no longer depend on IMA, but a a configuration without IMA results in link failures: arm-linux-gnueabi-ld: security/integrity/iint.o: in function `integrity_load_keys': iint.c:(.init.text+0xd8): undefined reference to `ima_load_x509' aarch64-linux-ld: security/integrity/digsig_asymmetric.o: in function `asymmetric_verify': digsig_asymmetric.c:(.text+0x104): undefined reference to `ima_blacklist_keyring' Adding explicit dependencies on IMA would fix this, but a more reliable way to do this is to enclose the entire Kconfig file in an 'if IMA' block. This also allows removing the existing direct dependencies. Fixes: be210c6d3597f ("ima: Finish deprecation of IMA_TRUSTED_KEYRING Kconfig") Signed-off-by: Arnd Bergmann Signed-off-by: Mimi Zohar --- security/integrity/ima/Kconfig | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 4e559bd1fd41..a6bd817efc1a 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -29,9 +29,11 @@ config IMA to learn more about IMA. If unsure, say N. +if IMA + config IMA_KEXEC bool "Enable carrying the IMA measurement list across a soft boot" - depends on IMA && TCG_TPM && HAVE_IMA_KEXEC + depends on TCG_TPM && HAVE_IMA_KEXEC default n help TPM PCRs are only reset on a hard reboot. In order to validate @@ -43,7 +45,6 @@ config IMA_KEXEC config IMA_MEASURE_PCR_IDX int - depends on IMA range 8 14 default 10 help @@ -53,7 +54,7 @@ config IMA_MEASURE_PCR_IDX config IMA_LSM_RULES bool - depends on IMA && AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR) + depends on AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR) default y help Disabling this option will disregard LSM based policy rules. @@ -61,7 +62,6 @@ config IMA_LSM_RULES choice prompt "Default template" default IMA_NG_TEMPLATE - depends on IMA help Select the default IMA measurement template. @@ -80,14 +80,12 @@ endchoice config IMA_DEFAULT_TEMPLATE string - depends on IMA default "ima-ng" if IMA_NG_TEMPLATE default "ima-sig" if IMA_SIG_TEMPLATE choice prompt "Default integrity hash algorithm" default IMA_DEFAULT_HASH_SHA1 - depends on IMA help Select the default hash algorithm used for the measurement list, integrity appraisal and audit log. The compiled default @@ -117,7 +115,6 @@ endchoice config IMA_DEFAULT_HASH string - depends on IMA default "sha1" if IMA_DEFAULT_HASH_SHA1 default "sha256" if IMA_DEFAULT_HASH_SHA256 default "sha512" if IMA_DEFAULT_HASH_SHA512 @@ -126,7 +123,6 @@ config IMA_DEFAULT_HASH config IMA_WRITE_POLICY bool "Enable multiple writes to the IMA policy" - depends on IMA default n help IMA policy can now be updated multiple times. The new rules get @@ -137,7 +133,6 @@ config IMA_WRITE_POLICY config IMA_READ_POLICY bool "Enable reading back the current IMA policy" - depends on IMA default y if IMA_WRITE_POLICY default n if !IMA_WRITE_POLICY help @@ -147,7 +142,6 @@ config IMA_READ_POLICY config IMA_APPRAISE bool "Appraise integrity measurements" - depends on IMA default n help This option enables local measurement integrity appraisal. @@ -304,7 +298,6 @@ config IMA_APPRAISE_SIGNED_INIT config IMA_MEASURE_ASYMMETRIC_KEYS bool - depends on IMA depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y default y @@ -323,7 +316,8 @@ config IMA_SECURE_AND_OR_TRUSTED_BOOT config IMA_DISABLE_HTABLE bool "Disable htable to allow measurement of duplicate records" - depends on IMA default n help This option disables htable to allow measurement of duplicate records. + +endif -- cgit From 5623ecfcbec165f040a23248d39680f0cc5c0854 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Sep 2023 16:05:26 -0400 Subject: SUNRPC: Fail quickly when server does not recognize TLS rpcauth_checkverf() should return a distinct error code when a server recognizes the AUTH_TLS probe but does not support TLS so that the client's header decoder can respond appropriately and quickly. No retries are necessary is in this case, since the server has already affirmatively answered "TLS is unsupported". Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 11 ++++++++--- net/sunrpc/auth_tls.c | 4 ++-- net/sunrpc/clnt.c | 10 +++++++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2f16f9d17966..814b0169f972 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -769,9 +769,14 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) * @task: controlling RPC task * @xdr: xdr_stream containing RPC Reply header * - * On success, @xdr is updated to point past the verifier and - * zero is returned. Otherwise, @xdr is in an undefined state - * and a negative errno is returned. + * Return values: + * %0: Verifier is valid. @xdr now points past the verifier. + * %-EIO: Verifier is corrupted or message ended early. + * %-EACCES: Verifier is intact but not valid. + * %-EPROTONOSUPPORT: Server does not support the requested auth type. + * + * When a negative errno is returned, @xdr is left in an undefined + * state. */ int rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr) diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c index de7678f8a23d..87f570fd3b00 100644 --- a/net/sunrpc/auth_tls.c +++ b/net/sunrpc/auth_tls.c @@ -129,9 +129,9 @@ static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr) if (*p != rpc_auth_null) return -EIO; if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len) - return -EIO; + return -EPROTONOSUPPORT; if (memcmp(str, starttls_token, starttls_len)) - return -EIO; + return -EPROTONOSUPPORT; return 0; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 37b0b212b934..ea12ed3c70ba 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2725,7 +2725,15 @@ out_unparsable: out_verifier: trace_rpc_bad_verifier(task); - goto out_garbage; + switch (error) { + case -EPROTONOSUPPORT: + goto out_err; + case -EACCES: + /* Re-encode with a fresh cred */ + fallthrough; + default: + goto out_garbage; + } out_msg_denied: error = -EACCES; -- cgit From ed1cc05aa1f7fe8197d300e914afc28ab9818f89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Sep 2023 19:05:50 -0400 Subject: NFSv4: Fix a nfs4_state_manager() race If the NFS4CLNT_RUN_MANAGER flag got set just before we cleared NFS4CLNT_MANAGER_RUNNING, then we might have won the race against nfs4_schedule_state_manager(), and are responsible for handling the recovery situation. Fixes: aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e079987af4a3..0bc160fbabec 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2703,6 +2703,13 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); + if (test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, + &clp->cl_state)) { + memflags = memalloc_nofs_save(); + continue; + } + if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) { if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { nfs_client_return_marked_delegations(clp); -- cgit From 956fd46f97d238032cb5fa4771cdaccc6e760f9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Sep 2023 13:14:15 -0400 Subject: NFSv4: Fix a state manager thread deadlock regression Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it prevents the setup of new threads to handle reboot recovery, while the older recovery thread is stuck returning delegations. Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3508d8238826..7016eaadf555 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10622,7 +10622,9 @@ static void nfs4_disable_swap(struct inode *inode) */ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0bc160fbabec..9a5d911a7edc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + struct rpc_clnt *clnt = clp->cl_rpcclient; + bool swapon = false; - if (clp->cl_rpcclient->cl_shutdown) + if (clnt->cl_shutdown) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { - wake_up_var(&clp->cl_state); - return; + + if (atomic_read(&clnt->cl_swapper)) { + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, + &clp->cl_state); + if (!swapon) { + wake_up_var(&clp->cl_state); + return; + } } - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); refcount_inc(&clp->cl_count); @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) __func__, PTR_ERR(task)); if (!nfs_client_init_is_complete(clp)) nfs_mark_client_ready(clp, PTR_ERR(task)); + if (swapon) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs4_clear_state_manager_bit(clp); - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); again: - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); nfs4_state_manager(clp); - if (atomic_read(&cl->cl_swapper)) { + + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && + !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { wait_var_event_interruptible(&clp->cl_state, test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)); - if (atomic_read(&cl->cl_swapper) && - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) + if (!atomic_read(&cl->cl_swapper)) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; /* Either no longer a swapper, or were signalled */ + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); } - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); if (refcount_read(&clp->cl_count) > 1 && !signalled() && test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; nfs_put_client(clp); -- cgit From a275ab62606bcd894ddff09460f7d253828313dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Sep 2023 19:26:46 -0400 Subject: Revert "SUNRPC dont update timeout value on connection reset" This reverts commit 88428cc4ae7abcc879295fbb19373dd76aad2bdd. The problem this commit is intended to fix was comprehensively fixed in commit 7de62bc09fe6 ("SUNRPC dont update timeout value on connection reset"). Since then, this commit has been preventing the correct timeout of soft mounted requests. Cc: stable@vger.kernel.org # 5.9.x: 09252177d5f9: SUNRPC: Handle major timeout in xprt_adjust_timeout() Cc: stable@vger.kernel.org # 5.9.x: 7de62bc09fe6: SUNRPC dont update timeout value on connection reset Cc: stable@vger.kernel.org # 5.9.x Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ea12ed3c70ba..9c210273d06b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2476,8 +2476,7 @@ call_status(struct rpc_task *task) goto out_exit; } task->tk_action = call_encode; - if (status != -ECONNRESET && status != -ECONNABORTED) - rpc_check_timeout(task); + rpc_check_timeout(task); return; out_exit: rpc_call_rpcerror(task, status); -- cgit From 26e8bfa30dac2ccd29dd25f391dfc73475c33329 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 26 Sep 2023 17:03:22 -0400 Subject: SUNRPC/TLS: Lock the lower_xprt during the tls handshake Otherwise we run the risk of having the lower_xprt freed from underneath us, causing an oops that looks like this: [ 224.150698] BUG: kernel NULL pointer dereference, address: 0000000000000018 [ 224.150951] #PF: supervisor read access in kernel mode [ 224.151117] #PF: error_code(0x0000) - not-present page [ 224.151278] PGD 0 P4D 0 [ 224.151361] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 224.151499] CPU: 2 PID: 99 Comm: kworker/u10:6 Not tainted 6.6.0-rc3-g6465e260f487 #41264 a00b0960990fb7bc6d6a330ee03588b67f08a47b [ 224.151977] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022 [ 224.152216] Workqueue: xprtiod xs_tcp_tls_setup_socket [sunrpc] [ 224.152434] RIP: 0010:xs_tcp_tls_setup_socket+0x3cc/0x7e0 [sunrpc] [ 224.152643] Code: 00 00 48 8b 7c 24 08 e9 f3 01 00 00 48 83 7b c0 00 0f 85 d2 01 00 00 49 8d 84 24 f8 05 00 00 48 89 44 24 10 48 8b 00 48 89 c5 <4c> 8b 68 18 66 41 83 3f 0a 75 71 45 31 ff 4c 89 ef 31 f6 e8 5c 76 [ 224.153246] RSP: 0018:ffffb00ec060fd18 EFLAGS: 00010246 [ 224.153427] RAX: 0000000000000000 RBX: ffff8c06c2e53e40 RCX: 0000000000000001 [ 224.153652] RDX: ffff8c073bca2408 RSI: 0000000000000282 RDI: ffff8c06c259ee00 [ 224.153868] RBP: 0000000000000000 R08: ffffffff9da55aa0 R09: 0000000000000001 [ 224.154084] R10: 00000034306c30f1 R11: 0000000000000002 R12: ffff8c06c2e51800 [ 224.154300] R13: ffff8c06c355d400 R14: 0000000004208160 R15: ffff8c06c2e53820 [ 224.154521] FS: 0000000000000000(0000) GS:ffff8c073bd00000(0000) knlGS:0000000000000000 [ 224.154763] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 224.154940] CR2: 0000000000000018 CR3: 0000000062c1e000 CR4: 0000000000750ee0 [ 224.155157] PKRU: 55555554 [ 224.155244] Call Trace: [ 224.155325] [ 224.155395] ? __die_body+0x68/0xb0 [ 224.155507] ? page_fault_oops+0x34c/0x3a0 [ 224.155635] ? _raw_spin_unlock_irqrestore+0xe/0x40 [ 224.155793] ? exc_page_fault+0x7a/0x1b0 [ 224.155916] ? asm_exc_page_fault+0x26/0x30 [ 224.156047] ? xs_tcp_tls_setup_socket+0x3cc/0x7e0 [sunrpc ae3a15912ae37fd51dafbdbc2dbd069117f8f5c8] [ 224.156367] ? xs_tcp_tls_setup_socket+0x2fe/0x7e0 [sunrpc ae3a15912ae37fd51dafbdbc2dbd069117f8f5c8] [ 224.156697] ? __pfx_xs_tls_handshake_done+0x10/0x10 [sunrpc ae3a15912ae37fd51dafbdbc2dbd069117f8f5c8] [ 224.157013] process_scheduled_works+0x24e/0x450 [ 224.157158] worker_thread+0x21c/0x2d0 [ 224.157275] ? __pfx_worker_thread+0x10/0x10 [ 224.157409] kthread+0xe8/0x110 [ 224.157510] ? __pfx_kthread+0x10/0x10 [ 224.157628] ret_from_fork+0x37/0x50 [ 224.157741] ? __pfx_kthread+0x10/0x10 [ 224.157859] ret_from_fork_asm+0x1b/0x30 [ 224.157983] Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 71cd916e384f..a15bf2ede89b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2672,6 +2672,10 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work) rcu_read_lock(); lower_xprt = rcu_dereference(lower_clnt->cl_xprt); rcu_read_unlock(); + + if (wait_on_bit_lock(&lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + goto out_unlock; + status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec); if (status) { trace_rpc_tls_not_started(upper_clnt, upper_xprt); @@ -2681,6 +2685,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work) status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); if (status) goto out_close; + xprt_release_write(lower_xprt, NULL); trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0); if (!xprt_test_and_set_connected(upper_xprt)) { @@ -2702,6 +2707,7 @@ out_unlock: return; out_close: + xprt_release_write(lower_xprt, NULL); rpc_shutdown_client(lower_clnt); /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. -- cgit From 92e73d807b68b2214fcafca4e130b5300a9d4b3c Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Sat, 23 Sep 2023 18:02:14 -0700 Subject: i2c: npcm7xx: Fix callback completion ordering Sometimes, our completions race with new master transfers and override the bus->operation and bus->master_or_slave variables. This causes transactions to timeout and kernel crashes less frequently. To remedy this, we re-order all completions to the very end of the function. Fixes: 56a1485b102e ("i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver") Signed-off-by: William A. Kennington III Reviewed-by: Tali Perry Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 495a8b5f6a2b..ae4bae63ad4f 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -694,6 +694,7 @@ static void npcm_i2c_callback(struct npcm_i2c *bus, { struct i2c_msg *msgs; int msgs_num; + bool do_complete = false; msgs = bus->msgs; msgs_num = bus->msgs_num; @@ -722,23 +723,17 @@ static void npcm_i2c_callback(struct npcm_i2c *bus, msgs[1].flags & I2C_M_RD) msgs[1].len = info; } - if (completion_done(&bus->cmd_complete) == false) - complete(&bus->cmd_complete); - break; - + do_complete = true; + break; case I2C_NACK_IND: /* MASTER transmit got a NACK before tx all bytes */ bus->cmd_err = -ENXIO; - if (bus->master_or_slave == I2C_MASTER) - complete(&bus->cmd_complete); - + do_complete = true; break; case I2C_BUS_ERR_IND: /* Bus error */ bus->cmd_err = -EAGAIN; - if (bus->master_or_slave == I2C_MASTER) - complete(&bus->cmd_complete); - + do_complete = true; break; case I2C_WAKE_UP_IND: /* I2C wake up */ @@ -752,6 +747,8 @@ static void npcm_i2c_callback(struct npcm_i2c *bus, if (bus->slave) bus->master_or_slave = I2C_SLAVE; #endif + if (do_complete) + complete(&bus->cmd_complete); } static u8 npcm_i2c_fifo_usage(struct npcm_i2c *bus) -- cgit From 5e8a380b2dd3643392ba32711176fe710ad86e8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Sep 2023 21:38:10 +0200 Subject: ARM: locomo: fix locomolcd_power declaration The locomolcd driver has one remaining missing-prototype warning: drivers/video/backlight/locomolcd.c:83:6: error: no previous prototype for 'locomolcd_power' [-Werror=missing-prototypes] There is in fact an unused prototype with a similar name in a global header, so move the actual one there and remove the old one. Link: https://lore.kernel.org/r/20230927194844.680771-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/hardware/locomo.h | 2 +- arch/arm/mach-sa1100/include/mach/collie.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/include/asm/hardware/locomo.h b/arch/arm/include/asm/hardware/locomo.h index 246a3de25931..aaaedafef7cc 100644 --- a/arch/arm/include/asm/hardware/locomo.h +++ b/arch/arm/include/asm/hardware/locomo.h @@ -195,7 +195,7 @@ struct locomo_driver { #define LOCOMO_DRIVER_NAME(_ldev) ((_ldev)->dev.driver->name) -void locomo_lcd_power(struct locomo_dev *, int, unsigned int); +extern void locomolcd_power(int on); int locomo_driver_register(struct locomo_driver *); void locomo_driver_unregister(struct locomo_driver *); diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index b7bc23ffd3c6..c95273c9567b 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -16,8 +16,6 @@ #include "hardware.h" /* Gives GPIO_MAX */ -extern void locomolcd_power(int on); - #define COLLIE_SCOOP_GPIO_BASE (GPIO_MAX + 1) #define COLLIE_GPIO_CHARGE_ON (COLLIE_SCOOP_GPIO_BASE + 0) #define COLLIE_SCP_DIAG_BOOT1 SCOOP_GPCR_PA12 -- cgit From d75e870c32f6190f38ae8983932f7dbed1018039 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 1 Sep 2023 13:57:32 +0200 Subject: arm64: defconfig: enable syscon-poweroff driver Enable the generic syscon-poweroff driver used on all Exynos ARM64 SoCs (e.g. Exynos5433) and few APM SoCs. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20230901115732.45854-1-krzysztof.kozlowski@linaro.org Signed-off-by: Arnd Bergmann --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 24531891c7be..a789119e6483 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -636,6 +636,7 @@ CONFIG_POWER_RESET_MSM=y CONFIG_POWER_RESET_QCOM_PON=m CONFIG_POWER_RESET_XGENE=y CONFIG_POWER_RESET_SYSCON=y +CONFIG_POWER_RESET_SYSCON_POWEROFF=y CONFIG_SYSCON_REBOOT_MODE=y CONFIG_NVMEM_REBOOT_MODE=m CONFIG_BATTERY_SBS=m -- cgit From a5ef7d68cea1344cf524f04981c2b3f80bedbb0d Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 28 Sep 2023 14:59:16 +0800 Subject: x86/srso: Add SRSO mitigation for Hygon processors Add mitigation for the speculative return stack overflow vulnerability which exists on Hygon processors too. Signed-off-by: Pu Wen Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/tencent_4A14812842F104E93AA722EC939483CEFF05@qq.com --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 382d4e6b848d..4e5ffc8b0e46 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1303,7 +1303,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), - VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), VULNBL_AMD(0x19, SRSO), {} }; -- cgit From 3c67c5236fbf7a58c1a26d57da4465ea5fb25537 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 21 Sep 2023 10:46:52 -0400 Subject: dmaengine: fsl-dma: fix DMA error when enabling sg if 'DONE' bit is set In eDMAv3, clearing 'DONE' bit (bit 30) of CHn_CSR is required when enabling scatter-gather (SG). eDMAv4 does not require this change. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230921144652.3259813-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 15 ++++++++++++++- drivers/dma/fsl-edma-common.h | 14 +++++++++++++- drivers/dma/fsl-edma-main.c | 2 +- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index a0f5741abcc4..50203b82f9f5 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -448,12 +448,25 @@ static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan, edma_write_tcdreg(fsl_chan, tcd->dlast_sga, dlast_sga); + csr = le16_to_cpu(tcd->csr); + if (fsl_chan->is_sw) { - csr = le16_to_cpu(tcd->csr); csr |= EDMA_TCD_CSR_START; tcd->csr = cpu_to_le16(csr); } + /* + * Must clear CHn_CSR[DONE] bit before enable TCDn_CSR[ESG] at EDMAv3 + * eDMAv4 have not such requirement. + * Change MLINK need clear CHn_CSR[DONE] for both eDMAv3 and eDMAv4. + */ + if (((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_SG) && + (csr & EDMA_TCD_CSR_E_SG)) || + ((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_LINK) && + (csr & EDMA_TCD_CSR_E_LINK))) + edma_writel_chreg(fsl_chan, edma_readl_chreg(fsl_chan, ch_csr), ch_csr); + + edma_write_tcdreg(fsl_chan, tcd->csr, csr); } diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index 3cc0cc8fc2d0..40d50cc3d75a 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -183,11 +183,23 @@ struct fsl_edma_desc { #define FSL_EDMA_DRV_BUS_8BYTE BIT(10) #define FSL_EDMA_DRV_DEV_TO_DEV BIT(11) #define FSL_EDMA_DRV_ALIGN_64BYTE BIT(12) +/* Need clean CHn_CSR DONE before enable TCD's ESG */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_SG BIT(13) +/* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14) #define FSL_EDMA_DRV_EDMA3 (FSL_EDMA_DRV_SPLIT_REG | \ FSL_EDMA_DRV_BUS_8BYTE | \ FSL_EDMA_DRV_DEV_TO_DEV | \ - FSL_EDMA_DRV_ALIGN_64BYTE) + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_SG | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) + +#define FSL_EDMA_DRV_EDMA4 (FSL_EDMA_DRV_SPLIT_REG | \ + FSL_EDMA_DRV_BUS_8BYTE | \ + FSL_EDMA_DRV_DEV_TO_DEV | \ + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) struct fsl_edma_drvdata { u32 dmamuxs; /* only used before v3 */ diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 63d48d046f04..621a460fae0c 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -355,7 +355,7 @@ static struct fsl_edma_drvdata imx93_data3 = { }; static struct fsl_edma_drvdata imx93_data4 = { - .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA3, + .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4, .chreg_space_sz = 0x8000, .chreg_off = 0x10000, .setup_irq = fsl_edma3_irq_init, -- cgit From 54a5aff6f98b69e73cba40470f103a72bd436b20 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Wed, 20 Sep 2023 18:55:26 +0530 Subject: dt-bindings: dmaengine: zynqmp_dma: add xlnx,bus-width required property xlnx,bus-width is a required property. In yaml conversion somehow it got missed out. Bring it back and mention it in required list. Also add Harini and myself to the maintainer list. Fixes: 5a04982df8da ("dt-bindings: dmaengine: zynqmp_dma: convert to yaml") Signed-off-by: Radhey Shyam Pandey Acked-by: Krzysztof Kozlowski Reviewed-by: Michael Tretter Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/1695216326-3841352-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml index 23ada8f87526..769ce23aaac2 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml @@ -13,6 +13,8 @@ description: | maintainers: - Michael Tretter + - Harini Katakam + - Radhey Shyam Pandey allOf: - $ref: ../dma-controller.yaml# @@ -65,6 +67,7 @@ required: - interrupts - clocks - clock-names + - xlnx,bus-width additionalProperties: false -- cgit From 3f4b82167a3b1f4ddb33d890f758a042ef4ceef1 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 23 Aug 2023 14:26:35 -0400 Subject: dmaengine: fsl-edma: fix edma4 channel enable failure on second attempt When attempting to start DMA for the second time using fsl_edma3_enable_request(), channel never start. CHn_MUX must have a unique value when selecting a peripheral slot in the channel mux configuration. The only value that may overlap is source 0. If there is an attempt to write a mux configuration value that is already consumed by another channel, a mux configuration of 0 (SRC = 0) will be written. Check CHn_MUX before writing in fsl_edma3_enable_request(). Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230823182635.2618118-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index 50203b82f9f5..6a3abe5b1790 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -92,8 +92,14 @@ static void fsl_edma3_enable_request(struct fsl_edma_chan *fsl_chan) edma_writel_chreg(fsl_chan, val, ch_sbr); - if (flags & FSL_EDMA_DRV_HAS_CHMUX) - edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux); + if (flags & FSL_EDMA_DRV_HAS_CHMUX) { + /* + * ch_mux: With the exception of 0, attempts to write a value + * already in use will be forced to 0. + */ + if (!edma_readl_chreg(fsl_chan, ch_mux)) + edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux); + } val = edma_readl_chreg(fsl_chan, ch_csr); val |= EDMA_V3_CH_CSR_ERQ; -- cgit From c070e51db5e2a98d3aef7c324b15209ba47f3dca Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 20 Sep 2023 13:54:38 +0200 Subject: ice: always add legacy 32byte RXDID in supported_rxdids When the PF and VF drivers both support flexible rx descriptors and have negotiated the VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC capability, the VF driver queries the PF for the list of supported descriptor formats (VIRTCHNL_OP_GET_SUPPORTED_RXDIDS). The PF driver is supposed to set the supported_rxdids bits that correspond to the descriptor formats the firmware implements. The legacy 32-byte rx desc format is always supported, even though it is not expressed in GLFLXP_RXDID_FLAGS. The ice driver does not advertise the legacy 32-byte rx desc support, which leads to this failure to bring up the VF using the Intel out-of-tree iavf driver: iavf 0000:41:01.0: PF does not list support for default Rx descriptor format ... iavf 0000:41:01.0: PF returned error -5 (VIRTCHNL_STATUS_ERR_PARAM) to our request 6 The in-tree iavf driver does not expose this bug, because it does not yet implement VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC. The ice driver must always set the ICE_RXDID_LEGACY_1 bit in supported_rxdids. The Intel out-of-tree ice driver and the ice driver in DPDK both do this. I copied this piece of the code and the comment text from the Intel out-of-tree driver. Fixes: e753df8fbca5 ("ice: Add support Flex RXD") Signed-off-by: Michal Schmidt Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20230920115439.61172-1-mschmidt@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index b03426ac932b..db97353efd06 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -2617,12 +2617,14 @@ static int ice_vc_query_rxdid(struct ice_vf *vf) goto err; } - /* Read flexiflag registers to determine whether the - * corresponding RXDID is configured and supported or not. - * Since Legacy 16byte descriptor format is not supported, - * start from Legacy 32byte descriptor. + /* RXDIDs supported by DDP package can be read from the register + * to get the supported RXDID bitmap. But the legacy 32byte RXDID + * is not listed in DDP package, add it in the bitmap manually. + * Legacy 16byte descriptor is not supported. */ - for (i = ICE_RXDID_LEGACY_1; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { + rxdid->supported_rxdids |= BIT(ICE_RXDID_LEGACY_1); + + for (i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) -- cgit From 87d1888aa40f25773fa0b948bcb2545f97e2cb15 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:52:19 +0400 Subject: fs/ntfs3: Add ckeck in ni_update_parent() Check simple case when parent inode equals current inode. Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 2b85cb10f0be..d49fbb22bd5e 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3208,6 +3208,12 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, if (!fname || !memcmp(&fname->dup, dup, sizeof(fname->dup))) continue; + /* Check simple case when parent inode equals current inode. */ + if (ino_get(&fname->home) == ni->vfs_inode.i_ino) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + continue; + } + /* ntfs_iget5 may sleep. */ dir = ntfs_iget5(sb, &fname->home, NULL); if (IS_ERR(dir)) { -- cgit From 06ccfb00645990a9fcc14249e6d1c25921ecb836 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:57:19 +0400 Subject: fs/ntfs3: Write immediately updated ntfs state Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 33afee0f5559..edb51dc12f65 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -983,18 +983,11 @@ out: if (err) return err; - mark_inode_dirty(&ni->vfs_inode); + mark_inode_dirty_sync(&ni->vfs_inode); /* verify(!ntfs_update_mftmirr()); */ - /* - * If we used wait=1, sync_inode_metadata waits for the io for the - * inode to finish. It hangs when media is removed. - * So wait=0 is sent down to sync_inode_metadata - * and filemap_fdatawrite is used for the data blocks. - */ - err = sync_inode_metadata(&ni->vfs_inode, 0); - if (!err) - err = filemap_fdatawrite(ni->vfs_inode.i_mapping); + /* write mft record on disk. */ + err = _ni_write_inode(&ni->vfs_inode, 1); return err; } -- cgit From fc471e39e38fea6677017cbdd6d928088a59fc67 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:12:58 +0400 Subject: fs/ntfs3: Use kvmalloc instead of kmalloc(... __GFP_NOWARN) Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 15 +++++++++++++-- fs/ntfs3/bitmap.c | 3 ++- fs/ntfs3/super.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 42631b31adf1..7c01735d1219 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,7 +52,8 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is resident: lsize < record_size (1K or 4K) */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; @@ -80,7 +81,17 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (err < 0) goto out; - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is nonresident. + * The worst case: + * 1T (2^40) extremely fragmented file. + * cluster = 4K (2^12) => 2^28 fragments + * 2^9 fragments per one record => 2^19 records + * 2^5 bytes of ATTR_LIST_ENTRY per one record => 2^24 bytes. + * + * the result is 16M bytes per attribute list. + * Use kvmalloc to allocate in range [several Kbytes - dozen Mbytes] + */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 107e808e06ea..d66055e30aff 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -659,7 +659,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) wnd->bits_last = wbits; wnd->free_bits = - kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN); + kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); + if (!wnd->free_bits) return -ENOMEM; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cfec5e0c7f66..ea4e218c3f75 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1367,7 +1367,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } bytes = inode->i_size; - sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN); + sbi->def_table = t = kvmalloc(bytes, GFP_KERNEL); if (!t) { err = -ENOMEM; goto put_inode_out; -- cgit From 013ff63b649475f0ee134e2c8d0c8e65284ede50 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:17:02 +0400 Subject: fs/ntfs3: Add more attributes checks in mi_enum_attr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/record.c | 68 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index c12ebffc94da..02cc91ed8835 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -193,8 +193,9 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); - u32 t32, off, asize; + u32 t32, off, asize, prev_type; u16 t16; + u64 data_size, alloc_size, tot_size; if (!attr) { u32 total = le32_to_cpu(rec->total); @@ -213,6 +214,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (!is_rec_inuse(rec)) return NULL; + prev_type = 0; attr = Add2Ptr(rec, off); } else { /* Check if input attr inside record. */ @@ -226,11 +228,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* Overflow check. */ + /* Overflow check. */ + if (off + asize < off) return NULL; - } + prev_type = le32_to_cpu(attr->type); attr = Add2Ptr(attr, asize); off += asize; } @@ -250,7 +252,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); - if ((t32 & 0xf) || (t32 > 0x100)) + if (!t32 || (t32 & 0xf) || (t32 > 0x100)) + return NULL; + + /* attributes in record must be ordered by type */ + if (t32 < prev_type) return NULL; /* Check overflow and boundary. */ @@ -259,16 +265,15 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* Check size of attribute. */ if (!attr->non_res) { + /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) return NULL; t16 = le16_to_cpu(attr->res.data_off); - if (t16 > asize) return NULL; - t32 = le32_to_cpu(attr->res.data_size); - if (t16 + t32 > asize) + if (t16 + le32_to_cpu(attr->res.data_size) > asize) return NULL; t32 = sizeof(short) * attr->name_len; @@ -278,21 +283,52 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return attr; } - /* Check some nonresident fields. */ - if (attr->name_len && - le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > - le16_to_cpu(attr->nres.run_off)) { + /* Check nonresident fields. */ + if (attr->non_res != 1) + return NULL; + + t16 = le16_to_cpu(attr->nres.run_off); + if (t16 > asize) + return NULL; + + t32 = sizeof(short) * attr->name_len; + if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) + return NULL; + + /* Check start/end vcn. */ + if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) + return NULL; + + data_size = le64_to_cpu(attr->nres.data_size); + if (le64_to_cpu(attr->nres.valid_size) > data_size) return NULL; - } - if (attr->nres.svcn || !is_attr_ext(attr)) { + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (data_size > alloc_size) + return NULL; + + t32 = mi->sbi->cluster_mask; + if (alloc_size & t32) + return NULL; + + if (!attr->nres.svcn && is_attr_ext(attr)) { + /* First segment of sparse/compressed attribute */ + if (asize + 8 < SIZEOF_NONRESIDENT_EX) + return NULL; + + tot_size = le64_to_cpu(attr->nres.total_size); + if (tot_size & t32) + return NULL; + + if (tot_size > alloc_size) + return NULL; + } else { if (asize + 8 < SIZEOF_NONRESIDENT) return NULL; if (attr->nres.c_unit) return NULL; - } else if (asize + 8 < SIZEOF_NONRESIDENT_EX) - return NULL; + } return attr; } -- cgit From bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:22:53 +0400 Subject: fs/ntfs3: fix deadlock in mark_as_free_ex Reported-by: syzbot+e94d98936a0ed08bde43@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index edb51dc12f65..fbfe21dbb425 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2454,10 +2454,12 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; + bool dirty = false; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (!wnd_is_used(wnd, lcn, len)) { - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + /* mark volume as dirty out of wnd->rw_lock */ + dirty = true; end = lcn + len; len = 0; @@ -2511,6 +2513,8 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) out: up_write(&wnd->rw_lock); + if (dirty) + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* -- cgit From 91a4b1ee78cb100b19b70f077c247f211110348f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:25:25 +0400 Subject: fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 629403ede6e5..788567d71d93 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -42,9 +42,11 @@ enum utf16_endian; #define MINUS_ONE_T ((size_t)(-1)) /* Biggest MFT / smallest cluster */ #define MAXIMUM_BYTES_PER_MFT 4096 +#define MAXIMUM_SHIFT_BYTES_PER_MFT 12 #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) #define MAXIMUM_BYTES_PER_INDEX 4096 +#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12 #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) /* NTFS specific error code when fixup failed. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index ea4e218c3f75..f78c67452b2a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -899,9 +899,17 @@ check_boot: goto out; } - sbi->record_size = record_size = - boot->record_size < 0 ? 1 << (-boot->record_size) : - (u32)boot->record_size << cluster_bits; + if (boot->record_size >= 0) { + record_size = (u32)boot->record_size << cluster_bits; + } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) { + record_size = 1u << (-boot->record_size); + } else { + ntfs_err(sb, "%s: invalid record size %d.", hint, + boot->record_size); + goto out; + } + + sbi->record_size = record_size; sbi->record_bits = blksize_bits(record_size); sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes @@ -918,9 +926,15 @@ check_boot: goto out; } - sbi->index_size = boot->index_size < 0 ? - 1u << (-boot->index_size) : - (u32)boot->index_size << cluster_bits; + if (boot->index_size >= 0) { + sbi->index_size = (u32)boot->index_size << cluster_bits; + } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) { + sbi->index_size = 1u << (-boot->index_size); + } else { + ntfs_err(sb, "%s: invalid index size %d.", hint, + boot->index_size); + goto out; + } /* Check index record size. */ if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) { -- cgit From 3f2f09f18972dc4548feebd9946c10c04351333d Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:01:04 +0300 Subject: fs/ntfs3: Use inode_set_ctime_to_ts instead of inode_set_ctime Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb2ed0701495..2f76dc055c1f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -170,8 +170,8 @@ next_attr: nt2kernel(std5->cr_time, &ni->i_crtime); #endif nt2kernel(std5->a_time, &inode->i_atime); - ctime = inode_get_ctime(inode); nt2kernel(std5->c_time, &ctime); + inode_set_ctime_to_ts(inode, ctime); nt2kernel(std5->m_time, &inode->i_mtime); ni->std_fa = std5->fa; -- cgit From 4ad5c924df6cd6d85708fa23f9d9a2b78a2e428e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:07:59 +0300 Subject: fs/ntfs3: Allow repeated call to ntfs3_put_sbi Signed-off-by: Konstantin Komarov --- fs/ntfs3/bitmap.c | 1 + fs/ntfs3/super.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index d66055e30aff..63f14a0232f6 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -125,6 +125,7 @@ void wnd_close(struct wnd_bitmap *wnd) struct rb_node *node, *next; kfree(wnd->free_bits); + wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f78c67452b2a..71c80c578feb 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -576,20 +576,30 @@ static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi) wnd_close(&sbi->mft.bitmap); wnd_close(&sbi->used.bitmap); - if (sbi->mft.ni) + if (sbi->mft.ni) { iput(&sbi->mft.ni->vfs_inode); + sbi->mft.ni = NULL; + } - if (sbi->security.ni) + if (sbi->security.ni) { iput(&sbi->security.ni->vfs_inode); + sbi->security.ni = NULL; + } - if (sbi->reparse.ni) + if (sbi->reparse.ni) { iput(&sbi->reparse.ni->vfs_inode); + sbi->reparse.ni = NULL; + } - if (sbi->objid.ni) + if (sbi->objid.ni) { iput(&sbi->objid.ni->vfs_inode); + sbi->objid.ni = NULL; + } - if (sbi->volume.ni) + if (sbi->volume.ni) { iput(&sbi->volume.ni->vfs_inode); + sbi->volume.ni = NULL; + } ntfs_update_mftmirr(sbi, 0); @@ -1577,6 +1587,7 @@ put_inode_out: iput(inode); out: kfree(boot2); + ntfs3_put_sbi(sbi); return err; } -- cgit From dcc852e509a4cba0ac6ac734077cef260e4e0fe6 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:12:11 +0300 Subject: fs/ntfs3: Fix alternative boot searching Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 71c80c578feb..d2951b23f52a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -846,7 +846,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, struct ntfs_sb_info *sbi = sb->s_fs_info; int err; u32 mb, gb, boot_sector_size, sct_per_clst, record_size; - u64 sectors, clusters, mlcn, mlcn2; + u64 sectors, clusters, mlcn, mlcn2, dev_size0; struct NTFS_BOOT *boot; struct buffer_head *bh; struct MFT_REC *rec; @@ -855,6 +855,9 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, u32 boot_off = 0; const char *hint = "Primary boot"; + /* Save original dev_size. Used with alternative boot. */ + dev_size0 = dev_size; + sbi->volume.blocks = dev_size >> PAGE_SHIFT; bh = ntfs_bread(sb, 0); @@ -1087,9 +1090,9 @@ check_boot: } out: - if (err == -EINVAL && !bh->b_blocknr && dev_size > PAGE_SHIFT) { + if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) { u32 block_size = min_t(u32, sector_size, PAGE_SIZE); - u64 lbo = dev_size - sizeof(*boot); + u64 lbo = dev_size0 - sizeof(*boot); /* * Try alternative boot (last sector) @@ -1103,6 +1106,7 @@ out: boot_off = lbo & (block_size - 1); hint = "Alternative boot"; + dev_size = dev_size0; /* restore original size. */ goto check_boot; } brelse(bh); -- cgit From f684073c09fd4658faa0a75c12b65d89a58b8e83 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:47:07 +0300 Subject: fs/ntfs3: Refactoring and comments Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 +++--- fs/ntfs3/file.c | 4 ++-- fs/ntfs3/inode.c | 3 ++- fs/ntfs3/namei.c | 6 +++--- fs/ntfs3/ntfs.h | 2 +- fs/ntfs3/ntfs_fs.h | 2 -- fs/ntfs3/record.c | 6 ++++++ fs/ntfs3/super.c | 19 ++++++++----------- 8 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index a9d82bbb4729..e16487764282 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1106,10 +1106,10 @@ repack: } } - /* + /* * The code below may require additional cluster (to extend attribute list) - * and / or one MFT record - * It is too complex to undo operations if -ENOSPC occurs deep inside + * and / or one MFT record + * It is too complex to undo operations if -ENOSPC occurs deep inside * in 'ni_insert_nonresident'. * Return in advance -ENOSPC here if there are no free cluster and no free MFT. */ diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 962f12ce6c0a..1f7a194983c5 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -745,8 +745,8 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, unsigned int flags) + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 2f76dc055c1f..d6d021e19aaa 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1660,7 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, d_instantiate(dentry, inode); /* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */ - inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime); + inode->i_atime = inode->i_mtime = + inode_set_ctime_to_ts(inode, ni->i_crtime); dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime); mark_inode_dirty(dir); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index ad430d50bd79..eedacf94edd8 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de) err = ntfs_link_inode(inode, de); if (!err) { - dir->i_mtime = inode_set_ctime_to_ts(inode, - inode_set_ctime_current(dir)); + dir->i_mtime = inode_set_ctime_to_ts( + inode, inode_set_ctime_current(dir)); mark_inode_dirty(inode); mark_inode_dirty(dir); d_instantiate(de, inode); @@ -373,7 +373,7 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, #ifdef CONFIG_NTFS3_FS_POSIX_ACL if (IS_POSIXACL(dir)) { - /* + /* * Load in cache current acl to avoid ni_lock(dir): * ntfs_create_inode -> ntfs_init_acl -> posix_acl_create -> * ntfs_get_acl -> ntfs_get_acl_ex -> ni_lock diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 98b76d1b09e7..86aecbb01a92 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -847,7 +847,7 @@ struct OBJECT_ID { // Birth Volume Id is the Object Id of the Volume on. // which the Object Id was allocated. It never changes. struct GUID BirthVolumeId; //0x10: - + // Birth Object Id is the first Object Id that was // ever assigned to this MFT Record. I.e. If the Object Id // is changed for some reason, this field will reflect the diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 788567d71d93..0e6a2777870c 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -497,8 +497,6 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, - CLST len); int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 02cc91ed8835..53629b1f65e9 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -189,6 +189,12 @@ out: return err; } +/* + * mi_enum_attr - start/continue attributes enumeration in record. + * + * NOTE: mi->mrec - memory of size sbi->record_size + * here we sure that mi->mrec->total == sbi->record_size (see mi_read) + */ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index d2951b23f52a..f9a214367113 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -488,7 +488,6 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, { int err; struct super_block *sb = pde_data(file_inode(file)); - struct ntfs_sb_info *sbi = sb->s_fs_info; ssize_t ret = count; u8 *label = kmalloc(count, GFP_NOFS); @@ -502,7 +501,7 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, while (ret > 0 && label[ret - 1] == '\n') ret -= 1; - err = ntfs_set_label(sbi, label, ret); + err = ntfs_set_label(sb->s_fs_info, label, ret); if (err < 0) { ntfs_err(sb, "failed (%d) to write label", err); @@ -1082,10 +1081,10 @@ check_boot: if (bh->b_blocknr && !sb_rdonly(sb)) { /* - * Alternative boot is ok but primary is not ok. - * Do not update primary boot here 'cause it may be faked boot. - * Let ntfs to be mounted and update boot later. - */ + * Alternative boot is ok but primary is not ok. + * Do not update primary boot here 'cause it may be faked boot. + * Let ntfs to be mounted and update boot later. + */ *boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN); } @@ -1549,9 +1548,9 @@ load_root: if (boot2) { /* - * Alternative boot is ok but primary is not ok. - * Volume is recognized as NTFS. Update primary boot. - */ + * Alternative boot is ok but primary is not ok. + * Volume is recognized as NTFS. Update primary boot. + */ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { if (buffer_locked(bh0)) @@ -1785,7 +1784,6 @@ static int __init init_ntfs_fs(void) if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); - #ifdef CONFIG_PROC_FS /* Create "/proc/fs/ntfs3" */ proc_info_root = proc_mkdir("fs/ntfs3", NULL); @@ -1827,7 +1825,6 @@ static void __exit exit_ntfs_fs(void) if (proc_info_root) remove_proc_entry("fs/ntfs3", NULL); #endif - } MODULE_LICENSE("GPL"); -- cgit From d27e202b9ac416e52093edf8789614d93dbd6231 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:54:07 +0300 Subject: fs/ntfs3: Add more info into /proc/fs/ntfs3//volinfo Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f9a214367113..5811da7e9d45 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -453,15 +453,23 @@ static struct proc_dir_entry *proc_info_root; * ntfs3.1 * cluster size * number of clusters + * total number of mft records + * number of used mft records ~= number of files + folders + * real state of ntfs "dirty"/"clean" + * current state of ntfs "dirty"/"clean" */ static int ntfs3_volinfo(struct seq_file *m, void *o) { struct super_block *sb = m->private; struct ntfs_sb_info *sbi = sb->s_fs_info; - seq_printf(m, "ntfs%d.%d\n%u\n%zu\n", sbi->volume.major_ver, - sbi->volume.minor_ver, sbi->cluster_size, - sbi->used.bitmap.nbits); + seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n", + sbi->volume.major_ver, sbi->volume.minor_ver, + sbi->cluster_size, sbi->used.bitmap.nbits, + sbi->mft.bitmap.nbits, + sbi->mft.bitmap.nbits - wnd_zeroes(&sbi->mft.bitmap), + sbi->volume.real_dirty ? "dirty" : "clean", + (sbi->volume.flags & VOLUME_FLAG_DIRTY) ? "dirty" : "clean"); return 0; } -- cgit From e52dce610a2d53bf2b5e94a8843c71cb73a91ea5 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:56:15 +0300 Subject: fs/ntfs3: Do not allow to change label if volume is read-only Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5811da7e9d45..cf0a720523f0 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -497,7 +497,12 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, int err; struct super_block *sb = pde_data(file_inode(file)); ssize_t ret = count; - u8 *label = kmalloc(count, GFP_NOFS); + u8 *label; + + if (sb_rdonly(sb)) + return -EROFS; + + label = kmalloc(count, GFP_NOFS); if (!label) return -ENOMEM; -- cgit From 32e9212256b88f35466642f9c939bb40cfb2c2de Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:19:08 +0300 Subject: fs/ntfs3: Fix possible NULL-ptr-deref in ni_readpage_cmpr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index d49fbb22bd5e..dad976a68985 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2148,7 +2148,7 @@ out1: for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; - if (i == idx) + if (i == idx || !pg) continue; unlock_page(pg); put_page(pg); -- cgit From 9c689c8dc86f8ca99bf91c05f24c8bab38fe7d5f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:28:11 +0300 Subject: fs/ntfs3: Fix NULL pointer dereference on error in attr_allocate_frame() Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index e16487764282..63f70259edc0 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1736,10 +1736,8 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) { - err = -ENOENT; - goto out; - } + if (!attr_b) + return -ENOENT; attr = attr_b; le = le_b; -- cgit From 8e7e27b2ee1e19c4040d4987e345f678a74c0aed Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 20 Apr 2023 15:46:22 +0800 Subject: fs/ntfs3: fix panic about slab-out-of-bounds caused by ntfs_list_ea() Here is a BUG report about linux-6.1 from syzbot, but it still remains within upstream: BUG: KASAN: slab-out-of-bounds in ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] BUG: KASAN: slab-out-of-bounds in ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 Read of size 1 at addr ffff888021acaf3d by task syz-executor128/3632 Call Trace: kasan_report+0x139/0x170 mm/kasan/report.c:495 ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 vfs_listxattr fs/xattr.c:457 [inline] listxattr+0x293/0x2d0 fs/xattr.c:804 path_listxattr fs/xattr.c:828 [inline] __do_sys_llistxattr fs/xattr.c:846 [inline] Before derefering field members of `ea` in unpacked_ea_size(), we need to check whether the EA_FULL struct is located in access validate range. Similarly, when derefering `ea->name` field member, we need to check whethe the ea->name is located in access validate range, too. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Reported-by: syzbot+9fcea5ef6dc4dc72d334@syzkaller.appspotmail.com Signed-off-by: Zeng Heng [almaz.alexandrovich@paragon-software.com: took the ret variable out of the loop block] Signed-off-by: Konstantin Komarov --- fs/ntfs3/xattr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 29fd391899e5..4920548192a0 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -211,7 +211,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, size = le32_to_cpu(info->size); /* Enumerate all xattrs. */ - for (ret = 0, off = 0; off < size; off += ea_size) { + ret = 0; + for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { ea = Add2Ptr(ea_all, off); ea_size = unpacked_ea_size(ea); @@ -219,6 +220,10 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, break; if (buffer) { + /* Check if we can use field ea->name */ + if (off + ea_size > size) + break; + if (ret + ea->name_len + 1 > bytes_per_buffer) { err = -ERANGE; goto out; -- cgit From 34e6552a442f268eefd408e47f4f2d471aa64829 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 13 Jul 2023 22:41:46 +0300 Subject: fs/ntfs3: Fix OOB read in ntfs_init_from_boot Syzbot was able to create a device which has the last sector of size 512. After failing to boot from initial sector, reading from boot info from offset 511 causes OOB read. To prevent such reports add sanity check to validate if size of buffer_head if big enough to hold ntfs3 bootinfo Fixes: 6a4cd3ea7d77 ("fs/ntfs3: Alternative boot if primary boot is corrupted") Reported-by: syzbot+53ce40c8c0322c06aea5@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cf0a720523f0..5341ed80e2d2 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -878,6 +878,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, check_boot: err = -EINVAL; + + /* Corrupted image; do not read OOB */ + if (bh->b_size - sizeof(*boot) < boot_off) + goto out; + boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off); if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) { -- cgit From 1f9b94af923c88539426ed811ae7e9543834a5c5 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Wed, 9 Aug 2023 12:11:18 -0700 Subject: fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e() Upon investigation of the C reproducer provided by Syzbot, it seemed the reproducer was trying to mount a corrupted NTFS filesystem, then issue a rename syscall to some nodes in the filesystem. This can be shown by modifying the reproducer to only include the mount syscall, and investigating the filesystem by e.g. `ls` and `rm` commands. As a result, during the problematic call to `hdr_fine_e`, the `inode` being supplied did not go through `indx_init`, hence the `cmp` function pointer was never set. The fix is simply to check whether `cmp` is not set, and return NULL if that's the case, in order to be consistent with other error scenarios of the `hdr_find_e` method. The rationale behind this patch is that: - We should prevent crashing the kernel even if the mounted filesystem is corrupted. Any syscalls made on the filesystem could return invalid, but the kernel should be able to sustain these calls. - Only very specific corruption would lead to this bug, so it would be a pretty rare case in actual usage anyways. Therefore, introducing a check to specifically protect against this bug seems appropriate. Because of its rarity, an `unlikely` clause is used to wrap around this nullity check. Reported-by: syzbot+60cf892fc31d1f4358fc@syzkaller.appspotmail.com Signed-off-by: Ziqi Zhao Signed-off-by: Konstantin Komarov --- fs/ntfs3/index.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 124c6e822623..cf92b2433f7a 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, u32 total = le32_to_cpu(hdr->total); u16 offs[128]; + if (unlikely(!cmp)) + return NULL; + fill_table: if (end > total) return NULL; -- cgit From 85a4780dc96ed9dd643bbadf236552b3320fae26 Mon Sep 17 00:00:00 2001 From: Gabriel Marcano Date: Tue, 12 Sep 2023 21:50:32 -0700 Subject: fs/ntfs3: Fix directory element type detection Calling stat() from userspace correctly identified junctions in an NTFS partition as symlinks, but using readdir() and iterating through the directory containing the same junction did not identify the junction as a symlink. When emitting directory contents, check FILE_ATTRIBUTE_REPARSE_POINT attribute to detect junctions and report them as links. Signed-off-by: Gabriel Marcano Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 063a6654199b..ec0566b322d5 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,7 +309,11 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ + if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) + dt_type = DT_LNK; + else + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } -- cgit From e4494770a5cad3c9d1d2a65ed15d07656c0d9b82 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 25 Sep 2023 12:48:07 +0800 Subject: fs/ntfs3: Avoid possible memory leak smatch warn: fs/ntfs3/fslog.c:2172 last_log_lsn() warn: possible memory leak of 'page_bufs' Jump to label 'out' to free 'page_bufs' and is more consistent with other code. Signed-off-by: Su Hui Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 12f28cdf5c83..98ccb6650858 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -2168,8 +2168,10 @@ file_is_valid: if (!page) { page = kmalloc(log->page_size, GFP_NOFS); - if (!page) - return -ENOMEM; + if (!page) { + err = -ENOMEM; + goto out; + } } /* -- cgit From 3b8e0af4a7a331d1510e963b8fd77e2fca0a77f1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Sep 2023 20:38:13 +0900 Subject: ata: libata-core: Fix ata_port_request_pm() locking The function ata_port_request_pm() checks the port flag ATA_PFLAG_PM_PENDING and calls ata_port_wait_eh() if this flag is set to ensure that power management operations for a port are not scheduled simultaneously. However, this flag check is done without holding the port lock. Fix this by taking the port lock on entry to the function and checking the flag under this lock. The lock is released and re-taken if ata_port_wait_eh() needs to be called. The two WARN_ON() macros checking that the ATA_PFLAG_PM_PENDING flag was cleared are removed as the first call is racy and the second one done without holding the port lock. Fixes: 5ef41082912b ("ata: add ata port system PM callbacks") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: Bart Van Assche --- drivers/ata/libata-core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0072e0f9ad39..732f3d0b4fd9 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5037,17 +5037,19 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, struct ata_link *link; unsigned long flags; - /* Previous resume operation might still be in - * progress. Wait for PM_PENDING to clear. + spin_lock_irqsave(ap->lock, flags); + + /* + * A previous PM operation might still be in progress. Wait for + * ATA_PFLAG_PM_PENDING to clear. */ if (ap->pflags & ATA_PFLAG_PM_PENDING) { + spin_unlock_irqrestore(ap->lock, flags); ata_port_wait_eh(ap); - WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); + spin_lock_irqsave(ap->lock, flags); } - /* request PM ops to EH */ - spin_lock_irqsave(ap->lock, flags); - + /* Request PM operation to EH */ ap->pm_mesg = mesg; ap->pflags |= ATA_PFLAG_PM_PENDING; ata_for_each_link(link, ap, HOST_FIRST) { @@ -5059,10 +5061,8 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, spin_unlock_irqrestore(ap->lock, flags); - if (!async) { + if (!async) ata_port_wait_eh(ap); - WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); - } } /* -- cgit From 84d76529c650f887f1e18caee72d6f0589e1baf9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Aug 2023 13:07:36 +0900 Subject: ata: libata-core: Fix port and device removal Whenever an ATA adapter driver is removed (e.g. rmmod), ata_port_detach() is called repeatedly for all the adapter ports to remove (unload) the devices attached to the port and delete the port device itself. Removing of devices is done using libata EH with the ATA_PFLAG_UNLOADING port flag set. This causes libata EH to execute ata_eh_unload() which disables all devices attached to the port. ata_port_detach() finishes by calling scsi_remove_host() to remove the scsi host associated with the port. This function will trigger the removal of all scsi devices attached to the host and in the case of disks, calls to sd_shutdown() which will flush the device write cache and stop the device. However, given that the devices were already disabled by ata_eh_unload(), the synchronize write cache command and start stop unit commands fail. E.g. running "rmmod ahci" with first removing sd_mod results in error messages like: ata13.00: disable device sd 0:0:0:0: [sda] Synchronizing SCSI cache sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK sd 0:0:0:0: [sda] Stopping disk sd 0:0:0:0: [sda] Start/Stop Unit failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK Fix this by removing all scsi devices of the ata devices connected to the port before scheduling libata EH to disable the ATA devices. Fixes: 720ba12620ee ("[PATCH] libata-hp: update unload-unplug") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 732f3d0b4fd9..8e35afe5e560 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5948,11 +5948,30 @@ static void ata_port_detach(struct ata_port *ap) struct ata_link *link; struct ata_device *dev; - /* tell EH we're leaving & flush EH */ + /* Wait for any ongoing EH */ + ata_port_wait_eh(ap); + + mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); + + /* Remove scsi devices */ + ata_for_each_link(link, ap, HOST_FIRST) { + ata_for_each_dev(dev, link, ALL) { + if (dev->sdev) { + spin_unlock_irqrestore(ap->lock, flags); + scsi_remove_device(dev->sdev); + spin_lock_irqsave(ap->lock, flags); + dev->sdev = NULL; + } + } + } + + /* Tell EH to disable all devices */ ap->pflags |= ATA_PFLAG_UNLOADING; ata_port_schedule_eh(ap); + spin_unlock_irqrestore(ap->lock, flags); + mutex_unlock(&ap->scsi_scan_mutex); /* wait till EH commits suicide */ ata_port_wait_eh(ap); -- cgit From fb99ef17865035a6657786d4b2af11a27ba23f9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 25 Aug 2023 15:41:14 +0900 Subject: ata: libata-scsi: link ata port and scsi device There is no direct device ancestry defined between an ata_device and its scsi device which prevents the power management code from correctly ordering suspend and resume operations. Create such ancestry with the ata device as the parent to ensure that the scsi device (child) is suspended before the ata device and that resume handles the ata device before the scsi device. The parent-child (supplier-consumer) relationship is established between the ata_port (parent) and the scsi device (child) with the function device_add_link(). The parent used is not the ata_device as the PM operations are defined per port and the status of all devices connected through that port is controlled from the port operations. The device link is established with the new function ata_scsi_slave_alloc(), and this function is used to define the ->slave_alloc callback of the scsi host template of all ata drivers. Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: John Garry --- drivers/ata/libata-scsi.c | 45 ++++++++++++++++++++++++++++++++++++++++----- include/linux/libata.h | 2 ++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index fb73c145b49a..8b43290ca2cd 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1089,6 +1089,42 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) return 0; } +/** + * ata_scsi_slave_alloc - Early setup of SCSI device + * @sdev: SCSI device to examine + * + * This is called from scsi_alloc_sdev() when the scsi device + * associated with an ATA device is scanned on a port. + * + * LOCKING: + * Defined by SCSI layer. We don't really care. + */ + +int ata_scsi_slave_alloc(struct scsi_device *sdev) +{ + struct ata_port *ap = ata_shost_to_port(sdev->host); + struct device_link *link; + + ata_scsi_sdev_config(sdev); + + /* + * Create a link from the ata_port device to the scsi device to ensure + * that PM does suspend/resume in the correct order: the scsi device is + * consumer (child) and the ata port the supplier (parent). + */ + link = device_link_add(&sdev->sdev_gendev, &ap->tdev, + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); + if (!link) { + ata_port_err(ap, "Failed to create link to scsi device %s\n", + dev_name(&sdev->sdev_gendev)); + return -ENODEV; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ata_scsi_slave_alloc); + /** * ata_scsi_slave_config - Set SCSI device attributes * @sdev: SCSI device to examine @@ -1105,14 +1141,11 @@ int ata_scsi_slave_config(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); - int rc = 0; - - ata_scsi_sdev_config(sdev); if (dev) - rc = ata_scsi_dev_config(sdev, dev); + return ata_scsi_dev_config(sdev, dev); - return rc; + return 0; } EXPORT_SYMBOL_GPL(ata_scsi_slave_config); @@ -1136,6 +1169,8 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) unsigned long flags; struct ata_device *dev; + device_link_remove(&sdev->sdev_gendev, &ap->tdev); + spin_lock_irqsave(ap->lock, flags); dev = __ata_scsi_find_dev(ap, sdev); if (dev && dev->sdev) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 84aca8c44fa3..3ce1ab408114 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1148,6 +1148,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); +extern int ata_scsi_slave_alloc(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, @@ -1396,6 +1397,7 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ + .slave_alloc = ata_scsi_slave_alloc, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ -- cgit From 3cc2ffe5c16dc65dfac354bc5b5bc98d3b397567 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 Sep 2023 10:02:41 +0900 Subject: scsi: sd: Differentiate system and runtime start/stop management The underlying device and driver of a SCSI disk may have different system and runtime power mode control requirements. This is because runtime power management affects only the SCSI disk, while system level power management affects all devices, including the controller for the SCSI disk. For instance, issuing a START STOP UNIT command when a SCSI disk is runtime suspended and resumed is fine: the command is translated to a STANDBY IMMEDIATE command to spin down the ATA disk and to a VERIFY command to wake it up. The SCSI disk runtime operations have no effect on the ata port device used to connect the ATA disk. However, for system suspend/resume operations, the ATA port used to connect the device will also be suspended and resumed, with the resume operation requiring re-validating the device link and the device itself. In this case, issuing a VERIFY command to spinup the disk must be done before starting to revalidate the device, when the ata port is being resumed. In such case, we must not allow the SCSI disk driver to issue START STOP UNIT commands. Allow a low level driver to refine the SCSI disk start/stop management by differentiating system and runtime cases with two new SCSI device flags: manage_system_start_stop and manage_runtime_start_stop. These new flags replace the current manage_start_stop flag. Drivers setting the manage_start_stop are modifed to set both new flags, thus preserving the existing start/stop management behavior. For backward compatibility, the old manage_start_stop sysfs device attribute is kept as a read-only attribute showing a value of 1 for devices enabling both new flags and 0 otherwise. Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 3 +- drivers/firewire/sbp2.c | 9 +++-- drivers/scsi/sd.c | 90 ++++++++++++++++++++++++++++++++++++---------- include/scsi/scsi_device.h | 5 ++- 4 files changed, 84 insertions(+), 23 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 8b43290ca2cd..73428ad0c8d2 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1056,7 +1056,8 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) * will be woken up by ata_port_pm_resume() with a port reset * and device revalidation. */ - sdev->manage_start_stop = 1; + sdev->manage_system_start_stop = true; + sdev->manage_runtime_start_stop = true; sdev->no_start_on_resume = 1; } diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 26db5b8dfc1e..749868b9e80d 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " * * - power condition * Set the power condition field in the START STOP UNIT commands sent by - * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on). + * sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or + * manage_runtime_start_stop is on). * Some disks need this to spin down or to resume properly. * * - override internal blacklist @@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) sdev->use_10_for_rw = 1; - if (sbp2_param_exclusive_login) - sdev->manage_start_stop = 1; + if (sbp2_param_exclusive_login) { + sdev->manage_system_start_stop = true; + sdev->manage_runtime_start_stop = true; + } if (sdev->type == TYPE_ROM) sdev->use_10_for_ms = 1; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c92a317ba547..5a1b802d180f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -201,18 +201,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr, } static ssize_t -manage_start_stop_show(struct device *dev, struct device_attribute *attr, - char *buf) +manage_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; - return sprintf(buf, "%u\n", sdp->manage_start_stop); + return sysfs_emit(buf, "%u\n", + sdp->manage_system_start_stop && + sdp->manage_runtime_start_stop); } +static DEVICE_ATTR_RO(manage_start_stop); static ssize_t -manage_start_stop_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +manage_system_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + + return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); +} + +static ssize_t +manage_system_start_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; @@ -224,11 +238,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr, if (kstrtobool(buf, &v)) return -EINVAL; - sdp->manage_start_stop = v; + sdp->manage_system_start_stop = v; return count; } -static DEVICE_ATTR_RW(manage_start_stop); +static DEVICE_ATTR_RW(manage_system_start_stop); + +static ssize_t +manage_runtime_start_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + + return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); +} + +static ssize_t +manage_runtime_start_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + bool v; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->manage_runtime_start_stop = v; + + return count; +} +static DEVICE_ATTR_RW(manage_runtime_start_stop); static ssize_t allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -560,6 +605,8 @@ static struct attribute *sd_disk_attrs[] = { &dev_attr_FUA.attr, &dev_attr_allow_restart.attr, &dev_attr_manage_start_stop.attr, + &dev_attr_manage_system_start_stop.attr, + &dev_attr_manage_runtime_start_stop.attr, &dev_attr_protection_type.attr, &dev_attr_protection_mode.attr, &dev_attr_app_tag_own.attr, @@ -3771,13 +3818,20 @@ static void sd_shutdown(struct device *dev) sd_sync_cache(sdkp, NULL); } - if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { + if (system_state != SYSTEM_RESTART && + sdkp->device->manage_system_start_stop) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } } -static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) +static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) +{ + return (sdev->manage_system_start_stop && !runtime) || + (sdev->manage_runtime_start_stop && runtime); +} + +static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_sense_hdr sshdr; @@ -3809,12 +3863,12 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } } - if (sdkp->device->manage_start_stop) { + if (sd_do_start_stop(sdkp->device, runtime)) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); - if (ignore_stop_errors) + if (!runtime) ret = 0; } @@ -3826,23 +3880,23 @@ static int sd_suspend_system(struct device *dev) if (pm_runtime_suspended(dev)) return 0; - return sd_suspend_common(dev, true); + return sd_suspend_common(dev, false); } static int sd_suspend_runtime(struct device *dev) { - return sd_suspend_common(dev, false); + return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev) +static int sd_resume(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret = 0; + int ret; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - if (!sdkp->device->manage_start_stop) + if (!sd_do_start_stop(sdkp->device, runtime)) return 0; if (!sdkp->device->no_start_on_resume) { @@ -3860,7 +3914,7 @@ static int sd_resume_system(struct device *dev) if (pm_runtime_suspended(dev)) return 0; - return sd_resume(dev); + return sd_resume(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -3887,7 +3941,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev); + return sd_resume(dev, true); } static const struct dev_pm_ops sd_pm_ops = { diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b9230b6add04..fd41fdac0a8e 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -161,6 +161,10 @@ struct scsi_device { * pass settings from slave_alloc to scsi * core. */ unsigned int eh_timeout; /* Error handling timeout */ + + bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */ + bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */ + unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ unsigned busy:1; /* Used to prevent races */ @@ -193,7 +197,6 @@ struct scsi_device { unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */ unsigned no_start_on_add:1; /* do not issue start on add */ unsigned allow_restart:1; /* issue START_UNIT in error handler */ - unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */ unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */ unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */ unsigned no_uld_attach:1; /* disable connecting to upper level drivers */ -- cgit From aa3998dbeb3abce63653b7f6d4542e7dcd022590 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Aug 2023 09:43:39 +0900 Subject: ata: libata-scsi: Disable scsi device manage_system_start_stop The introduction of a device link to create a consumer/supplier relationship between the scsi device of an ATA device and the ATA port of that ATA device fixes the ordering of system suspend and resume operations. For suspend, the scsi device is suspended first and the ata port after it. This is fine as this allows the synchronize cache and START STOP UNIT commands issued by the scsi disk driver to be executed before the ata port is disabled. For resume operations, the ata port is resumed first, followed by the scsi device. This allows having the request queue of the scsi device to be unfrozen after the ata port resume is scheduled in EH, thus avoiding to see new requests prematurely issued to the ATA device. Since libata sets manage_system_start_stop to 1, the scsi disk resume operation also results in issuing a START STOP UNIT command to the device being resumed so that the device exits standby power mode. However, restoring the ATA device to the active power mode must be synchronized with libata EH processing of the port resume operation to avoid either 1) seeing the start stop unit command being received too early when the port is not yet resumed and ready to accept commands, or after the port resume process issues commands such as IDENTIFY to revalidate the device. In this last case, the risk is that the device revalidation fails with timeout errors as the drive is still spun down. Commit 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") disabled issuing the START STOP UNIT command to avoid issues with it. But this is incorrect as transitioning a device to the active power mode from the standby power mode set on suspend requires a media access command. The IDENTIFY, READ LOG and SET FEATURES commands executed in libata EH context triggered by the ata port resume operation may thus fail. Fix these synchronization issues is by handling a device power mode transitions for system suspend and resume directly in libata EH context, without relying on the scsi disk driver management triggered with the manage_system_start_stop flag. To do this, the following libata helper functions are introduced: 1) ata_dev_power_set_standby(): This function issues a STANDBY IMMEDIATE command to transitiom a device to the standby power mode. For HDDs, this spins down the disks. This function applies only to ATA and ZAC devices and does nothing otherwise. This function also does nothing for devices that have the ATA_FLAG_NO_POWEROFF_SPINDOWN or ATA_FLAG_NO_HIBERNATE_SPINDOWN flag set. For suspend, call ata_dev_power_set_standby() in ata_eh_handle_port_suspend() before the port is disabled and frozen. ata_eh_unload() is also modified to transition all enabled devices to the standby power mode when the system is shutdown or devices removed. 2) ata_dev_power_set_active() and This function applies to ATA or ZAC devices and issues a VERIFY command for 1 sector at LBA 0 to transition the device to the active power mode. For HDDs, since this function will complete only once the disk spin up. Its execution uses the same timeouts as for reset, to give the drive enough time to complete spinup without triggering a command timeout. For resume, call ata_dev_power_set_active() in ata_eh_revalidate_and_attach() after the port has been enabled and before any other command is issued to the device. With these changes, the manage_system_start_stop and no_start_on_resume scsi device flags do not need to be set in ata_scsi_dev_config(). The flag manage_runtime_start_stop is still set to allow the sd driver to spinup/spindown a disk through the sd runtime operations. Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/ata/libata-eh.c | 46 +++++++++++++++++++++++- drivers/ata/libata-scsi.c | 16 ++++----- drivers/ata/libata.h | 2 ++ include/linux/libata.h | 6 ++-- 5 files changed, 148 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8e35afe5e560..a0bc01606b30 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1972,6 +1972,96 @@ retry: return rc; } +/** + * ata_dev_power_set_standby - Set a device power mode to standby + * @dev: target device + * + * Issue a STANDBY IMMEDIATE command to set a device power mode to standby. + * For an HDD device, this spins down the disks. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_dev_power_set_standby(struct ata_device *dev) +{ + unsigned long ap_flags = dev->link->ap->flags; + struct ata_taskfile tf; + unsigned int err_mask; + + /* Issue STANDBY IMMEDIATE command only if supported by the device */ + if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) + return; + + /* + * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5) + * causing some drives to spin up and down again. For these, do nothing + * if we are being called on shutdown. + */ + if ((ap_flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) && + system_state == SYSTEM_POWER_OFF) + return; + + if ((ap_flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && + system_entering_hibernation()) + return; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_STANDBYNOW1; + + ata_dev_notice(dev, "Entering standby power mode\n"); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) + ata_dev_err(dev, "STANDBY IMMEDIATE failed (err_mask=0x%x)\n", + err_mask); +} + +/** + * ata_dev_power_set_active - Set a device power mode to active + * @dev: target device + * + * Issue a VERIFY command to enter to ensure that the device is in the + * active power mode. For a spun-down HDD (standby or idle power mode), + * the VERIFY command will complete after the disk spins up. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_dev_power_set_active(struct ata_device *dev) +{ + struct ata_taskfile tf; + unsigned int err_mask; + + /* + * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only + * if supported by the device. + */ + if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) + return; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_VERIFY; + tf.nsect = 1; + if (dev->flags & ATA_DFLAG_LBA) { + tf.flags |= ATA_TFLAG_LBA; + tf.device |= ATA_LBA; + } else { + /* CHS */ + tf.lbal = 0x1; /* sect */ + } + + ata_dev_notice(dev, "Entering active power mode\n"); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) + ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n", + err_mask); +} + /** * ata_read_log_page - read a specific log page * @dev: target device diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 4cf4f57e57b8..b1b2c276371e 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -147,6 +147,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), .timeouts = ata_eh_flush_timeouts }, + { .commands = CMDS(ATA_CMD_VERIFY), + .timeouts = ata_eh_reset_timeouts }, }; #undef CMDS @@ -498,7 +500,19 @@ static void ata_eh_unload(struct ata_port *ap) struct ata_device *dev; unsigned long flags; - /* Restore SControl IPM and SPD for the next driver and + /* + * Unless we are restarting, transition all enabled devices to + * standby power mode. + */ + if (system_state != SYSTEM_RESTART) { + ata_for_each_link(link, ap, PMP_FIRST) { + ata_for_each_dev(dev, link, ENABLED) + ata_dev_power_set_standby(dev); + } + } + + /* + * Restore SControl IPM and SPD for the next driver and * disable attached devices. */ ata_for_each_link(link, ap, PMP_FIRST) { @@ -684,6 +698,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_xfer_mode[devno] = dev->xfer_mode; if (ata_ncq_enabled(dev)) ehc->saved_ncq_enabled |= 1 << devno; + + /* If we are resuming, wake up the device */ + if (ap->pflags & ATA_PFLAG_RESUMING) + ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; } } @@ -743,6 +761,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) /* clean up */ spin_lock_irqsave(ap->lock, flags); + ap->pflags &= ~ATA_PFLAG_RESUMING; + if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && @@ -1218,6 +1238,13 @@ void ata_eh_detach_dev(struct ata_device *dev) struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; + /* + * If the device is still enabled, transition it to standby power mode + * (i.e. spin down HDDs). + */ + if (ata_dev_enabled(dev)) + ata_dev_power_set_standby(dev); + ata_dev_disable(dev); spin_lock_irqsave(ap->lock, flags); @@ -3016,6 +3043,15 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, if (ehc->i.flags & ATA_EHI_DID_RESET) readid_flags |= ATA_READID_POSTRESET; + /* + * When resuming, before executing any command, make sure to + * transition the device to the active power mode. + */ + if ((action & ATA_EH_SET_ACTIVE) && ata_dev_enabled(dev)) { + ata_dev_power_set_active(dev); + ata_eh_done(link, dev, ATA_EH_SET_ACTIVE); + } + if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { WARN_ON(dev->class == ATA_DEV_PMP); @@ -3989,6 +4025,7 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) unsigned long flags; int rc = 0; struct ata_device *dev; + struct ata_link *link; /* are we suspending? */ spin_lock_irqsave(ap->lock, flags); @@ -4001,6 +4038,12 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); + /* Set all devices attached to the port in standby mode */ + ata_for_each_link(link, ap, HOST_FIRST) { + ata_for_each_dev(dev, link, ENABLED) + ata_dev_power_set_standby(dev); + } + /* * If we have a ZPODD attached, check its zero * power ready status before the port is frozen. @@ -4083,6 +4126,7 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); + ap->pflags |= ATA_PFLAG_RESUMING; spin_unlock_irqrestore(ap->lock, flags); } #endif /* CONFIG_PM */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 73428ad0c8d2..a0e58d22d222 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1050,15 +1050,13 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) } } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); + /* - * Stop the drive on suspend but do not issue START STOP UNIT - * on resume as this is not necessary and may fail: the device - * will be woken up by ata_port_pm_resume() with a port reset - * and device revalidation. + * Ask the sd driver to issue START STOP UNIT on runtime suspend + * and resume only. For system level suspend/resume, devices + * power state is handled directly by libata EH. */ - sdev->manage_system_start_stop = true; sdev->manage_runtime_start_stop = true; - sdev->no_start_on_resume = 1; } /* @@ -1231,7 +1229,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) } if (cdb[4] & 0x1) { - tf->nsect = 1; /* 1 sector, lba=0 */ + tf->nsect = 1; /* 1 sector, lba=0 */ if (qc->dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; @@ -1247,7 +1245,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) tf->lbah = 0x0; /* cyl high */ } - tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ + tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ } else { /* Some odd clown BIOSen issue spindown on power off (ACPI S4 * or S5) causing some drives to spin up and down again. @@ -1257,7 +1255,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) goto skip; if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && - system_entering_hibernation()) + system_entering_hibernation()) goto skip; /* Issue ATA STANDBY IMMEDIATE command */ diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 6e7d352803bd..820299bd9d06 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -60,6 +60,8 @@ extern int ata_dev_reread_id(struct ata_device *dev, unsigned int readid_flags); extern int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class, unsigned int readid_flags); extern int ata_dev_configure(struct ata_device *dev); +extern void ata_dev_power_set_standby(struct ata_device *dev); +extern void ata_dev_power_set_active(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern unsigned int ata_dev_set_feature(struct ata_device *dev, diff --git a/include/linux/libata.h b/include/linux/libata.h index 3ce1ab408114..2a7d2af0ed80 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -192,6 +192,7 @@ enum { ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ + ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ @@ -318,9 +319,10 @@ enum { ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ + ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | - ATA_EH_GET_SUCCESS_SENSE, + ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -357,7 +359,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ -- cgit From ff48b37802e5c134e2dfc4d091f10b2eb5065a72 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 Sep 2023 15:00:13 +0900 Subject: scsi: Do not attempt to rescan suspended devices scsi_rescan_device() takes a scsi device lock before executing a device handler and device driver rescan methods. Waiting for the completion of any command issued to the device by these methods will thus be done with the device lock held. As a result, there is a risk of deadlocking within the power management code if scsi_rescan_device() is called to handle a device resume with the associated scsi device not yet resumed. Avoid such situation by checking that the target scsi device is in the running state, that is, fully capable of executing commands, before proceeding with the rescan and bailout returning -EWOULDBLOCK otherwise. With this error return, the caller can retry rescaning the device after a delay. The state check is done with the device lock held and is thus safe against incoming suspend power management operations. Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: Bart Van Assche --- drivers/scsi/scsi_scan.c | 18 +++++++++++++++++- include/scsi/scsi_host.h | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 52014b2d39e1..3db4d31a03a1 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1619,12 +1619,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); -void scsi_rescan_device(struct scsi_device *sdev) +int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; + int ret = 0; device_lock(dev); + /* + * Bail out if the device is not running. Otherwise, the rescan may + * block waiting for commands to be executed, with us holding the + * device lock. This can result in a potential deadlock in the power + * management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING) { + ret = -EWOULDBLOCK; + goto unlock; + } + scsi_attach_vpd(sdev); scsi_cdl_check(sdev); @@ -1638,7 +1650,11 @@ void scsi_rescan_device(struct scsi_device *sdev) drv->rescan(dev); module_put(dev->driver->owner); } + +unlock: device_unlock(dev); + + return ret; } EXPORT_SYMBOL(scsi_rescan_device); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 49f768d0ff37..4c2dc8150c6d 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -764,7 +764,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); -extern void scsi_rescan_device(struct scsi_device *); +extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); extern int scsi_host_busy(struct Scsi_Host *shost); -- cgit From 8b4d9469d0b0e553208ee6f62f2807111fde18b9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 5 Sep 2023 09:06:23 +0900 Subject: ata: libata-scsi: Fix delayed scsi_rescan_device() execution Commit 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") modified ata_scsi_dev_rescan() to check the scsi device "is_suspended" power field to ensure that the scsi device associated with an ATA device is fully resumed when scsi_rescan_device() is executed. However, this fix is problematic as: 1) It relies on a PM internal field that should not be used without PM device locking protection. 2) The check for is_suspended and the call to scsi_rescan_device() are not atomic and a suspend PM event may be triggered between them, casuing scsi_rescan_device() to be called on a suspended device and in that function blocking while holding the scsi device lock. This would deadlock a following resume operation. These problems can trigger PM deadlocks on resume, especially with resume operations triggered quickly after or during suspend operations. E.g., a simple bash script like: for (( i=0; i<10; i++ )); do echo "+2 > /sys/class/rtc/rtc0/wakealarm echo mem > /sys/power/state done that triggers a resume 2 seconds after starting suspending a system can quickly lead to a PM deadlock preventing the system from correctly resuming. Fix this by replacing the check on is_suspended with a check on the return value given by scsi_rescan_device() as that function will fail if called against a suspended device. Also make sure rescan tasks already scheduled are first cancelled before suspending an ata port. Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 16 ++++++++++++++++ drivers/ata/libata-scsi.c | 33 +++++++++++++++------------------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index a0bc01606b30..092372334e92 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5168,11 +5168,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. + */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false); } static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. + */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a0e58d22d222..6850cac803c1 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4756,7 +4756,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; - bool delay_rescan = false; + int ret = 0; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); @@ -4765,37 +4765,34 @@ void ata_scsi_dev_rescan(struct work_struct *work) ata_for_each_dev(dev, link, ENABLED) { struct scsi_device *sdev = dev->sdev; + /* + * If the port was suspended before this was scheduled, + * bail out. + */ + if (ap->pflags & ATA_PFLAG_SUSPENDED) + goto unlock; + if (!sdev) continue; if (scsi_device_get(sdev)) continue; - /* - * If the rescan work was scheduled because of a resume - * event, the port is already fully resumed, but the - * SCSI device may not yet be fully resumed. In such - * case, executing scsi_rescan_device() may cause a - * deadlock with the PM code on device_lock(). Prevent - * this by giving up and retrying rescan after a short - * delay. - */ - delay_rescan = sdev->sdev_gendev.power.is_suspended; - if (delay_rescan) { - scsi_device_put(sdev); - break; - } - spin_unlock_irqrestore(ap->lock, flags); - scsi_rescan_device(sdev); + ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); + + if (ret) + goto unlock; } } +unlock: spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); - if (delay_rescan) + /* Reschedule with a delay if scsi_rescan_device() returned an error */ + if (ret) schedule_delayed_work(&ap->scsi_rescan_task, msecs_to_jiffies(5)); } -- cgit From 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 8 Sep 2023 20:04:52 +0900 Subject: ata: libata-core: Do not register PM operations for SAS ports libsas does its own domain based power management of ports. For such ports, libata should not use a device type defining power management operations as executing these operations for suspend/resume in addition to libsas calls to ata_sas_port_suspend() and ata_sas_port_resume() is not necessary (and likely dangerous to do, even though problems are not seen currently). Introduce the new ata_port_sas_type device_type for ports managed by libsas. This new device type is used in ata_tport_add() and is defined without power management operations. Fixes: 2fcbdcb4c802 ("[SCSI] libata: export ata_port suspend/resume infrastructure for sas") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: John Garry Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-transport.c | 9 ++++++++- drivers/ata/libata.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 092372334e92..261445c1851b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5335,7 +5335,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume); #endif const struct device_type ata_port_type = { - .name = "ata_port", + .name = ATA_PORT_TYPE_NAME, #ifdef CONFIG_PM .pm = &ata_port_pm_ops, #endif diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index e4fb9d1b9b39..3e49a877500e 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -266,6 +266,10 @@ void ata_tport_delete(struct ata_port *ap) put_device(dev); } +static const struct device_type ata_port_sas_type = { + .name = ATA_PORT_TYPE_NAME, +}; + /** ata_tport_add - initialize a transport ATA port structure * * @parent: parent device @@ -283,7 +287,10 @@ int ata_tport_add(struct device *parent, struct device *dev = &ap->tdev; device_initialize(dev); - dev->type = &ata_port_type; + if (ap->flags & ATA_FLAG_SAS_HOST) + dev->type = &ata_port_sas_type; + else + dev->type = &ata_port_type; dev->parent = parent; ata_host_get(ap->host); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 820299bd9d06..05ac80da8ebc 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -30,6 +30,8 @@ enum { ATA_DNXFER_QUIET = (1 << 31), }; +#define ATA_PORT_TYPE_NAME "ata_port" + extern atomic_t ata_print_id; extern int atapi_passthru16; extern int libata_fua; -- cgit From 99398d2070ab03d13f90b758ad397e19a65fffb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 8 Sep 2023 17:03:15 +0900 Subject: scsi: sd: Do not issue commands to suspended disks on shutdown If an error occurs when resuming a host adapter before the devices attached to the adapter are resumed, the adapter low level driver may remove the scsi host, resulting in a call to sd_remove() for the disks of the host. This in turn results in a call to sd_shutdown() which will issue a synchronize cache command and a start stop unit command to spindown the disk. sd_shutdown() issues the commands only if the device is not already runtime suspended but does not check the power state for system-wide suspend/resume. That is, the commands may be issued with the device in a suspended state, which causes PM resume to hang, forcing a reset of the machine to recover. Fix this by tracking the suspended state of a disk by introducing the suspended boolean field in the scsi_disk structure. This flag is set to true when the disk is suspended is sd_suspend_common() and resumed with sd_resume(). When suspended is true, sd_shutdown() is not executed from sd_remove(). Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Martin K. Petersen --- drivers/scsi/sd.c | 17 +++++++++++++---- drivers/scsi/sd.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a1b802d180f..83b6a3f3863b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3741,7 +3741,8 @@ static int sd_remove(struct device *dev) device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); - sd_shutdown(dev); + if (!sdkp->suspended) + sd_shutdown(dev); put_disk(sdkp->disk); return 0; @@ -3872,6 +3873,9 @@ static int sd_suspend_common(struct device *dev, bool runtime) ret = 0; } + if (!ret) + sdkp->suspended = true; + return ret; } @@ -3891,21 +3895,26 @@ static int sd_suspend_runtime(struct device *dev) static int sd_resume(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; + int ret = 0; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - if (!sd_do_start_stop(sdkp->device, runtime)) + if (!sd_do_start_stop(sdkp->device, runtime)) { + sdkp->suspended = false; return 0; + } if (!sdkp->device->no_start_on_resume) { sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); } - if (!ret) + if (!ret) { opal_unlock_from_suspend(sdkp->opal_dev); + sdkp->suspended = false; + } + return ret; } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5eea762f84d1..409dda5350d1 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -131,6 +131,7 @@ struct scsi_disk { u8 provisioning_mode; u8 zeroing_mode; u8 nr_actuators; /* Number of actuators */ + bool suspended; /* Disk is suspended (stopped) */ unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ -- cgit From ed518d9ba980dc0d27c7d1dea1e627ba001d1977 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Sep 2023 08:46:22 +0900 Subject: ata: libata-core: Fix compilation warning in ata_dev_config_ncq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 24 bytes length allocated to the ncq_desc string in ata_dev_config_lba() for ata_dev_config_ncq() to use is too short, causing the following gcc compilation warnings when compiling with W=1: drivers/ata/libata-core.c: In function ‘ata_dev_configure’: drivers/ata/libata-core.c:2378:56: warning: ‘%d’ directive output may be truncated writing between 1 and 2 bytes into a region of size between 1 and 11 [-Wformat-truncation=] 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, | ^~ In function ‘ata_dev_config_ncq’, inlined from ‘ata_dev_config_lba’ at drivers/ata/libata-core.c:2649:8, inlined from ‘ata_dev_configure’ at drivers/ata/libata-core.c:2952:9: drivers/ata/libata-core.c:2378:41: note: directive argument in the range [1, 32] 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, | ^~~~~~~~~~~~~~~~~~~~~ drivers/ata/libata-core.c:2378:17: note: ‘snprintf’ output between 16 and 31 bytes into a destination of size 24 2378 | snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2379 | ddepth, aa_desc); | ~~~~~~~~~~~~~~~~ Avoid these warnings and the potential truncation by changing the size of the ncq_desc string to 32 characters. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 261445c1851b..d8cc1e27a125 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2619,7 +2619,7 @@ static int ata_dev_config_lba(struct ata_device *dev) { const u16 *id = dev->id; const char *lba_desc; - char ncq_desc[24]; + char ncq_desc[32]; int ret; dev->flags |= ATA_DFLAG_LBA; -- cgit From 49728bdc702391902a473b9393f1620eea32acb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Sep 2023 09:08:40 +0900 Subject: ata: libata-eh: Fix compilation warning in ata_eh_link_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 6 bytes length of the tries_buf string in ata_eh_link_report() is too short and results in a gcc compilation warning with W-!: drivers/ata/libata-eh.c: In function ‘ata_eh_link_report’: drivers/ata/libata-eh.c:2371:59: warning: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 4 [-Wformat-truncation=] 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d", | ^~ drivers/ata/libata-eh.c:2371:56: note: directive argument in the range [-2147483648, 4] 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d", | ^~~~~~ drivers/ata/libata-eh.c:2371:17: note: ‘snprintf’ output between 4 and 14 bytes into a destination of size 6 2371 | snprintf(tries_buf, sizeof(tries_buf), " t%d", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2372 | ap->eh_tries); | ~~~~~~~~~~~~~ Avoid this warning by increasing the string size to 16B. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-eh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b1b2c276371e..5686353e442c 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2332,7 +2332,7 @@ static void ata_eh_link_report(struct ata_link *link) struct ata_eh_context *ehc = &link->eh_context; struct ata_queued_cmd *qc; const char *frozen, *desc; - char tries_buf[6] = ""; + char tries_buf[16] = ""; int tag, nr_failed = 0; if (ehc->i.flags & ATA_EHI_QUIET) -- cgit From 0d32a6bbb8e7bf503855f2990f1ccce0922db87b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Sep 2023 13:16:48 -0400 Subject: NFSD: Fix zero NFSv4 READ results when RQ_SPLICE_OK is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfsd4_encode_readv() uses xdr->buf->page_len as a starting point for the nfsd_iter_read() sink buffer -- page_len is going to be offset by the parts of the COMPOUND that have already been encoded into xdr->buf->pages. However, that value must be captured /before/ xdr_reserve_space_vec() advances page_len by the expected size of the read payload. Otherwise, the whole front part of the first page of the payload in the reply will be uninitialized. Mantas hit this because sec=krb5i forces RQ_SPLICE_OK off, which invokes the readv part of the nfsd4_encode_read() path. Also, older Linux NFS clients appear to send shorter READ requests for files smaller than a page, whereas newer clients just send page-sized requests and let the server send as many bytes as are in the file. Reported-by: Mantas Mikulėnas Closes: https://lore.kernel.org/linux-nfs/f1d0b234-e650-0f6e-0f5d-126b3d51d1eb@gmail.com/ Fixes: 703d75215555 ("NFSD: Hoist rq_vec preparation into nfsd_read() [step two]") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2e40c74d2f72..92c7dde148a4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4113,6 +4113,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; + unsigned int base = xdr->buf->page_len & ~PAGE_MASK; unsigned int starting_len = xdr->buf->len; __be32 zero = xdr_zero; __be32 nfserr; @@ -4121,8 +4122,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, return nfserr_resource; nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file, - read->rd_offset, &maxcount, - xdr->buf->page_len & ~PAGE_MASK, + read->rd_offset, &maxcount, base, &read->rd_eof); read->rd_length = maxcount; if (nfserr) -- cgit From 3048102d9d68008e948decbd730f0748dd7bdc31 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Thu, 28 Sep 2023 21:48:52 +0800 Subject: erofs: update documentation - update new features like bloom filter and DEFLATE. - add documentation for the long xattr name prefixes, which was landed upstream since v6.4. Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20230928134852.31118-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- Documentation/filesystems/erofs.rst | 40 ++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 4654ee57c1d5..f200d7874495 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -58,12 +58,14 @@ Here are the main features of EROFS: - Support extended attributes as an option; + - Support a bloom filter that speeds up negative extended attribute lookups; + - Support POSIX.1e ACLs by using extended attributes; - Support transparent data compression as an option: - LZ4 and MicroLZMA algorithms can be used on a per-file basis; In addition, - inplace decompression is also supported to avoid bounce compressed buffers - and page cache thrashing. + LZ4, MicroLZMA and DEFLATE algorithms can be used on a per-file basis; In + addition, inplace decompression is also supported to avoid bounce compressed + buffers and unnecessary page cache thrashing. - Support chunk-based data deduplication and rolling-hash compressed data deduplication; @@ -268,6 +270,38 @@ details.) By the way, chunk-based files are all uncompressed for now. +Long extended attribute name prefixes +------------------------------------- +There are use cases where extended attributes with different values can have +only a few common prefixes (such as overlayfs xattrs). The predefined prefixes +work inefficiently in both image size and runtime performance in such cases. + +The long xattr name prefixes feature is introduced to address this issue. The +overall idea is that, apart from the existing predefined prefixes, the xattr +entry could also refer to user-specified long xattr name prefixes, e.g. +"trusted.overlay.". + +When referring to a long xattr name prefix, the highest bit (bit 7) of +erofs_xattr_entry.e_name_index is set, while the lower bits (bit 0-6) as a whole +represent the index of the referred long name prefix among all long name +prefixes. Therefore, only the trailing part of the name apart from the long +xattr name prefix is stored in erofs_xattr_entry.e_name, which could be empty if +the full xattr name matches exactly as its long xattr name prefix. + +All long xattr prefixes are stored one by one in the packed inode as long as +the packed inode is valid, or in the meta inode otherwise. The +xattr_prefix_count (of the on-disk superblock) indicates the total number of +long xattr name prefixes, while (xattr_prefix_start * 4) indicates the start +offset of long name prefixes in the packed/meta inode. Note that, long extended +attribute name prefixes are disabled if xattr_prefix_count is 0. + +Each long name prefix is stored in the format: ALIGN({__le16 len, data}, 4), +where len represents the total size of the data part. The data part is actually +represented by 'struct erofs_xattr_long_prefix', where base_index represents the +index of the predefined xattr name prefix, e.g. EROFS_XATTR_INDEX_TRUSTED for +"trusted.overlay." long name prefix, while the infix string keeps the string +after stripping the short prefix, e.g. "overlay." for the example above. + Data compression ---------------- EROFS implements fixed-sized output compression which generates fixed-sized -- cgit From dd1b2026323a2d075ac553cecfd7a0c23c456c59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 19 Sep 2023 09:17:28 -0400 Subject: nfs: decrement nrequests counter before releasing the req I hit this panic in testing: [ 6235.500016] run fstests generic/464 at 2023-09-18 22:51:24 [ 6288.410761] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 6288.412174] #PF: supervisor read access in kernel mode [ 6288.413160] #PF: error_code(0x0000) - not-present page [ 6288.413992] PGD 0 P4D 0 [ 6288.414603] Oops: 0000 [#1] PREEMPT SMP PTI [ 6288.415419] CPU: 0 PID: 340798 Comm: kworker/u18:8 Not tainted 6.6.0-rc1-gdcf620ceebac #95 [ 6288.416538] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 [ 6288.417701] Workqueue: nfsiod rpc_async_release [sunrpc] [ 6288.418676] RIP: 0010:nfs_inode_remove_request+0xc8/0x150 [nfs] [ 6288.419836] Code: ff ff 48 8b 43 38 48 8b 7b 10 a8 04 74 5b 48 85 ff 74 56 48 8b 07 a9 00 00 08 00 74 58 48 8b 07 f6 c4 10 74 50 e8 c8 44 b3 d5 <48> 8b 00 f0 48 ff 88 30 ff ff ff 5b 5d 41 5c c3 cc cc cc cc 48 8b [ 6288.422389] RSP: 0018:ffffbd618353bda8 EFLAGS: 00010246 [ 6288.423234] RAX: 0000000000000000 RBX: ffff9a29f9a25280 RCX: 0000000000000000 [ 6288.424351] RDX: ffff9a29f9a252b4 RSI: 000000000000000b RDI: ffffef41448e3840 [ 6288.425345] RBP: ffffef41448e3840 R08: 0000000000000038 R09: ffffffffffffffff [ 6288.426334] R10: 0000000000033f80 R11: ffff9a2a7fffa000 R12: ffff9a29093f98c4 [ 6288.427353] R13: 0000000000000000 R14: ffff9a29230f62e0 R15: ffff9a29230f62d0 [ 6288.428358] FS: 0000000000000000(0000) GS:ffff9a2a77c00000(0000) knlGS:0000000000000000 [ 6288.429513] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6288.430427] CR2: 0000000000000000 CR3: 0000000264748002 CR4: 0000000000770ef0 [ 6288.431553] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6288.432715] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6288.433698] PKRU: 55555554 [ 6288.434196] Call Trace: [ 6288.434667] [ 6288.435132] ? __die+0x1f/0x70 [ 6288.435723] ? page_fault_oops+0x159/0x450 [ 6288.436389] ? try_to_wake_up+0x98/0x5d0 [ 6288.437044] ? do_user_addr_fault+0x65/0x660 [ 6288.437728] ? exc_page_fault+0x7a/0x180 [ 6288.438368] ? asm_exc_page_fault+0x22/0x30 [ 6288.439137] ? nfs_inode_remove_request+0xc8/0x150 [nfs] [ 6288.440112] ? nfs_inode_remove_request+0xa0/0x150 [nfs] [ 6288.440924] nfs_commit_release_pages+0x16e/0x340 [nfs] [ 6288.441700] ? __pfx_call_transmit+0x10/0x10 [sunrpc] [ 6288.442475] ? _raw_spin_lock_irqsave+0x23/0x50 [ 6288.443161] nfs_commit_release+0x15/0x40 [nfs] [ 6288.443926] rpc_free_task+0x36/0x60 [sunrpc] [ 6288.444741] rpc_async_release+0x29/0x40 [sunrpc] [ 6288.445509] process_one_work+0x171/0x340 [ 6288.446135] worker_thread+0x277/0x3a0 [ 6288.446724] ? __pfx_worker_thread+0x10/0x10 [ 6288.447376] kthread+0xf0/0x120 [ 6288.447903] ? __pfx_kthread+0x10/0x10 [ 6288.448500] ret_from_fork+0x2d/0x50 [ 6288.449078] ? __pfx_kthread+0x10/0x10 [ 6288.449665] ret_from_fork_asm+0x1b/0x30 [ 6288.450283] [ 6288.450688] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc nls_iso8859_1 nls_cp437 vfat fat 9p netfs ext4 kvm_intel crc16 mbcache jbd2 joydev kvm xfs irqbypass virtio_net pcspkr net_failover psmouse failover 9pnet_virtio cirrus drm_shmem_helper virtio_balloon drm_kms_helper button evdev drm loop dm_mod zram zsmalloc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha512_generic virtio_blk nvme aesni_intel crypto_simd cryptd nvme_core t10_pi i6300esb crc64_rocksoft_generic crc64_rocksoft crc64 virtio_pci virtio virtio_pci_legacy_dev virtio_pci_modern_dev virtio_ring serio_raw btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq autofs4 [ 6288.460211] CR2: 0000000000000000 [ 6288.460787] ---[ end trace 0000000000000000 ]--- [ 6288.461571] RIP: 0010:nfs_inode_remove_request+0xc8/0x150 [nfs] [ 6288.462500] Code: ff ff 48 8b 43 38 48 8b 7b 10 a8 04 74 5b 48 85 ff 74 56 48 8b 07 a9 00 00 08 00 74 58 48 8b 07 f6 c4 10 74 50 e8 c8 44 b3 d5 <48> 8b 00 f0 48 ff 88 30 ff ff ff 5b 5d 41 5c c3 cc cc cc cc 48 8b [ 6288.465136] RSP: 0018:ffffbd618353bda8 EFLAGS: 00010246 [ 6288.465963] RAX: 0000000000000000 RBX: ffff9a29f9a25280 RCX: 0000000000000000 [ 6288.467035] RDX: ffff9a29f9a252b4 RSI: 000000000000000b RDI: ffffef41448e3840 [ 6288.468093] RBP: ffffef41448e3840 R08: 0000000000000038 R09: ffffffffffffffff [ 6288.469121] R10: 0000000000033f80 R11: ffff9a2a7fffa000 R12: ffff9a29093f98c4 [ 6288.470109] R13: 0000000000000000 R14: ffff9a29230f62e0 R15: ffff9a29230f62d0 [ 6288.471106] FS: 0000000000000000(0000) GS:ffff9a2a77c00000(0000) knlGS:0000000000000000 [ 6288.472216] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6288.473059] CR2: 0000000000000000 CR3: 0000000264748002 CR4: 0000000000770ef0 [ 6288.474096] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6288.475097] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6288.476148] PKRU: 55555554 [ 6288.476665] note: kworker/u18:8[340798] exited with irqs disabled Once we've released "req", it's not safe to dereference it anymore. Decrement the nrequests counter before dropping the reference. Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8c1ee1a1a28f..7720b5e43014 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -802,8 +802,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { - nfs_release_request(req); atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests); + nfs_release_request(req); } } -- cgit From 684f7e6d28e8087502fc8efdb6c9fe82400479dd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 09:26:58 -0700 Subject: iomap: Spelling s/preceeding/preceding/g Fix a misspelling of "preceding". Signed-off-by: Geert Uytterhoeven Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 644479ccefbd..5db54ca29a35 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1049,7 +1049,7 @@ static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio, /* * Scan the data range passed to us for dirty page cache folios. If we find a - * dirty folio, punch out the preceeding range and update the offset from which + * dirty folio, punch out the preceding range and update the offset from which * the next punch will start from. * * We can punch out storage reservations under clean pages because they either -- cgit From e6e43b8aa7cd3c3af686caf0c2e11819a886d705 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Fri, 29 Sep 2023 00:44:13 +0700 Subject: fs/smb/client: Reset password pointer to NULL Forget to reset ctx->password to NULL will lead to bug like double free Cc: stable@vger.kernel.org Cc: Willy Tarreau Reviewed-by: Namjae Jeon Signed-off-by: Quang Le Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index e45ce31bbda7..a3493da12ad1 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1541,6 +1541,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_parse_mount_err: kfree_sensitive(ctx->password); + ctx->password = NULL; return -EINVAL; } -- cgit From fc09027786c900368de98d03d40af058bcb01ad9 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 23 Sep 2023 01:14:08 +0000 Subject: sched/rt: Fix live lock between select_fallback_rq() and RT push During RCU-boost testing with the TREE03 rcutorture config, I found that after a few hours, the machine locks up. On tracing, I found that there is a live lock happening between 2 CPUs. One CPU has an RT task running, while another CPU is being offlined which also has an RT task running. During this offlining, all threads are migrated. The migration thread is repeatedly scheduled to migrate actively running tasks on the CPU being offlined. This results in a live lock because select_fallback_rq() keeps picking the CPU that an RT task is already running on only to get pushed back to the CPU being offlined. It is anyway pointless to pick CPUs for pushing tasks to if they are being offlined only to get migrated away to somewhere else. This could also add unwanted latency to this task. Fix these issues by not selecting CPUs in RT if they are not 'active' for scheduling, using the cpu_active_mask. Other parts in core.c already use cpu_active_mask to prevent tasks from being put on CPUs going offline. With this fix I ran the tests for days and could not reproduce the hang. Without the patch, I hit it in a few hours. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Ingo Molnar Tested-by: Paul E. McKenney Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230923011409.3522762-1-joel@joelfernandes.org --- kernel/sched/cpupri.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index a286e726eb4b..42c40cfdf836 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -101,6 +101,7 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, if (lowest_mask) { cpumask_and(lowest_mask, &p->cpus_mask, vec->mask); + cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); /* * We have to ensure that we have at least one bit -- cgit From c6c2adcba50c2622ed25ba5d5e7f05f584711358 Mon Sep 17 00:00:00 2001 From: Haitao Huang Date: Thu, 27 Jul 2023 22:10:24 -0700 Subject: x86/sgx: Resolves SECS reclaim vs. page fault for EAUG race The SGX EPC reclaimer (ksgxd) may reclaim the SECS EPC page for an enclave and set secs.epc_page to NULL. The SECS page is used for EAUG and ELDU in the SGX page fault handler. However, the NULL check for secs.epc_page is only done for ELDU, not EAUG before being used. Fix this by doing the same NULL check and reloading of the SECS page as needed for both EAUG and ELDU. The SECS page holds global enclave metadata. It can only be reclaimed when there are no other enclave pages remaining. At that point, virtually nothing can be done with the enclave until the SECS page is paged back in. An enclave can not run nor generate page faults without a resident SECS page. But it is still possible for a #PF for a non-SECS page to race with paging out the SECS page: when the last resident non-SECS page A triggers a #PF in a non-resident page B, and then page A and the SECS both are paged out before the #PF on B is handled. Hitting this bug requires that race triggered with a #PF for EAUG. Following is a trace when it happens. BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:sgx_encl_eaug_page+0xc7/0x210 Call Trace: ? __kmem_cache_alloc_node+0x16a/0x440 ? xa_load+0x6e/0xa0 sgx_vma_fault+0x119/0x230 __do_fault+0x36/0x140 do_fault+0x12f/0x400 __handle_mm_fault+0x728/0x1110 handle_mm_fault+0x105/0x310 do_user_addr_fault+0x1ee/0x750 ? __this_cpu_preempt_check+0x13/0x20 exc_page_fault+0x76/0x180 asm_exc_page_fault+0x27/0x30 Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave") Signed-off-by: Haitao Huang Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Reviewed-by: Kai Huang Acked-by: Reinette Chatre Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20230728051024.33063-1-haitao.huang%40linux.intel.com --- arch/x86/kernel/cpu/sgx/encl.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 91fa70e51004..279148e72459 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -235,6 +235,21 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page, return epc_page; } +/* + * Ensure the SECS page is not swapped out. Must be called with encl->lock + * to protect the enclave states including SECS and ensure the SECS page is + * not swapped out again while being used. + */ +static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl) +{ + struct sgx_epc_page *epc_page = encl->secs.epc_page; + + if (!epc_page) + epc_page = sgx_encl_eldu(&encl->secs, NULL); + + return epc_page; +} + static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl, struct sgx_encl_page *entry) { @@ -248,11 +263,9 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl, return entry; } - if (!(encl->secs.epc_page)) { - epc_page = sgx_encl_eldu(&encl->secs, NULL); - if (IS_ERR(epc_page)) - return ERR_CAST(epc_page); - } + epc_page = sgx_encl_load_secs(encl); + if (IS_ERR(epc_page)) + return ERR_CAST(epc_page); epc_page = sgx_encl_eldu(entry, encl->secs.epc_page); if (IS_ERR(epc_page)) @@ -339,6 +352,13 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma, mutex_lock(&encl->lock); + epc_page = sgx_encl_load_secs(encl); + if (IS_ERR(epc_page)) { + if (PTR_ERR(epc_page) == -EBUSY) + vmret = VM_FAULT_NOPAGE; + goto err_out_unlock; + } + epc_page = sgx_alloc_epc_page(encl_page, false); if (IS_ERR(epc_page)) { if (PTR_ERR(epc_page) == -EBUSY) -- cgit From a52d4f657568d6458e873f74a9602e022afe666f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Sep 2023 09:23:27 -0600 Subject: io_uring/fs: remove sqe->rw_flags checking from LINKAT This is unionized with the actual link flags, so they can of course be set and they will be evaluated further down. If not we fail any LINKAT that has to set option flags. Fixes: cf30da90bc3a ("io_uring: add support for IORING_OP_LINKAT") Cc: stable@vger.kernel.org Reported-by: Thomas Leonard Link: https://github.com/axboe/liburing/issues/955 Signed-off-by: Jens Axboe --- io_uring/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/fs.c b/io_uring/fs.c index f6a69a549fd4..08e3b175469c 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -243,7 +243,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); const char __user *oldf, *newf; - if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; -- cgit From e80f238d2bc0c0f27dc52ac824ca80b938a43ace Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Fri, 29 Sep 2023 12:32:42 +0200 Subject: ASoC: core: Print component name when printing log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When printing log related to component it is useful to know, to which component it applies to. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230929103243.705433-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index cc442c52cdea..33eb5e2bb8bc 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1445,8 +1445,8 @@ static int soc_probe_component(struct snd_soc_card *card, if (component->card) { if (component->card != card) { dev_err(component->dev, - "Trying to bind component to card \"%s\" but is already bound to card \"%s\"\n", - card->name, component->card->name); + "Trying to bind component \"%s\" to card \"%s\" but is already bound to card \"%s\"\n", + component->name, card->name, component->card->name); return -ENODEV; } return 0; -- cgit From dd9f9cc1e6b9391140afa5cf27bb47c9e2a08d02 Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Fri, 29 Sep 2023 12:32:43 +0200 Subject: ASoC: core: Do not call link_exit() on uninitialized rtd objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On init we have sequence: for_each_card_prelinks(card, i, dai_link) { ret = snd_soc_add_pcm_runtime(card, dai_link); ret = init_some_other_things(...); if (ret) goto probe_end: for_each_card_rtds(card, rtd) { ret = soc_init_pcm_runtime(card, rtd); probe_end: while on exit: for_each_card_rtds(card, rtd) snd_soc_link_exit(rtd); If init_some_other_things() step fails due to error we end up with not fully setup rtds and try to call snd_soc_link_exit on them, which depending on contents on .link_exit handler, can end up dereferencing NULL pointer. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230929103243.705433-2-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index fa2337a3cf4c..37f9d3fe302a 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1126,6 +1126,8 @@ struct snd_soc_pcm_runtime { unsigned int pop_wait:1; unsigned int fe_compr:1; /* for Dynamic PCM */ + bool initialized; + int num_components; struct snd_soc_component *components[]; /* CPU/Codec/Platform */ }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 33eb5e2bb8bc..9de98c01d815 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1347,7 +1347,7 @@ static int soc_init_pcm_runtime(struct snd_soc_card *card, snd_soc_runtime_get_dai_fmt(rtd); ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); if (ret) - return ret; + goto err; /* add DPCM sysfs entries */ soc_dpcm_debugfs_add(rtd); @@ -1372,17 +1372,26 @@ static int soc_init_pcm_runtime(struct snd_soc_card *card, /* create compress_device if possible */ ret = snd_soc_dai_compress_new(cpu_dai, rtd, num); if (ret != -ENOTSUPP) - return ret; + goto err; /* create the pcm */ ret = soc_new_pcm(rtd, num); if (ret < 0) { dev_err(card->dev, "ASoC: can't create pcm %s :%d\n", dai_link->stream_name, ret); - return ret; + goto err; } - return snd_soc_pcm_dai_new(rtd); + ret = snd_soc_pcm_dai_new(rtd); + if (ret < 0) + goto err; + + rtd->initialized = true; + + return 0; +err: + snd_soc_link_exit(rtd); + return ret; } static void soc_set_name_prefix(struct snd_soc_card *card, @@ -1980,7 +1989,8 @@ static void soc_cleanup_card_resources(struct snd_soc_card *card) /* release machine specific resources */ for_each_card_rtds(card, rtd) - snd_soc_link_exit(rtd); + if (rtd->initialized) + snd_soc_link_exit(rtd); /* remove and free each DAI */ soc_remove_link_dais(card); soc_remove_link_components(card); -- cgit From 9b7177b1df64b8d7f85700027c324aadd6aded00 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 25 Sep 2023 20:52:58 -0700 Subject: bpf: tcp_read_skb needs to pop skb regardless of seq Before fix e5c6de5fa0258 tcp_read_skb() would increment the tp->copied-seq value. This (as described in the commit) would cause an error for apps because once that is incremented the application might believe there is no data to be read. Then some apps would stall or abort believing no data is available. However, the fix is incomplete because it introduces another issue in the skb dequeue. The loop does tcp_recv_skb() in a while loop to consume as many skbs as possible. The problem is the call is ... tcp_recv_skb(sk, seq, &offset) ... where 'seq' is: u32 seq = tp->copied_seq; Now we can hit a case where we've yet incremented copied_seq from BPF side, but then tcp_recv_skb() fails this test ... if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) ... so that instead of returning the skb we call tcp_eat_recv_skb() which frees the skb. This is because the routine believes the SKB has been collapsed per comment: /* This looks weird, but this can happen if TCP collapsing * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ This can't happen here we've unlinked the full SKB and orphaned it. Anyways it would confuse any BPF programs if the data were suddenly moved underneath it. To fix this situation do simpler operation and just skb_peek() the data of the queue followed by the unlink. It shouldn't need to check this condition and tcp_read_skb() reads entire skbs so there is no need to handle the 'offset!=0' case as we would see in tcp_read_sock(). Fixes: e5c6de5fa0258 ("bpf, sockmap: Incorrectly handling copied_seq") Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20230926035300.135096-2-john.fastabend@gmail.com --- net/ipv4/tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0c3040a63ebd..3f66cdeef7de 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1621,16 +1621,13 @@ EXPORT_SYMBOL(tcp_read_sock); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - struct tcp_sock *tp = tcp_sk(sk); - u32 seq = tp->copied_seq; struct sk_buff *skb; int copied = 0; - u32 offset; if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; - while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { u8 tcp_flags; int used; @@ -1643,13 +1640,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) copied = used; break; } - seq += used; copied += used; - if (tcp_flags & TCPHDR_FIN) { - ++seq; + if (tcp_flags & TCPHDR_FIN) break; - } } return copied; } -- cgit From da9e915eaf5dadb1963b7738cdfa42ed55212445 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 25 Sep 2023 20:52:59 -0700 Subject: bpf, sockmap: Do not inc copied_seq when PEEK flag set When data is peek'd off the receive queue we shouldn't considered it copied from tcp_sock side. When we increment copied_seq this will confuse tcp_data_ready() because copied_seq can be arbitrarily increased. From application side it results in poll() operations not waking up when expected. Notice tcp stack without BPF recvmsg programs also does not increment copied_seq. We broke this when we moved copied_seq into recvmsg to only update when actual copy was happening. But, it wasn't working correctly either before because the tcp_data_ready() tried to use the copied_seq value to see if data was read by user yet. See fixes tags. Fixes: e5c6de5fa0258 ("bpf, sockmap: Incorrectly handling copied_seq") Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20230926035300.135096-3-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 81f0dff69e0b..327268203001 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, int *addr_len) { struct tcp_sock *tcp = tcp_sk(sk); + int peek = flags & MSG_PEEK; u32 seq = tcp->copied_seq; struct sk_psock *psock; int copied = 0; @@ -311,7 +312,8 @@ msg_bytes_ready: copied = -EAGAIN; } out: - WRITE_ONCE(tcp->copied_seq, seq); + if (!peek) + WRITE_ONCE(tcp->copied_seq, seq); tcp_rcv_space_adjust(sk); if (copied > 0) __tcp_cleanup_rbuf(sk, copied); -- cgit From 5f405c0c0c4651b991c109cf9be33bb996af098e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 25 Sep 2023 20:53:00 -0700 Subject: bpf, sockmap: Add tests for MSG_F_PEEK Test that we can read with MSG_F_PEEK and then still get correct number of available bytes through FIONREAD. The recv() (without PEEK) then returns the bytes as expected. The recv() always worked though because it was just the available byte reporting that was broke before latest fixes. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20230926035300.135096-4-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 064cc5e8d9ad..dda7060e86a0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -475,6 +475,55 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_peek(void) +{ + int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail; + struct test_sockmap_pass_prog *pass; + char snd[256] = "0123456789"; + char rcv[256] = "0"; + + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + s = socket_loopback(AF_INET, SOCK_STREAM); + if (!ASSERT_GT(s, -1, "socket_loopback(s)")) + goto out; + + err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pairs(s)")) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + + sent = xsend(p1, snd, sizeof(snd), 0); + ASSERT_EQ(sent, sizeof(snd), "xsend(p1)"); + recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK); + ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)"); + recvd = recv(c1, rcv, sizeof(rcv), 0); + ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_pass_prog__destroy(pass); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -515,4 +564,6 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict msg_f_peek")) + test_sockmap_skb_verdict_peek(); } -- cgit From b80e31baa43614e086a9d29dc1151932b1bd7fc5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 20 Sep 2023 12:20:55 +0200 Subject: bpf, sockmap: Reject sk_msg egress redirects to non-TCP sockets With a SOCKMAP/SOCKHASH map and an sk_msg program user can steer messages sent from one TCP socket (s1) to actually egress from another TCP socket (s2): tcp_bpf_sendmsg(s1) // = sk_prot->sendmsg tcp_bpf_send_verdict(s1) // __SK_REDIRECT case tcp_bpf_sendmsg_redir(s2) tcp_bpf_push_locked(s2) tcp_bpf_push(s2) tcp_rate_check_app_limited(s2) // expects tcp_sock tcp_sendmsg_locked(s2) // ditto There is a hard-coded assumption in the call-chain, that the egress socket (s2) is a TCP socket. However in commit 122e6c79efe1 ("sock_map: Update sock type checks for UDP") we have enabled redirects to non-TCP sockets. This was done for the sake of BPF sk_skb programs. There was no indention to support sk_msg send-to-egress use case. As a result, attempts to send-to-egress through a non-TCP socket lead to a crash due to invalid downcast from sock to tcp_sock: BUG: kernel NULL pointer dereference, address: 000000000000002f ... Call Trace: ? show_regs+0x60/0x70 ? __die+0x1f/0x70 ? page_fault_oops+0x80/0x160 ? do_user_addr_fault+0x2d7/0x800 ? rcu_is_watching+0x11/0x50 ? exc_page_fault+0x70/0x1c0 ? asm_exc_page_fault+0x27/0x30 ? tcp_tso_segs+0x14/0xa0 tcp_write_xmit+0x67/0xce0 __tcp_push_pending_frames+0x32/0xf0 tcp_push+0x107/0x140 tcp_sendmsg_locked+0x99f/0xbb0 tcp_bpf_push+0x19d/0x3a0 tcp_bpf_sendmsg_redir+0x55/0xd0 tcp_bpf_send_verdict+0x407/0x550 tcp_bpf_sendmsg+0x1a1/0x390 inet_sendmsg+0x6a/0x70 sock_sendmsg+0x9d/0xc0 ? sockfd_lookup_light+0x12/0x80 __sys_sendto+0x10e/0x160 ? syscall_enter_from_user_mode+0x20/0x60 ? __this_cpu_preempt_check+0x13/0x20 ? lockdep_hardirqs_on+0x82/0x110 __x64_sys_sendto+0x1f/0x30 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Reject selecting a non-TCP sockets as redirect target from a BPF sk_msg program to prevent the crash. When attempted, user will receive an EACCES error from send/sendto/sendmsg() syscall. Fixes: 122e6c79efe1 ("sock_map: Update sock type checks for UDP") Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230920102055.42662-1-jakub@cloudflare.com --- net/core/sock_map.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index cb11750b1df5..4292c2ed1828 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -668,6 +668,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg, sk = __sock_map_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; @@ -1267,6 +1269,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg, sk = __sock_hash_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; -- cgit From 7f949f6f54ff593123ab95b6247bfa4542a65580 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 25 Sep 2023 11:22:32 +0800 Subject: perf/arm-cmn: Fix the unhandled overflow status of counter 4 to 7 The register por_dt_pmovsr Bits[7:0] indicates overflow from counters 7 to 0. But in arm_cmn_handle_irq(), only handled the overflow status of Bits[3:0] which results in unhandled overflow status of counters 4 to 7. So let the overflow status of DTC counters 4 to 7 to be handled. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Signed-off-by: Jing Zhang Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1695612152-123633-1-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 913dc04b3a40..6b50bc551984 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1972,7 +1972,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) u64 delta; int i; - for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) { + for (i = 0; i < CMN_DT_NUM_COUNTERS; i++) { if (status & (1U << i)) { ret = IRQ_HANDLED; if (WARN_ON(!dtc->counters[i])) -- cgit From a654a69b9f9c06b2e56387d0b99f0e3e6b0ff4ef Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 21 Sep 2023 14:41:51 -0500 Subject: arm64: Add Cortex-A520 CPU part definition Add the CPU Part number for the new Arm design. Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230921194156.1050055-1-robh@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f6f84837a49..74d00feb62f0 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 @@ -148,6 +149,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) -- cgit From 471470bc7052d28ce125901877dd10e4c048e513 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 21 Sep 2023 14:41:52 -0500 Subject: arm64: errata: Add Cortex-A520 speculative unprivileged load workaround Implement the workaround for ARM Cortex-A520 erratum 2966298. On an affected Cortex-A520 core, a speculatively executed unprivileged load might leak data from a privileged load via a cache side channel. The issue only exists for loads within a translation regime with the same translation (e.g. same ASID and VMID). Therefore, the issue only affects the return to EL0. The workaround is to execute a TLBI before returning to EL0 after all loads of privileged data. A non-shareable TLBI to any address is sufficient. The workaround isn't necessary if page table isolation (KPTI) is enabled, but for simplicity it will be. Page table isolation should normally be disabled for Cortex-A520 as it supports the CSV3 feature and the E0PD feature (used when KASLR is enabled). Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230921194156.1050055-2-robh@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 13 +++++++++++++ arch/arm64/kernel/cpu_errata.c | 8 ++++++++ arch/arm64/kernel/entry.S | 4 ++++ arch/arm64/tools/cpucaps | 1 + 5 files changed, 28 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index e96f057ea2a0..f47f63bcf67c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -71,6 +71,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b10515c0200b..78f20e632712 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1037,6 +1037,19 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_ERRATUM_2966298 + bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + default y + help + This option adds the workaround for ARM Cortex-A520 erratum 2966298. + + On an affected Cortex-A520 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index be66e94a21bd..5706e74c5578 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -730,6 +730,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + { + .desc = "ARM erratum 2966298", + .capability = ARM64_WORKAROUND_2966298, + /* Cortex-A520 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + }, +#endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 { .desc = "AmpereOne erratum AC03_CPU_38", diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad61de03d0a..a6030913cd58 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,6 +428,10 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 +alternative_if ARM64_WORKAROUND_2966298 + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c3f06fdef609..dea3dc89234b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,6 +84,7 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 +WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE -- cgit From 089667aaaa6aa33715d22b21a72216b43af3e896 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Aug 2023 17:28:13 +0200 Subject: phy: realtek: Realtek PHYs should depend on ARCH_REALTEK The Realtek SoC USB2 and USB3 PHY Transceivers are only present on Realtek Digital Home Center (DHC) RTD series SoCs. Hence add a dependency on ARCH_REALTEK, to prevent asking the user about these drivers when configuring a kernel without Realtek SoC support. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2892527cac9af6fa8f5e7b8daeffd7d4351fde68.1692113167.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/realtek/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig index 650e20ed69af..75ac7e7c31ae 100644 --- a/drivers/phy/realtek/Kconfig +++ b/drivers/phy/realtek/Kconfig @@ -2,6 +2,9 @@ # # Phy drivers for Realtek platforms # + +if ARCH_REALTEK || COMPILE_TEST + config PHY_RTK_RTD_USB2PHY tristate "Realtek RTD USB2 PHY Transceiver Driver" depends on USB_SUPPORT @@ -25,3 +28,5 @@ config PHY_RTK_RTD_USB3PHY The DHC (digital home center) RTD series SoCs used the Synopsys DWC3 USB IP. This driver will do the PHY initialization of the parameters. + +endif # ARCH_REALTEK || COMPILE_TEST -- cgit From 6a878a54d0053ef21f3b829dc267487c2302b012 Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Tue, 19 Sep 2023 15:59:48 +0530 Subject: PCI: qcom: Fix IPQ8074 enumeration PARF_SLV_ADDR_SPACE_SIZE_2_3_3 is used by qcom_pcie_post_init_2_3_3(). This PCIe slave address space size register offset is 0x358 but was incorrectly changed to 0x16c by 39171b33f652 ("PCI: qcom: Remove PCIE20_ prefix from register definitions"). This prevented access to slave address space registers like iATU, etc., so the IPQ8074 PCIe controller was not enumerated. Revert back to the correct 0x358 offset and remove the unused PARF_SLV_ADDR_SPACE_SIZE_2_3_3. Fixes: 39171b33f652 ("PCI: qcom: Remove PCIE20_ prefix from register definitions") Link: https://lore.kernel.org/r/20230919102948.1844909-1-quic_srichara@quicinc.com Tested-by: Robert Marko Signed-off-by: Sricharan Ramabadhran [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Cc: stable@vger.kernel.org # v6.4+ --- drivers/pci/controller/dwc/pcie-qcom.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index e2f29404c84e..64420ecc24d1 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -43,7 +43,6 @@ #define PARF_PHY_REFCLK 0x4c #define PARF_CONFIG_BITS 0x50 #define PARF_DBI_BASE_ADDR 0x168 -#define PARF_SLV_ADDR_SPACE_SIZE_2_3_3 0x16c /* Register offset specific to IP ver 2.3.3 */ #define PARF_MHI_CLOCK_RESET_CTRL 0x174 #define PARF_AXI_MSTR_WR_ADDR_HALT 0x178 #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 @@ -797,8 +796,7 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); u32 val; - writel(SLV_ADDR_SPACE_SZ, - pcie->parf + PARF_SLV_ADDR_SPACE_SIZE_2_3_3); + writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE); val = readl(pcie->parf + PARF_PHY_CTRL); val &= ~PHY_TEST_PWR_DOWN; -- cgit From b0b88a585c27834223d2daf47faa57ca06dd9414 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 28 Sep 2023 09:06:52 +0200 Subject: MAINTAINERS: Fix Florian Fainelli's email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 31345a0f5901 ("MAINTAINERS: Replace my email address") added 13 instances of ...@broadcom.com and one of only ...@broadcom. I didn't double check if Broadcom really owns that TLD, but git send-email doesn't accept it, so add ".com" to that one bogous(?) instance. Fixes: 31345a0f5901 ("MAINTAINERS: Replace my email address") Signed-off-by: Uwe Kleine-König Acked-by: Florian Fainelli Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d52634553ddc..ed77216bb0e5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4082,7 +4082,7 @@ F: drivers/net/wireless/broadcom/brcm80211/ BROADCOM BRCMSTB GPIO DRIVER M: Doug Berger -M: Florian Fainelli +M: Florian Fainelli R: Broadcom internal kernel review list S: Supported F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml -- cgit From 33efa29e825636a06d5711c9dfccf92726d2fc81 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Fri, 29 Sep 2023 10:10:48 -0700 Subject: PCI: of_property: Handle interrupt parsing failures of_pci_prop_intr_map() uses uninitialized addr_sz[] values if of_irq_parse_raw() fails, which leads to intermittent crashes. Clear addr_sz[] before use so we never use uninitialized elements. If no valid IRQs are parsed, don't bother adding the interrupt-map property. Fixes: 407d1a51921e ("PCI: Create device tree node for bridge") Link: https://lore.kernel.org/r/1696007448-42127-1-git-send-email-lizhi.hou@amd.com Reported-by: Jonathan Cameron Closes: https://lore.kernel.org/all/20230911154856.000076c3@Huawei.com/ Reported-by: Herve Codina Closes: https://lore.kernel.org/all/20230911171319.495bb837@bootlin.com/ Signed-off-by: Lizhi Hou [bhelgaas: commit log, add similar report from Herve] Signed-off-by: Bjorn Helgaas Reviewed-by: Herve Codina --- drivers/pci/of_property.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c index 710ec35ba4a1..c2c7334152bc 100644 --- a/drivers/pci/of_property.c +++ b/drivers/pci/of_property.c @@ -186,8 +186,8 @@ static int of_pci_prop_interrupts(struct pci_dev *pdev, static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs, struct device_node *np) { + u32 i, addr_sz[OF_PCI_MAX_INT_PIN] = { 0 }, map_sz = 0; struct of_phandle_args out_irq[OF_PCI_MAX_INT_PIN]; - u32 i, addr_sz[OF_PCI_MAX_INT_PIN], map_sz = 0; __be32 laddr[OF_PCI_ADDRESS_CELLS] = { 0 }; u32 int_map_mask[] = { 0xffff00, 0, 0, 7 }; struct device_node *pnode; @@ -213,33 +213,44 @@ static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs, out_irq[i].args[0] = pin; ret = of_irq_parse_raw(laddr, &out_irq[i]); if (ret) { - pci_err(pdev, "parse irq %d failed, ret %d", pin, ret); + out_irq[i].np = NULL; + pci_dbg(pdev, "parse irq %d failed, ret %d", pin, ret); continue; } - ret = of_property_read_u32(out_irq[i].np, "#address-cells", - &addr_sz[i]); - if (ret) - addr_sz[i] = 0; + of_property_read_u32(out_irq[i].np, "#address-cells", + &addr_sz[i]); } list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) { i = pci_swizzle_interrupt_pin(child, pin) - 1; + if (!out_irq[i].np) + continue; map_sz += 5 + addr_sz[i] + out_irq[i].args_count; } } + /* + * Parsing interrupt failed for all pins. In this case, it does not + * need to generate interrupt-map property. + */ + if (!map_sz) + return 0; + int_map = kcalloc(map_sz, sizeof(u32), GFP_KERNEL); mapp = int_map; list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) { + i = pci_swizzle_interrupt_pin(child, pin) - 1; + if (!out_irq[i].np) + continue; + *mapp = (child->bus->number << 16) | (child->devfn << 8); mapp += OF_PCI_ADDRESS_CELLS; *mapp = pin; mapp++; - i = pci_swizzle_interrupt_pin(child, pin) - 1; *mapp = out_irq[i].np->phandle; mapp++; if (addr_sz[i]) { -- cgit From f69977404700a3d33b1c4b492c2a44f17cb07af5 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Fri, 29 Sep 2023 10:10:17 -0700 Subject: PCI: of: Destroy changeset when adding PCI device node fails Previously of_pci_make_dev_node() leaked a cset if it failed to create a device node for the PCI device with of_changeset_create_node(). Destroy the cset if of_changeset_create_node() fails. Fixes: 407d1a51921e ("PCI: Create device tree node for bridge") Link: https://lore.kernel.org/r/1696007417-42059-1-git-send-email-lizhi.hou@amd.com Reported-by: Herve Codina Closes: https://lore.kernel.org/all/20230911171319.495bb837@bootlin.com/ Signed-off-by: Lizhi Hou [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Herve Codina --- drivers/pci/of.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/pci/of.c b/drivers/pci/of.c index 2af64bcb7da3..51e3dd0ea5ab 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -657,30 +657,33 @@ void of_pci_make_dev_node(struct pci_dev *pdev) cset = kmalloc(sizeof(*cset), GFP_KERNEL); if (!cset) - goto failed; + goto out_free_name; of_changeset_init(cset); np = of_changeset_create_node(cset, ppnode, name); if (!np) - goto failed; - np->data = cset; + goto out_destroy_cset; ret = of_pci_add_properties(pdev, cset, np); if (ret) - goto failed; + goto out_free_node; ret = of_changeset_apply(cset); if (ret) - goto failed; + goto out_free_node; + np->data = cset; pdev->dev.of_node = np; kfree(name); return; -failed: - if (np) - of_node_put(np); +out_free_node: + of_node_put(np); +out_destroy_cset: + of_changeset_destroy(cset); + kfree(cset); +out_free_name: kfree(name); } #endif -- cgit From c82458101d5490230d735caecce14c9c27b1010c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 18 Sep 2023 08:30:41 +0300 Subject: PCI/PM: Mark devices disconnected if upstream PCIe link is down on resume Mark Blakeney reported that when suspending system with a Thunderbolt dock connected and then unplugging the dock before resume (which is pretty normal flow with laptops), resuming takes long time. What happens is that the PCIe link from the root port to the PCIe switch inside the Thunderbolt device does not train (as expected, the link is unplugged): pcieport 0000:00:07.2: restoring config space at offset 0x24 (was 0x3bf12001, writing 0x3bf12001) pcieport 0000:00:07.0: waiting 100 ms for downstream link pcieport 0000:01:00.0: not ready 1023ms after resume; giving up However, at this point we still try to resume the devices below that unplugged link: pcieport 0000:01:00.0: Unable to change power state from D3cold to D0, device inaccessible ... pcieport 0000:01:00.0: restoring config space at offset 0x38 (was 0xffffffff, writing 0x0) ... pcieport 0000:02:02.0: waiting 100 ms for downstream link, after activation And this is the link from PCIe switch downstream port to the xHCI on the dock: xhci_hcd 0000:03:00.0: not ready 65535ms after resume; giving up xhci_hcd 0000:03:00.0: Unable to change power state from D3cold to D0, device inaccessible xhci_hcd 0000:03:00.0: restoring config space at offset 0x3c (was 0xffffffff, writing 0x1ff) This ends up slowing down the resume time considerably. For this reason mark these devices as disconnected if the link above them did not train properly. Fixes: e8b908146d44 ("PCI/PM: Increase wait time after resume") Link: https://lore.kernel.org/r/20230918053041.1018876-1-mika.westerberg@linux.intel.com Reported-by: Mark Blakeney Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217915 Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.4+ --- drivers/pci/pci-driver.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a79c110c7e51..51ec9e7e784f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -572,7 +572,19 @@ static void pci_pm_default_resume_early(struct pci_dev *pci_dev) static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev) { - pci_bridge_wait_for_secondary_bus(pci_dev, "resume"); + int ret; + + ret = pci_bridge_wait_for_secondary_bus(pci_dev, "resume"); + if (ret) { + /* + * The downstream link failed to come up, so mark the + * devices below as disconnected to make sure we don't + * attempt to resume them. + */ + pci_walk_bus(pci_dev->subordinate, pci_dev_set_disconnected, + NULL); + return; + } /* * When powering on a bridge from D3cold, the whole hierarchy may be -- cgit From f9b0e1088bbf35933e25c839b75094039059b3be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Sep 2023 22:41:20 +0200 Subject: bpf, mprog: Fix maximum program check on mprog attachment After Paul's recent improvement to syzkaller to improve coverage for bpf_mprog and tcx, it hit a splat that the program limit was surpassed. What happened is that the maximum number of progs got added, followed by another prog add request which adds with BPF_F_BEFORE flag relative to the last program in the array. The idx >= bpf_mprog_max() check in bpf_mprog_attach() still passes because the index is below the maximum but the maximum will be surpassed. We need to add a check upfront for insertions to catch this situation. Fixes: 053c8e1f235d ("bpf: Add generic attach/detach/query API for multi-progs") Reported-by: syzbot+baa44e3dbbe48e05c1ad@syzkaller.appspotmail.com Reported-by: syzbot+b97d20ed568ce0951a06@syzkaller.appspotmail.com Reported-by: syzbot+2558ca3567a77b7af4e3@syzkaller.appspotmail.com Co-developed-by: Nikolay Aleksandrov Signed-off-by: Nikolay Aleksandrov Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Tested-by: syzbot+baa44e3dbbe48e05c1ad@syzkaller.appspotmail.com Tested-by: syzbot+b97d20ed568ce0951a06@syzkaller.appspotmail.com Link: https://github.com/google/syzkaller/pull/4207 Link: https://lore.kernel.org/bpf/20230929204121.20305-1-daniel@iogearbox.net --- kernel/bpf/mprog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c index 32d2c4829eb8..007d98c799e2 100644 --- a/kernel/bpf/mprog.c +++ b/kernel/bpf/mprog.c @@ -253,6 +253,9 @@ int bpf_mprog_attach(struct bpf_mprog_entry *entry, goto out; } idx = tidx; + } else if (bpf_mprog_total(entry) == bpf_mprog_max()) { + ret = -ERANGE; + goto out; } if (flags & BPF_F_BEFORE) { tidx = bpf_mprog_pos_before(entry, &rtuple); -- cgit From d1a783daa443d34e2f39811573ac9cbb5e5b78f3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Sep 2023 22:41:21 +0200 Subject: selftest/bpf: Add various selftests for program limits Add various tests to check maximum number of supported programs being attached: # ./vmtest.sh -- ./test_progs -t tc_opts [...] ./test_progs -t tc_opts [ 1.185325] bpf_testmod: loading out-of-tree module taints kernel. [ 1.186826] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel [ 1.270123] tsc: Refined TSC clocksource calibration: 3407.988 MHz [ 1.272428] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fc932722, max_idle_ns: 440795381586 ns [ 1.276408] clocksource: Switched to clocksource tsc #252 tc_opts_after:OK #253 tc_opts_append:OK #254 tc_opts_basic:OK #255 tc_opts_before:OK #256 tc_opts_chain_classic:OK #257 tc_opts_chain_mixed:OK #258 tc_opts_delete_empty:OK #259 tc_opts_demixed:OK #260 tc_opts_detach:OK #261 tc_opts_detach_after:OK #262 tc_opts_detach_before:OK #263 tc_opts_dev_cleanup:OK #264 tc_opts_invalid:OK #265 tc_opts_max:OK <--- (new test) #266 tc_opts_mixed:OK #267 tc_opts_prepend:OK #268 tc_opts_replace:OK #269 tc_opts_revision:OK Summary: 18/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230929204121.20305-2-daniel@iogearbox.net --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 84 ++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 7a2ecd4eca5d..99af79ea21a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2378,3 +2378,87 @@ void serial_test_tc_opts_chain_mixed(void) test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); } + +static int generate_dummy_prog(void) +{ + const struct bpf_insn prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, opts); + const size_t log_buf_sz = 256; + char *log_buf; + int fd = -1; + + log_buf = malloc(log_buf_sz); + if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) + return fd; + opts.log_buf = log_buf; + opts.log_size = log_buf_sz; + + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL", + prog_insns, prog_insn_cnt, &opts); + ASSERT_STREQ(log_buf, "", "log_0"); + ASSERT_GE(fd, 0, "prog_fd"); + free(log_buf); + return fd; +} + +static void test_tc_opts_max_target(int target, int flags, bool relative) +{ + int err, ifindex, i, prog_fd, last_fd = -1; + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + const int max_progs = 63; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + for (i = 0; i < max_progs; i++) { + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count_ifindex(ifindex, target, i + 1); + if (i == max_progs - 1 && relative) + last_fd = prog_fd; + else + close(prog_fd); + } + + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + opta.flags = flags; + if (last_fd > 0) + opta.relative_fd = last_fd; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_64_attach"); + assert_mprog_count_ifindex(ifindex, target, max_progs); + close(prog_fd); +cleanup: + if (last_fd > 0) + close(last_fd); + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_opts_max(void) +{ + test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false); +} -- cgit From 7c3151585730b7095287be8162b846d31e6eee61 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Thu, 7 Sep 2023 11:18:08 +1000 Subject: fs: binfmt_elf_efpic: fix personality for ELF-FDPIC The elf-fdpic loader hard sets the process personality to either PER_LINUX_FDPIC for true elf-fdpic binaries or to PER_LINUX for normal ELF binaries (in this case they would be constant displacement compiled with -pie for example). The problem with that is that it will lose any other bits that may be in the ELF header personality (such as the "bug emulation" bits). On the ARM architecture the ADDR_LIMIT_32BIT flag is used to signify a normal 32bit binary - as opposed to a legacy 26bit address binary. This matters since start_thread() will set the ARM CPSR register as required based on this flag. If the elf-fdpic loader loses this bit the process will be mis-configured and crash out pretty quickly. Modify elf-fdpic loader personality setting so that it preserves the upper three bytes by using the SET_PERSONALITY macro to set it. This macro in the generic case sets PER_LINUX and preserves the upper bytes. Architectures can override this for their specific use case, and ARM does exactly this. The problem shows up quite easily running under qemu using the ARM architecture, but not necessarily on all types of real ARM hardware. If the underlying ARM processor does not support the legacy 26-bit addressing mode then everything will work as expected. Link: https://lkml.kernel.org/r/20230907011808.2985083-1-gerg@kernel.org Fixes: 1bde925d23547 ("fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries") Signed-off-by: Greg Ungerer Cc: Al Viro Cc: Christian Brauner Cc: Eric W. Biederman Cc: Greg Ungerer Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- fs/binfmt_elf_fdpic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 43b2a2851ba3..206812ce544a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -345,10 +345,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* there's now no turning back... the old userspace image is dead, * defunct, deceased, etc. */ + SET_PERSONALITY(exec_params.hdr); if (elf_check_fdpic(&exec_params.hdr)) - set_personality(PER_LINUX_FDPIC); - else - set_personality(PER_LINUX); + current->personality |= PER_LINUX_FDPIC; if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; -- cgit From a501a0703044f00180d7697b32cacd7ff46d02d8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 20 Sep 2023 04:53:35 +0100 Subject: mm: report success more often from filemap_map_folio_range() Even though we had successfully mapped the relevant page, we would rarely return success from filemap_map_folio_range(). That leads to falling back from the VMA lock path to the mmap_lock path, which is a speed & scalability issue. Found by inspection. Link: https://lkml.kernel.org/r/20230920035336.854212-1-willy@infradead.org Fixes: 617c28ecab22 ("filemap: batch PTE mappings") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Yin Fengwei Cc: Dave Hansen Cc: David Hildenbrand Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- mm/filemap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 4ea4387053e8..f0a15ce1bd1b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3503,7 +3503,7 @@ skip: if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); - if (in_range(vmf->address, addr, count)) + if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } @@ -3517,7 +3517,7 @@ skip: if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); - if (in_range(vmf->address, addr, count)) + if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } -- cgit From ce60f27bb62dfeb1bf827350520f34abc84e0933 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 20 Sep 2023 05:09:58 +0100 Subject: mm: abstract moving to the next PFN In order to fix the L1TF vulnerability, x86 can invert the PTE bits for PROT_NONE VMAs, which means we cannot move from one PTE to the next by adding 1 to the PFN field of the PTE. This results in the BUG reported at [1]. Abstract advancing the PTE to the next PFN through a pte_next_pfn() function/macro. Link: https://lkml.kernel.org/r/20230920040958.866520-1-willy@infradead.org Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: syzbot+55cc72f8cc3a549119df@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/000000000000d099fa0604f03351@google.com [1] Reviewed-by: Yin Fengwei Cc: Dave Hansen Cc: David Hildenbrand Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 8 ++++++++ include/linux/pgtable.h | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d6ad98ca1288..e02b179ec659 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -955,6 +955,14 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } +static inline pte_t pte_next_pfn(pte_t pte) +{ + if (__pte_needs_invert(pte_val(pte))) + return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#define pte_next_pfn pte_next_pfn + static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1fba072b3dac..af7639c3b0a3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -206,6 +206,14 @@ static inline int pmd_young(pmd_t pmd) #endif #ifndef set_ptes + +#ifndef pte_next_pfn +static inline pte_t pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#endif + /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. @@ -231,7 +239,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, if (--nr == 0) break; ptep++; - pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + pte = pte_next_pfn(pte); } arch_leave_lazy_mmu_mode(); } -- cgit From 7ee29facd8a9c5a26079148e36bcf07141b3a6bc Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Sep 2023 23:17:31 +0900 Subject: nilfs2: fix potential use after free in nilfs_gccache_submit_read_data() In nilfs_gccache_submit_read_data(), brelse(bh) is called to drop the reference count of bh when the call to nilfs_dat_translate() fails. If the reference count hits 0 and its owner page gets unlocked, bh may be freed. However, bh->b_page is dereferenced to put the page after that, which may result in a use-after-free bug. This patch moves the release operation after unlocking and putting the page. NOTE: The function in question is only called in GC, and in combination with current userland tools, address translation using DAT does not occur in that function, so the code path that causes this issue will not be executed. However, it is possible to run that code path by intentionally modifying the userland GC library or by calling the GC ioctl directly. [konishi.ryusuke@gmail.com: NOTE added to the commit log] Link: https://lkml.kernel.org/r/1543201709-53191-1-git-send-email-bianpan2016@163.com Link: https://lkml.kernel.org/r/20230921141731.10073-1-konishi.ryusuke@gmail.com Fixes: a3d93f709e89 ("nilfs2: block cache for garbage collection") Signed-off-by: Pan Bian Reported-by: Ferry Meng Closes: https://lkml.kernel.org/r/20230818092022.111054-1-mengferry@linux.alibaba.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/gcinode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 48fe71d309cb..8beb2730929d 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); - if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ - brelse(bh); + if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */ goto failed; - } } lock_buffer(bh); @@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, failed: unlock_page(bh->b_page); put_page(bh->b_page); + if (unlikely(err)) + brelse(bh); return err; } -- cgit From 5c590804b6b0ff933ed4e5cee5d76de3a5048d9f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:35 -0400 Subject: maple_tree: add mas_is_active() to detect in-tree walks Patch series "maple_tree: Fix mas_prev() state regression". Pedro Falcato retported an mprotect regression [1] which was bisected back to the iterator changes for maple tree. Root cause analysis showed the mas_prev() running off the end of the VMA space (previous from 0) followed by mas_find(), would skip the first value. This patchset introduces maple state underflow/overflow so the sequence of calls on the maple state will return what the user expects. Users who encounter this bug may see mprotect(), userfaultfd_register(), and mlock() fail on VMAs mapped with address 0. This patch (of 2): Instead of constantly checking each possibility of the maple state, create a fast path that will skip over checking unlikely states. Link: https://lkml.kernel.org/r/20230921181236.509072-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230921181236.509072-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e41c70ac7744..f66f5f78f8cf 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -511,6 +511,15 @@ static inline bool mas_is_paused(const struct ma_state *mas) return mas->node == MAS_PAUSE; } +/* Check if the mas is pointing to a node or not */ +static inline bool mas_is_active(struct ma_state *mas) +{ + if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) + return true; + + return false; +} + /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. -- cgit From a8091f039c1ebf5cb0d5261e3613f18eb2a5d8b7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:36 -0400 Subject: maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states When updating the maple tree iterator to avoid rewalks, an issue was introduced when shifting beyond the limits. This can be seen by trying to go to the previous address of 0, which would set the maple node to MAS_NONE and keep the range as the last entry. Subsequent calls to mas_find() would then search upwards from mas->last and skip the value at mas->index/mas->last. This showed up as a bug in mprotect which skips the actual VMA at the current range after attempting to go to the previous VMA from 0. Since MAS_NONE may already be set when searching for a value that isn't contained within a node, changing the handling of MAS_NONE in mas_find() would make the code more complicated and error prone. Furthermore, there was no way to tell which limit was hit, and thus which action to take (next or the entry at the current range). This solution is to add two states to track what happened with the previous iterator action. This allows for the expected behaviour of the next command to return the correct item (either the item at the range requested, or the next/previous). Tests are also added and updated accordingly. Link: https://lkml.kernel.org/r/20230921181236.509072-3-Liam.Howlett@oracle.com Link: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Link: https://lore.kernel.org/linux-mm/20230921181236.509072-1-Liam.Howlett@oracle.com/ Fixes: 39193685d585 ("maple_tree: try harder to keep active node with mas_prev()") Signed-off-by: Liam R. Howlett Reported-by: Pedro Falcato Closes: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Closes: https://bugs.archlinux.org/task/79656 Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 + lib/maple_tree.c | 221 +++++++++++++++++++++++++++++++++------------ lib/test_maple_tree.c | 87 +++++++++++++++--- 3 files changed, 237 insertions(+), 73 deletions(-) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index f66f5f78f8cf..d01e850b570f 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -428,6 +428,8 @@ struct ma_wr_state { #define MAS_ROOT ((struct maple_enode *)5UL) #define MAS_NONE ((struct maple_enode *)9UL) #define MAS_PAUSE ((struct maple_enode *)17UL) +#define MAS_OVERFLOW ((struct maple_enode *)33UL) +#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ee1ff0c59fd7..0e00a84e8e8f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -256,6 +256,22 @@ bool mas_is_err(struct ma_state *mas) return xa_is_err(mas->node); } +static __always_inline bool mas_is_overflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_OVERFLOW)) + return true; + + return false; +} + +static __always_inline bool mas_is_underflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_UNDERFLOW)) + return true; + + return false; +} + static inline bool mas_searchable(struct ma_state *mas) { if (mas_is_none(mas)) @@ -4415,10 +4431,13 @@ no_entry: * * @mas: The maple state * @max: The minimum starting range + * @empty: Can be empty + * @set_underflow: Set the @mas->node to underflow state on limit. * * Return: The entry in the previous slot which is possibly NULL */ -static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) +static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, + bool set_underflow) { void *entry; void __rcu **slots; @@ -4435,7 +4454,6 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->min <= min) { pivot = mas_safe_min(mas, pivots, mas->offset); @@ -4443,9 +4461,10 @@ again: goto retry; if (pivot <= min) - return NULL; + goto underflow; } +again: if (likely(mas->offset)) { mas->offset--; mas->last = mas->index - 1; @@ -4457,7 +4476,7 @@ again: } if (mas_is_none(mas)) - return NULL; + goto underflow; mas->last = mas->max; node = mas_mn(mas); @@ -4474,10 +4493,19 @@ again: if (likely(entry)) return entry; - if (!empty) + if (!empty) { + if (mas->index <= min) + goto underflow; + goto again; + } return entry; + +underflow: + if (set_underflow) + mas->node = MAS_UNDERFLOW; + return NULL; } /* @@ -4567,10 +4595,13 @@ no_entry: * @mas: The maple state * @max: The maximum starting range * @empty: Can be empty + * @set_overflow: Should @mas->node be set to overflow when the limit is + * reached. * * Return: The entry in the next slot which is possibly NULL */ -static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) +static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, + bool set_overflow) { void __rcu **slots; unsigned long *pivots; @@ -4589,22 +4620,22 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->max >= max) { if (likely(mas->offset < data_end)) pivot = pivots[mas->offset]; else - return NULL; /* must be mas->max */ + goto overflow; if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (pivot >= max) - return NULL; + goto overflow; } if (likely(mas->offset < data_end)) { mas->index = pivots[mas->offset] + 1; +again: mas->offset++; if (likely(mas->offset < data_end)) mas->last = pivots[mas->offset]; @@ -4616,8 +4647,11 @@ again: goto retry; } - if (mas_is_none(mas)) + if (WARN_ON_ONCE(mas_is_none(mas))) { + mas->node = MAS_OVERFLOW; return NULL; + goto overflow; + } mas->offset = 0; mas->index = mas->min; @@ -4636,12 +4670,20 @@ again: return entry; if (!empty) { - if (!mas->offset) - data_end = 2; + if (mas->last >= max) + goto overflow; + + mas->index = mas->last + 1; + /* Node cannot end on NULL, so it's safe to short-cut here */ goto again; } return entry; + +overflow: + if (set_overflow) + mas->node = MAS_OVERFLOW; + return NULL; } /* @@ -4651,17 +4693,20 @@ again: * * Set the @mas->node to the next entry and the range_start to * the beginning value for the entry. Does not check beyond @limit. - * Sets @mas->index and @mas->last to the limit if it is hit. + * Sets @mas->index and @mas->last to the range, Does not update @mas->index and + * @mas->last on overflow. * Restarts on dead nodes. * * Return: the next entry or %NULL. */ static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) { - if (mas->last >= limit) + if (mas->last >= limit) { + mas->node = MAS_OVERFLOW; return NULL; + } - return mas_next_slot(mas, limit, false); + return mas_next_slot(mas, limit, false, true); } /* @@ -4837,7 +4882,7 @@ void *mas_walk(struct ma_state *mas) { void *entry; - if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas)) + if (!mas_is_active(mas) || !mas_is_start(mas)) mas->node = MAS_START; retry: entry = mas_state_walk(mas); @@ -5294,14 +5339,22 @@ static inline void mte_destroy_walk(struct maple_enode *enode, static void mas_wr_store_setup(struct ma_wr_state *wr_mas) { - if (mas_is_start(wr_mas->mas)) - return; + if (!mas_is_active(wr_mas->mas)) { + if (mas_is_start(wr_mas->mas)) + return; - if (unlikely(mas_is_paused(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_paused(wr_mas->mas))) + goto reset; - if (unlikely(mas_is_none(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_none(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_overflow(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_underflow(wr_mas->mas))) + goto reset; + } /* * A less strict version of mas_is_span_wr() where we allow spanning @@ -5595,8 +5648,25 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, { bool was_none = mas_is_none(mas); - if (mas_is_none(mas) || mas_is_paused(mas)) + if (unlikely(mas->last >= max)) { + mas->node = MAS_OVERFLOW; + return true; + } + + if (mas_is_active(mas)) + return false; + + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_overflow(mas)) { + /* Overflowed before, but the max changed */ mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } if (mas_is_start(mas)) *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ @@ -5615,6 +5685,7 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, if (mas_is_none(mas)) return true; + return false; } @@ -5637,7 +5708,7 @@ void *mas_next(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, true); } EXPORT_SYMBOL_GPL(mas_next); @@ -5660,7 +5731,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, true); } EXPORT_SYMBOL_GPL(mas_next_range); @@ -5691,18 +5762,31 @@ EXPORT_SYMBOL_GPL(mt_next); static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) { - if (mas->index <= min) - goto none; + if (unlikely(mas->index <= min)) { + mas->node = MAS_UNDERFLOW; + return true; + } - if (mas_is_none(mas) || mas_is_paused(mas)) + if (mas_is_active(mas)) + return false; + + if (mas_is_overflow(mas)) { mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } - if (mas_is_start(mas)) { - mas_walk(mas); - if (!mas->index) - goto none; + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + /* underflowed before but the min changed */ + mas->node = MAS_START; } + if (mas_is_start(mas)) + mas_walk(mas); + if (unlikely(mas_is_ptr(mas))) { if (!mas->index) goto none; @@ -5747,7 +5831,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, true); } EXPORT_SYMBOL_GPL(mas_prev); @@ -5770,7 +5854,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, true); } EXPORT_SYMBOL_GPL(mas_prev_range); @@ -5828,24 +5912,35 @@ EXPORT_SYMBOL_GPL(mas_pause); static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) { - *entry = NULL; + if (mas_is_active(mas)) { + if (mas->last < max) + return false; - if (unlikely(mas_is_none(mas))) { + return true; + } + + if (mas_is_paused(mas)) { if (unlikely(mas->last >= max)) return true; - mas->index = mas->last; + mas->index = ++mas->last; mas->node = MAS_START; - } else if (unlikely(mas_is_paused(mas))) { + } else if (mas_is_none(mas)) { if (unlikely(mas->last >= max)) return true; + mas->index = mas->last; mas->node = MAS_START; - mas->index = ++mas->last; - } else if (unlikely(mas_is_ptr(mas))) - goto ptr_out_of_range; + } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { + if (mas->index > max) { + mas->node = MAS_OVERFLOW; + return true; + } + + mas->node = MAS_START; + } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index > max) return true; @@ -5895,7 +5990,7 @@ void *mas_find(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, false); } EXPORT_SYMBOL_GPL(mas_find); @@ -5913,13 +6008,13 @@ EXPORT_SYMBOL_GPL(mas_find); */ void *mas_find_range(struct ma_state *mas, unsigned long max) { - void *entry; + void *entry = NULL; if (mas_find_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, false); } EXPORT_SYMBOL_GPL(mas_find_range); @@ -5934,26 +6029,36 @@ EXPORT_SYMBOL_GPL(mas_find_range); static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, void **entry) { - *entry = NULL; - - if (unlikely(mas_is_none(mas))) { - if (mas->index <= min) - goto none; + if (mas_is_active(mas)) { + if (mas->index > min) + return false; - mas->last = mas->index; - mas->node = MAS_START; + return true; } - if (unlikely(mas_is_paused(mas))) { + if (mas_is_paused(mas)) { if (unlikely(mas->index <= min)) { mas->node = MAS_NONE; return true; } mas->node = MAS_START; mas->last = --mas->index; + } else if (mas_is_none(mas)) { + if (mas->index <= min) + goto none; + + mas->last = mas->index; + mas->node = MAS_START; + } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { + if (mas->last <= min) { + mas->node = MAS_UNDERFLOW; + return true; + } + + mas->node = MAS_START; } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index < min) return true; @@ -6004,13 +6109,13 @@ none: */ void *mas_find_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, false); } EXPORT_SYMBOL_GPL(mas_find_rev); @@ -6030,13 +6135,13 @@ EXPORT_SYMBOL_GPL(mas_find_rev); */ void *mas_find_range_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, false); } EXPORT_SYMBOL_GPL(mas_find_range_rev); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 0674aebd4423..06959165e2f9 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2166,7 +2166,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt) MT_BUG_ON(mt, val != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 5); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); mas.index = 0; mas.last = 5; @@ -2917,6 +2917,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * exists MAS_NONE active range * exists active active range * DNE active active set to last range + * ERANGE active MAS_OVERFLOW last range * * Function ENTRY Start Result index & last * mas_prev() @@ -2945,6 +2946,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * any MAS_ROOT MAS_NONE 0 * exists active active range * DNE active active last range + * ERANGE active MAS_UNDERFLOW last range * * Function ENTRY Start Result index & last * mas_find() @@ -2955,7 +2957,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START MAS_NONE 0 * DNE MAS_PAUSE MAS_NONE 0 * DNE MAS_ROOT MAS_NONE 0 - * DNE MAS_NONE MAS_NONE 0 + * DNE MAS_NONE MAS_NONE 1 * if index == 0 * exists MAS_START MAS_ROOT 0 * exists MAS_PAUSE MAS_ROOT 0 @@ -2967,7 +2969,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to max * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to max - * exists MAS_NONE active range + * exists MAS_NONE active range (start at last) * exists active active range * DNE active active last range (max < last) * @@ -2992,7 +2994,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to min * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to min - * exists MAS_NONE active range + * exists MAS_NONE active range (start at index) * exists active active range * DNE active active last range (min > index) * @@ -3039,10 +3041,10 @@ static noinline void __init check_state_handling(struct maple_tree *mt) mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL); mas_lock(&mas); - /* prev: Start -> none */ + /* prev: Start -> underflow*/ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); /* prev: Start -> root */ mas_set(&mas, 10); @@ -3069,7 +3071,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.node != MAS_NONE); - /* next: start -> none */ + /* next: start -> none*/ mas_set(&mas, 10); entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, mas.index != 1); @@ -3268,25 +3270,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active beyond data */ entry = mas_next(&mas, 0x2999); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x2501); MT_BUG_ON(mt, mas.last != 0x2fff); MT_BUG_ON(mt, !mas_active(mas)); - /* Continue after out of range*/ + /* Continue after last range ends after max */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active continued */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, !mas_active(mas)); + + /* next:active -> overflow */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* next:overflow -> overflow */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* prev:overflow -> active */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != ptr3); + MT_BUG_ON(mt, mas.index != 0x3000); + MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); /* next: none -> active, skip value at location */ @@ -3307,11 +3330,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning end range */ + entry = mas_prev(&mas, 0x0100); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:active -> underflow */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* prev:underflow -> underflow */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* next:underflow -> active */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:first value -> underflow */ + entry = mas_prev(&mas, 0x1000); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* find:underflow -> first value */ + entry = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); /* prev: pause ->active */ @@ -3325,14 +3383,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning min */ entry = mas_prev(&mas, 0x1600); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1FFF); MT_BUG_ON(mt, !mas_active(mas)); - /* prev: active ->active, continue*/ + /* prev: active ->active, continue */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); @@ -3379,7 +3437,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2FFF); MT_BUG_ON(mt, !mas_active(mas)); - /* find: none ->active */ + /* find: overflow ->active */ entry = mas_find(&mas, 0x5000); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); @@ -3778,7 +3836,6 @@ static int __init maple_tree_seed(void) check_empty_area_fill(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_state_handling(&tree); mtree_destroy(&tree); -- cgit From 935d4f0c6dc8b3533e6e39346de7389a84490178 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 22 Sep 2023 12:58:03 +0100 Subject: mm: hugetlb: add huge page size param to set_huge_pte_at() Patch series "Fix set_huge_pte_at() panic on arm64", v2. This series fixes a bug in arm64's implementation of set_huge_pte_at(), which can result in an unprivileged user causing a kernel panic. The problem was triggered when running the new uffd poison mm selftest for HUGETLB memory. This test (and the uffd poison feature) was merged for v6.5-rc7. Ideally, I'd like to get this fix in for v6.6 and I've cc'ed stable (correctly this time) to get it backported to v6.5, where the issue first showed up. Description of Bug ================== arm64's huge pte implementation supports multiple huge page sizes, some of which are implemented in the page table with multiple contiguous entries. So set_huge_pte_at() needs to work out how big the logical pte is, so that it can also work out how many physical ptes (or pmds) need to be written. It previously did this by grabbing the folio out of the pte and querying its size. However, there are cases when the pte being set is actually a swap entry. But this also used to work fine, because for huge ptes, we only ever saw migration entries and hwpoison entries. And both of these types of swap entries have a PFN embedded, so the code would grab that and everything still worked out. But over time, more calls to set_huge_pte_at() have been added that set swap entry types that do not embed a PFN. And this causes the code to go bang. The triggering case is for the uffd poison test, commit 99aa77215ad0 ("selftests/mm: add uffd unit test for UFFDIO_POISON"), which causes a PTE_MARKER_POISONED swap entry to be set, coutesey of commit 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") - added in v6.5-rc7. Although review shows that there are other call sites that set PTE_MARKER_UFFD_WP (which also has no PFN), these don't trigger on arm64 because arm64 doesn't support UFFD WP. If CONFIG_DEBUG_VM is enabled, we do at least get a BUG(), but otherwise, it will dereference a bad pointer in page_folio(): static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) { VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); return page_folio(pfn_to_page(swp_offset_pfn(entry))); } Fix === The simplest fix would have been to revert the dodgy cleanup commit 18f3962953e4 ("mm: hugetlb: kill set_huge_swap_pte_at()"), but since things have moved on, this would have required an audit of all the new set_huge_pte_at() call sites to see if they should be converted to set_huge_swap_pte_at(). As per the original intent of the change, it would also leave us open to future bugs when people invariably get it wrong and call the wrong helper. So instead, I've added a huge page size parameter to set_huge_pte_at(). This means that the arm64 code has the size in all cases. It's a bigger change, due to needing to touch the arches that implement the function, but it is entirely mechanical, so in my view, low risk. I've compile-tested all touched arches; arm64, parisc, powerpc, riscv, s390, sparc (and additionally x86_64). I've additionally booted and run mm selftests against arm64, where I observe the uffd poison test is fixed, and there are no other regressions. This patch (of 2): In order to fix a bug, arm64 needs to be told the size of the huge page for which the pte is being set in set_huge_pte_at(). Provide for this by adding an `unsigned long sz` parameter to the function. This follows the same pattern as huge_pte_clear(). This commit makes the required interface modifications to the core mm as well as all arches that implement this function (arm64, parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed in a separate commit. No behavioral changes intended. Link: https://lkml.kernel.org/r/20230922115804.2043771-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20230922115804.2043771-2-ryan.roberts@arm.com Fixes: 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") Signed-off-by: Ryan Roberts Reviewed-by: Christophe Leroy [powerpc 8xx] Reviewed-by: Lorenzo Stoakes [vmalloc change] Cc: Alexandre Ghiti Cc: Albert Ou Cc: Alexander Gordeev Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Mike Kravetz Cc: Muchun Song Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Xu Cc: Qi Zheng Cc: Ryan Roberts Cc: SeongJae Park Cc: Sven Schnelle Cc: Uladzislau Rezki (Sony) Cc: Vasily Gorbik Cc: Will Deacon Cc: [6.5+] Signed-off-by: Andrew Morton --- arch/arm64/include/asm/hugetlb.h | 2 +- arch/arm64/mm/hugetlbpage.c | 6 ++-- arch/parisc/include/asm/hugetlb.h | 2 +- arch/parisc/mm/hugetlbpage.c | 2 +- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 3 +- arch/powerpc/mm/book3s64/hugetlbpage.c | 5 ++- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 3 +- arch/powerpc/mm/nohash/8xx.c | 3 +- arch/powerpc/mm/pgtable.c | 3 +- arch/riscv/include/asm/hugetlb.h | 3 +- arch/riscv/mm/hugetlbpage.c | 3 +- arch/s390/include/asm/hugetlb.h | 6 ++-- arch/s390/mm/hugetlbpage.c | 8 ++++- arch/sparc/include/asm/hugetlb.h | 6 ++-- arch/sparc/mm/hugetlbpage.c | 8 ++++- include/asm-generic/hugetlb.h | 2 +- include/linux/hugetlb.h | 6 ++-- mm/damon/vaddr.c | 3 +- mm/hugetlb.c | 43 +++++++++++++----------- mm/migrate.c | 7 ++-- mm/rmap.c | 23 ++++++++++--- mm/vmalloc.c | 2 +- 22 files changed, 100 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index f43a38ac1779..2ddc33d93b13 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -28,7 +28,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 9c52718ea750..a7f8c8db3425 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -249,7 +249,7 @@ static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { size_t pgsize; int i; @@ -571,5 +571,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index f7f078c2872c..72daacc472a0 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -6,7 +6,7 @@ #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index a8a1a7c1e16e..a9f7e21f6656 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -140,7 +140,7 @@ static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, unsigned long sz) { __set_huge_pte_at(mm, addr, ptep, entry); } diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index de092b04ee1a..92df40c6cc6b 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -46,7 +46,8 @@ static inline int check_and_get_huge_psize(int shift) } #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index 3bc0eb21b2a0..5a2e512e96db 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -143,11 +143,14 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { + unsigned long psize; if (radix_enabled()) return radix__huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + + psize = huge_page_size(hstate_vma(vma)); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } void __init hugetlbpage_init_defaultsize(void) diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index 17075c78d4bc..35fd2a95be24 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -47,6 +47,7 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, pte_t old_pte, pte_t pte) { struct mm_struct *mm = vma->vm_mm; + unsigned long psize = huge_page_size(hstate_vma(vma)); /* * POWER9 NMMU must flush the TLB after clearing the PTE before @@ -58,5 +59,5 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index dbbfe897455d..a642a7929892 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -91,7 +91,8 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) return -EINVAL; - set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + set_huge_pte_at(&init_mm, va, ptep, + pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize); return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3f86fd217690..3ba9fe411604 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -288,7 +288,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, } #if defined(CONFIG_PPC_8xx) -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz) { pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 34e24f078cc1..4c5b0e929890 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -18,7 +18,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pte); + unsigned long addr, pte_t *ptep, pte_t pte, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 96225a8533ad..e4a2ace92dbe 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -180,7 +180,8 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte) + pte_t pte, + unsigned long sz) { int i, pte_num; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index f07267875a19..deb198a61039 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -16,6 +16,8 @@ #define hugepages_supported() (MACHINE_HAS_EDAT1) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, @@ -65,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(ptep), pte); if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; } @@ -74,7 +76,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c718f2a0de94..297a6d897d5a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -142,7 +142,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) __storage_key_init_range(paddr, paddr + size - 1); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { unsigned long rste; @@ -163,6 +163,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, __pte(rste)); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, pte); +} + pte_t huge_ptep_get(pte_t *ptep) { return __rste_to_pte(pte_val(*ptep)); diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 0a26cca24232..c714ca6a05aa 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -14,6 +14,8 @@ extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end; #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR @@ -32,7 +34,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t old_pte = *ptep; - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -42,7 +44,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, { int changed = !pte_same(*ptep, pte); if (changed) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); flush_tlb_page(vma, addr); } return changed; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d7018823206c..b432500c13a5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -328,7 +328,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return pte_offset_huge(pmd, addr); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { unsigned int nptes, orig_shift, shift; @@ -364,6 +364,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, orig_shift); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, entry); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 4da02798a00b..6dcf4d576970 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -76,7 +76,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { set_pte_at(mm, addr, ptep, pte); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5b2626063f4f..a30686e649f7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -984,7 +984,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1173,7 +1175,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 4c81a9dbd044..cf8a9fc5c9d1 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -341,13 +341,14 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, bool referenced = false; pte_t entry = huge_ptep_get(pte); struct folio *folio = pfn_folio(pte_pfn(entry)); + unsigned long psize = huge_page_size(hstate_vma(vma)); folio_get(folio); if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - set_huge_pte_at(mm, addr, pte, entry); + set_huge_pte_at(mm, addr, pte, entry, psize); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ba6d39b71cb1..52d26072dfda 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4980,7 +4980,7 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte) static void hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, - struct folio *new_folio, pte_t old) + struct folio *new_folio, pte_t old, unsigned long sz) { pte_t newpte = make_huge_pte(vma, &new_folio->page, 1); @@ -4988,7 +4988,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add hugepage_add_new_anon_rmap(new_folio, vma, addr); if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(vma->vm_mm, addr, ptep, newpte); + set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); folio_set_hugetlb_migratable(new_folio); } @@ -5065,7 +5065,7 @@ again: } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = pte_swp_uffd_wp(entry); @@ -5080,18 +5080,18 @@ again: entry = swp_entry_to_pte(swp_entry); if (userfaultfd_wp(src_vma) && uffd_wp) entry = pte_swp_mkuffd_wp(entry); - set_huge_pte_at(src, addr, src_pte, entry); + set_huge_pte_at(src, addr, src_pte, entry, sz); } if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_pte_marker(entry))) { pte_marker marker = copy_pte_marker( pte_to_swp_entry(entry), dst_vma); if (marker) set_huge_pte_at(dst, addr, dst_pte, - make_pte_marker(marker)); + make_pte_marker(marker), sz); } else { entry = huge_ptep_get(src_pte); pte_folio = page_folio(pte_page(entry)); @@ -5145,7 +5145,7 @@ again: goto again; } hugetlb_install_folio(dst_vma, dst_pte, addr, - new_folio, src_pte_old); + new_folio, src_pte_old, sz); spin_unlock(src_ptl); spin_unlock(dst_ptl); continue; @@ -5166,7 +5166,7 @@ again: if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); hugetlb_count_add(npages, dst); } spin_unlock(src_ptl); @@ -5184,7 +5184,8 @@ again: } static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) + unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte, + unsigned long sz) { struct hstate *h = hstate_vma(vma); struct mm_struct *mm = vma->vm_mm; @@ -5202,7 +5203,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); - set_huge_pte_at(mm, new_addr, dst_pte, pte); + set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); if (src_ptl != dst_ptl) spin_unlock(src_ptl); @@ -5259,7 +5260,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, if (!dst_pte) break; - move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); + move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz); } if (shared_pmd) @@ -5337,7 +5338,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (pte_swp_uffd_wp_any(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); else huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); @@ -5371,7 +5373,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); @@ -5676,7 +5679,7 @@ retry_avoidcopy: hugepage_add_new_anon_rmap(new_folio, vma, haddr); if (huge_pte_uffd_wp(pte)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(mm, haddr, ptep, newpte); + set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h)); folio_set_hugetlb_migratable(new_folio); /* Make the old page be freed below */ new_folio = old_folio; @@ -5972,7 +5975,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, */ if (unlikely(pte_marker_uffd_wp(old_pte))) new_pte = huge_pte_mkuffd_wp(new_pte); - set_huge_pte_at(mm, haddr, ptep, new_pte); + set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { @@ -6261,7 +6264,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, } _dst_pte = make_pte_marker(PTE_MARKER_POISONED); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, + huge_page_size(h)); /* No need to invalidate - it was non-present before */ update_mmu_cache(dst_vma, dst_addr, dst_pte); @@ -6412,7 +6416,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, if (wp_enabled) _dst_pte = huge_pte_mkuffd_wp(_dst_pte); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), dst_mm); @@ -6598,7 +6602,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, else if (uffd_wp_resolve) newpte = pte_swp_clear_uffd_wp(newpte); if (!pte_same(pte, newpte)) - set_huge_pte_at(mm, address, ptep, newpte); + set_huge_pte_at(mm, address, ptep, newpte, psize); } else if (unlikely(is_pte_marker(pte))) { /* No other markers apply for now. */ WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); @@ -6623,7 +6627,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (unlikely(uffd_wp)) /* Safe to modify directly (none->non-present). */ set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + psize); } spin_unlock(ptl); } diff --git a/mm/migrate.c b/mm/migrate.c index b7fa020003f3..2053b54556ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -243,7 +243,9 @@ static bool remove_migration_pte(struct folio *folio, #ifdef CONFIG_HUGETLB_PAGE if (folio_test_hugetlb(folio)) { - unsigned int shift = huge_page_shift(hstate_vma(vma)); + struct hstate *h = hstate_vma(vma); + unsigned int shift = huge_page_shift(h); + unsigned long psize = huge_page_size(h); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (folio_test_anon(folio)) @@ -251,7 +253,8 @@ static bool remove_migration_pte(struct folio *folio, rmap_flags); else page_dup_file_rmap(new, true); - set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); + set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte, + psize); } else #endif { diff --git a/mm/rmap.c b/mm/rmap.c index ec7f8e6c9e48..9f795b93cf40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1480,6 +1480,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, @@ -1511,6 +1512,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -1628,7 +1632,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -1820,6 +1825,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, @@ -1855,6 +1861,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -2020,7 +2029,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -2044,7 +2054,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (arch_unmap_one(mm, vma, address, pteval) < 0) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2058,7 +2069,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (anon_exclusive && page_try_share_anon_rmap(subpage)) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2090,7 +2102,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, swp_pte); + set_huge_pte_at(mm, address, pvmw.pte, swp_pte, + hsz); else set_pte_at(mm, address, pvmw.pte, swp_pte); trace_set_migration_pte(address, pte_val(swp_pte), diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef8599d394fd..a3fedb3ee0db 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -111,7 +111,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t entry = pfn_pte(pfn, prot); entry = arch_make_huge_pte(entry, ilog2(size), 0); - set_huge_pte_at(&init_mm, addr, pte, entry); + set_huge_pte_at(&init_mm, addr, pte, entry, size); pfn += PFN_DOWN(size); continue; } -- cgit From 6f1bace9a9fb11402520ba7defa76bfaf7b8e09f Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 22 Sep 2023 12:58:04 +0100 Subject: arm64: hugetlb: fix set_huge_pte_at() to work with all swap entries When called with a swap entry that does not embed a PFN (e.g. PTE_MARKER_POISONED or PTE_MARKER_UFFD_WP), the previous implementation of set_huge_pte_at() would either cause a BUG() to fire (if CONFIG_DEBUG_VM is enabled) or cause a dereference of an invalid address and subsequent panic. arm64's huge pte implementation supports multiple huge page sizes, some of which are implemented in the page table with multiple contiguous entries. So set_huge_pte_at() needs to work out how big the logical pte is, so that it can also work out how many physical ptes (or pmds) need to be written. It previously did this by grabbing the folio out of the pte and querying its size. However, there are cases when the pte being set is actually a swap entry. But this also used to work fine, because for huge ptes, we only ever saw migration entries and hwpoison entries. And both of these types of swap entries have a PFN embedded, so the code would grab that and everything still worked out. But over time, more calls to set_huge_pte_at() have been added that set swap entry types that do not embed a PFN. And this causes the code to go bang. The triggering case is for the uffd poison test, commit 99aa77215ad0 ("selftests/mm: add uffd unit test for UFFDIO_POISON"), which causes a PTE_MARKER_POISONED swap entry to be set, coutesey of commit 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") - added in v6.5-rc7. Although review shows that there are other call sites that set PTE_MARKER_UFFD_WP (which also has no PFN), these don't trigger on arm64 because arm64 doesn't support UFFD WP. Arguably, the root cause is really due to commit 18f3962953e4 ("mm: hugetlb: kill set_huge_swap_pte_at()"), which aimed to simplify the interface to the core code by removing set_huge_swap_pte_at() (which took a page size parameter) and replacing it with calls to set_huge_pte_at() where the size was inferred from the folio, as descibed above. While that commit didn't break anything at the time, it did break the interface because it couldn't handle swap entries without PFNs. And since then new callers have come along which rely on this working. But given the brokeness is only observable after commit 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs"), that one gets the Fixes tag. Now that we have modified the set_huge_pte_at() interface to pass the huge page size in the previous patch, we can trivially fix this issue. Link: https://lkml.kernel.org/r/20230922115804.2043771-3-ryan.roberts@arm.com Fixes: 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") Signed-off-by: Ryan Roberts Reviewed-by: Axel Rasmussen Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Christoph Hellwig Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Lorenzo Stoakes Cc: Mike Kravetz Cc: Muchun Song Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Xu Cc: Qi Zheng Cc: SeongJae Park Cc: Sven Schnelle Cc: Uladzislau Rezki (Sony) Cc: Vasily Gorbik Cc: Will Deacon Cc: [6.5+] Signed-off-by: Andrew Morton --- arch/arm64/mm/hugetlbpage.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index a7f8c8db3425..13fd592228b1 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -241,13 +241,6 @@ static void clear_flush(struct mm_struct *mm, flush_tlb_range(&vma, saddr, addr); } -static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) -{ - VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); - - return page_folio(pfn_to_page(swp_offset_pfn(entry))); -} - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { @@ -257,13 +250,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long pfn, dpfn; pgprot_t hugeprot; - if (!pte_present(pte)) { - struct folio *folio; - - folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte)); - ncontig = num_contig_ptes(folio_size(folio), &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); - for (i = 0; i < ncontig; i++, ptep++) + if (!pte_present(pte)) { + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) set_pte_at(mm, addr, ptep, pte); return; } @@ -273,7 +263,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, return; } - ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); -- cgit From ca56489c2f1df516801eeff06129d2e9e282ab7b Mon Sep 17 00:00:00 2001 From: Domenico Cerasuolo Date: Fri, 22 Sep 2023 19:22:11 +0200 Subject: mm: zswap: fix potential memory corruption on duplicate store While stress-testing zswap a memory corruption was happening when writing back pages. __frontswap_store used to check for duplicate entries before attempting to store a page in zswap, this was because if the store fails the old entry isn't removed from the tree. This change removes duplicate entries in zswap_store before the actual attempt. [cerasuolodomenico@gmail.com: add a warning and a comment, per Johannes] Link: https://lkml.kernel.org/r/20230925130002.1929369-1-cerasuolodomenico@gmail.com Link: https://lkml.kernel.org/r/20230922172211.1704917-1-cerasuolodomenico@gmail.com Fixes: 42c06a0e8ebe ("mm: kill frontswap") Signed-off-by: Domenico Cerasuolo Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Dan Streetman Cc: Domenico Cerasuolo Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/zswap.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index 412b1409a0d7..083c693602b8 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1218,6 +1218,19 @@ bool zswap_store(struct folio *folio) if (!zswap_enabled || !tree) return false; + /* + * If this is a duplicate, it must be removed before attempting to store + * it, otherwise, if the store fails the old page won't be removed from + * the tree, and it might be written back overriding the new data. + */ + spin_lock(&tree->lock); + dupentry = zswap_rb_search(&tree->rbroot, offset); + if (dupentry) { + zswap_duplicate_entry++; + zswap_invalidate_entry(tree, dupentry); + } + spin_unlock(&tree->lock); + /* * XXX: zswap reclaim does not work with cgroups yet. Without a * cgroup-aware entry LRU, we will push out entries system-wide based on @@ -1333,7 +1346,14 @@ insert_entry: /* map */ spin_lock(&tree->lock); + /* + * A duplicate entry should have been removed at the beginning of this + * function. Since the swap entry should be pinned, if a duplicate is + * found again here it means that something went wrong in the swap + * cache. + */ while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { + WARN_ON(1); zswap_duplicate_entry++; zswap_invalidate_entry(tree, dupentry); } -- cgit From 4597648fddeadef5877610d693af11906aa666ac Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 21 Sep 2023 09:38:29 +0200 Subject: mm, memcg: reconsider kmem.limit_in_bytes deprecation This reverts commits 86327e8eb94c ("memcg: drop kmem.limit_in_bytes") and partially reverts 58056f77502f ("memcg, kmem: further deprecate kmem.limit_in_bytes") which have incrementally removed support for the kernel memory accounting hard limit. Unfortunately it has turned out that there is still userspace depending on the existence of memory.kmem.limit_in_bytes [1]. The underlying functionality is not really required but the non-existent file just confuses the userspace which fails in the result. The patch to fix this on the userspace side has been submitted but it is hard to predict how it will propagate through the maze of 3rd party consumers of the software. Now, reverting alone 86327e8eb94c is not an option because there is another set of userspace which cannot cope with ENOTSUPP returned when writing to the file. Therefore we have to go and revisit 58056f77502f as well. There are two ways to go ahead. Either we give up on the deprecation and fully revert 58056f77502f as well or we can keep kmem.limit_in_bytes but make the write a noop and warn about the fact. This should work for both known breaking workloads which depend on the existence but do not depend on the hard limit enforcement. Note to backporters to stable trees. a8c49af3be5f ("memcg: add per-memcg total kernel memory stat") introduced in 4.18 has added memcg_account_kmem so the accounting is not done by obj_cgroup_charge_pages directly for v1 anymore. Prior kernels need to add it explicitly (thanks to Johannes for pointing this out). [akpm@linux-foundation.org: fix build - remove unused local] Link: http://lkml.kernel.org/r/20230920081101.GA12096@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net [1] Link: https://lkml.kernel.org/r/ZRE5VJozPZt9bRPy@dhcp22.suse.cz Fixes: 86327e8eb94c ("memcg: drop kmem.limit_in_bytes") Fixes: 58056f77502f ("memcg, kmem: further deprecate kmem.limit_in_bytes") Signed-off-by: Michal Hocko Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Greg Kroah-Hartman Cc: Jeremi Piotrowski Cc: Muchun Song Cc: Roman Gushchin Cc: Tejun heo Cc: Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v1/memory.rst | 7 +++++++ mm/memcontrol.c | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 5f502bf68fbc..ff456871bf4b 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -92,6 +92,13 @@ Brief summary of control files. memory.oom_control set/show oom controls. memory.numa_stat show the number of memory usage per numa node + memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel + memory hard limit. Kernel hard limit is not + supported since 5.16. Writing any value to + do file will not have any effect same as if + nokmem kernel parameter was specified. + Kernel memory is still charged and reported + by memory.kmem.usage_in_bytes. memory.kmem.usage_in_bytes show current kernel memory allocation memory.kmem.failcnt show the number of kernel memory usage hits limits diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d13dde2f8b56..5b009b233ab8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3867,6 +3867,13 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, case _MEMSWAP: ret = mem_cgroup_resize_max(memcg, nr_pages, true); break; + case _KMEM: + pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " + "Writing any value to this file has no effect. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); + ret = 0; + break; case _TCP: ret = memcg_update_tcp_max(memcg, nr_pages); break; @@ -5077,6 +5084,12 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memcg_numa_stat_show, }, #endif + { + .name = "kmem.limit_in_bytes", + .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), + .write = mem_cgroup_write, + .read_u64 = mem_cgroup_read_u64, + }, { .name = "kmem.usage_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), -- cgit From 45120b15743fa7c0aa53d5db6dfb4c8f87be4abd Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 Sep 2023 15:20:59 +0800 Subject: mm/damon/vaddr-test: fix memory leak in damon_do_test_apply_three_regions() When CONFIG_DAMON_VADDR_KUNIT_TEST=y and making CONFIG_DEBUG_KMEMLEAK=y and CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y, the below memory leak is detected. Since commit 9f86d624292c ("mm/damon/vaddr-test: remove unnecessary variables"), the damon_destroy_ctx() is removed, but still call damon_new_target() and damon_new_region(), the damon_region which is allocated by kmem_cache_alloc() in damon_new_region() and the damon_target which is allocated by kmalloc in damon_new_target() are not freed. And the damon_region which is allocated in damon_new_region() in damon_set_regions() is also not freed. So use damon_destroy_target to free all the damon_regions and damon_target. unreferenced object 0xffff888107c9a940 (size 64): comm "kunit_try_catch", pid 1069, jiffies 4294670592 (age 732.761s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk 60 c7 9c 07 81 88 ff ff f8 cb 9c 07 81 88 ff ff `............... backtrace: [] kmalloc_trace+0x27/0xa0 [] damon_new_target+0x3f/0x1b0 [] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0 [] damon_test_apply_three_regions1+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff8881079cc740 (size 56): comm "kunit_try_catch", pid 1069, jiffies 4294670592 (age 732.761s) hex dump (first 32 bytes): 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk backtrace: [] damon_new_region+0x22/0x1c0 [] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0 [] damon_test_apply_three_regions1+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107c9ac40 (size 64): comm "kunit_try_catch", pid 1071, jiffies 4294670595 (age 732.843s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk a0 cc 9c 07 81 88 ff ff 78 a1 76 07 81 88 ff ff ........x.v..... backtrace: [] kmalloc_trace+0x27/0xa0 [] damon_new_target+0x3f/0x1b0 [] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0 [] damon_test_apply_three_regions2+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff8881079ccc80 (size 56): comm "kunit_try_catch", pid 1071, jiffies 4294670595 (age 732.843s) hex dump (first 32 bytes): 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk backtrace: [] damon_new_region+0x22/0x1c0 [] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0 [] damon_test_apply_three_regions2+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888107c9af40 (size 64): comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.011s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 06 00 00 00 6b 6b 6b 6b ............kkkk 20 a2 76 07 81 88 ff ff b8 a6 76 07 81 88 ff ff .v.......v..... backtrace: [] kmalloc_trace+0x27/0xa0 [] damon_new_target+0x3f/0x1b0 [] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0 [] damon_test_apply_three_regions3+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810776a200 (size 56): comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.011s) hex dump (first 32 bytes): 05 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk backtrace: [] damon_new_region+0x22/0x1c0 [] damon_do_test_apply_three_regions.constprop.0+0xd1/0x3e0 [] damon_test_apply_three_regions3+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810776a740 (size 56): comm "kunit_try_catch", pid 1073, jiffies 4294670597 (age 733.025s) hex dump (first 32 bytes): 3d 00 00 00 00 00 00 00 3f 00 00 00 00 00 00 00 =.......?....... 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk backtrace: [] damon_new_region+0x22/0x1c0 [] damon_set_regions+0x4c2/0x8e0 [] damon_do_test_apply_three_regions.constprop.0+0xfb/0x3e0 [] damon_test_apply_three_regions3+0x21e/0x260 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff888108038240 (size 64): comm "kunit_try_catch", pid 1075, jiffies 4294670600 (age 733.022s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 03 00 00 00 6b 6b 6b 6b ............kkkk 48 ad 76 07 81 88 ff ff 98 ae 76 07 81 88 ff ff H.v.......v..... backtrace: [] kmalloc_trace+0x27/0xa0 [] damon_new_target+0x3f/0x1b0 [] damon_do_test_apply_three_regions.constprop.0+0x95/0x3e0 [] damon_test_apply_three_regions4+0x1cd/0x210 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 unreferenced object 0xffff88810776ad28 (size 56): comm "kunit_try_catch", pid 1075, jiffies 4294670600 (age 733.022s) hex dump (first 32 bytes): 05 00 00 00 00 00 00 00 07 00 00 00 00 00 00 00 ................ 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b kkkkkkkk....kkkk backtrace: [] damon_new_region+0x22/0x1c0 [] damon_set_regions+0x4c2/0x8e0 [] damon_do_test_apply_three_regions.constprop.0+0xfb/0x3e0 [] damon_test_apply_three_regions4+0x1cd/0x210 [] kunit_generic_run_threadfn_adapter+0x4a/0x90 [] kthread+0x2b6/0x380 [] ret_from_fork+0x2d/0x70 [] ret_from_fork_asm+0x11/0x20 Link: https://lkml.kernel.org/r/20230925072100.3725620-1-ruanjinjie@huawei.com Fixes: 9f86d624292c ("mm/damon/vaddr-test: remove unnecessary variables") Signed-off-by: Jinjie Ruan Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/vaddr-test.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index c4b455b5ee30..dcf1ca6b31cc 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -148,6 +148,8 @@ static void damon_do_test_apply_three_regions(struct kunit *test, KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); } + + damon_destroy_target(t); } /* -- cgit From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.com Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi Cc: Hugh Dickins Cc: Suren Baghdasaryan Cc: Matthew Wilcox Cc: Michal Hocko Cc: Vlastimil Babka Cc: Oscar Salvador Cc: Rafael Aquini Cc: Kirill A. Shutemov Cc: David Rientjes Cc: [4.9+] Signed-off-by: Andrew Morton --- mm/mempolicy.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ unlock: * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out: -- cgit From bbe246f875d064ecfb872fe4f66152e743dfd22d Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Wed, 27 Sep 2023 02:19:44 +0800 Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error According to the awk manual, the -e option does not need to be specified in front of 'program' (unless you need to mix program-file). The redundant -e option can cause error when users use awk tools other than gawk (for example, mawk does not support the -e option). Error Example: awk: not an option: -e Link: https://lkml.kernel.org/r/VI1P193MB075228810591AF2FDD7D42C599C3A@VI1P193MB0752.EURP193.PROD.OUTLOOK.COM Signed-off-by: Juntong Deng Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 4 ++-- tools/testing/selftests/mm/hugetlb_reparenting_test.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index a5cb4b09a46c..0899019a7fcb 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then fi if [[ $cgroup2 ]]; then - cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path @@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb" >$cgroup_path/cgroup.subtree_control else - cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index bf2d2a684edf..14d26075c863 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -20,7 +20,7 @@ fi if [[ $cgroup2 ]]; then - CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT @@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control else - CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT -- cgit From e2a8f20dd8e9df695f736e51cd9115ae55be92d1 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 26 Sep 2023 20:09:05 +0800 Subject: Crash: add lock to serialize crash hotplug handling Eric reported that handling corresponding crash hotplug event can be failed easily when many memory hotplug event are notified in a short period. They failed because failing to take __kexec_lock. ======= [ 78.714569] Fallback order for Node 0: 0 [ 78.714575] Built 1 zonelists, mobility grouping on. Total pages: 1817886 [ 78.717133] Policy zone: Normal [ 78.724423] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate [ 78.727207] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate [ 80.056643] PEFILE: Unsigned PE binary ======= The memory hotplug events are notified very quickly and very many, while the handling of crash hotplug is much slower relatively. So the atomic variable __kexec_lock and kexec_trylock() can't guarantee the serialization of crash hotplug handling. Here, add a new mutex lock __crash_hotplug_lock to serialize crash hotplug handling specifically. This doesn't impact the usage of __kexec_lock. Link: https://lkml.kernel.org/r/20230926120905.392903-1-bhe@redhat.com Fixes: 247262756121 ("crash: add generic infrastructure for crash hotplug support") Signed-off-by: Baoquan He Tested-by: Eric DeVolder Reviewed-by: Eric DeVolder Reviewed-by: Valentin Schneider Cc: Sourabh Jain Cc: Signed-off-by: Andrew Morton --- kernel/crash_core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 03a7932cde0a..2f675ef045d4 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -739,6 +739,17 @@ subsys_initcall(crash_notes_memory_init); #undef pr_fmt #define pr_fmt(fmt) "crash hp: " fmt +/* + * Different than kexec/kdump loading/unloading/jumping/shrinking which + * usually rarely happen, there will be many crash hotplug events notified + * during one short period, e.g one memory board is hot added and memory + * regions are online. So mutex lock __crash_hotplug_lock is used to + * serialize the crash hotplug handling specifically. + */ +DEFINE_MUTEX(__crash_hotplug_lock); +#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock) +#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock) + /* * This routine utilized when the crash_hotplug sysfs node is read. * It reflects the kernel's ability/permission to update the crash @@ -748,9 +759,11 @@ int crash_check_update_elfcorehdr(void) { int rc = 0; + crash_hotplug_lock(); /* Obtain lock while reading crash information */ if (!kexec_trylock()) { pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + crash_hotplug_unlock(); return 0; } if (kexec_crash_image) { @@ -761,6 +774,7 @@ int crash_check_update_elfcorehdr(void) } /* Release lock now that update complete */ kexec_unlock(); + crash_hotplug_unlock(); return rc; } @@ -783,9 +797,11 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) { struct kimage *image; + crash_hotplug_lock(); /* Obtain lock while changing crash information */ if (!kexec_trylock()) { pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + crash_hotplug_unlock(); return; } @@ -852,6 +868,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) out: /* Release lock now that update complete */ kexec_unlock(); + crash_hotplug_unlock(); } static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) -- cgit From 7a48b58eb5ff3798f0480d2da16bf27df9654fc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 10:16:05 +0300 Subject: perf dlfilter: Fix use of addr_location__exit() in dlfilter__object_code() Stop calling addr_location__exit() when addr_location__init() was not called. Fixes: 0dd5041c9a0e ("perf addr_location: Add init/exit/copy functions") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20230928071605.17624-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/util/dlfilter.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 1dbf27822ee2..4a1dc21b0450 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -282,13 +282,21 @@ static struct perf_event_attr *dlfilter__attr(void *ctx) return &d->evsel->core.attr; } +static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, __u32 len) +{ + u64 offset = map__map_ip(map, ip); + + if (ip + len >= map__end(map)) + len = map__end(map) - ip; + + return dso__data_read_offset(map__dso(map), machine, offset, buf, len); +} + static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) { struct dlfilter *d = (struct dlfilter *)ctx; struct addr_location *al; struct addr_location a; - struct map *map; - u64 offset; __s32 ret; if (!d->ctx_valid) @@ -298,27 +306,17 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) if (!al) return -1; - map = al->map; - - if (map && ip >= map__start(map) && ip < map__end(map) && + if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) && machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) - goto have_map; + return code_read(ip, al->map, d->machine, buf, len); addr_location__init(&a); + thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) { - ret = -1; - goto out; - } + ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1; - map = a.map; -have_map: - offset = map__map_ip(map, ip); - if (ip + len >= map__end(map)) - len = map__end(map) - ip; - ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); -out: addr_location__exit(&a); + return ret; } -- cgit From f38f547314b858b94d36aeb9a4401f0aade7d1af Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 12:10:33 +0300 Subject: perf dlfilter: Add a test for object_code() Extend the "dlfilter C API" test to test perf_dlfilter_fns.object_code(). Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20230928091033.33998-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/dlfilters/dlfilter-test-api-v0.c | 12 +++++++++++- tools/perf/dlfilters/dlfilter-test-api-v2.c | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 72f263d49121..4083b1abeaab 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -289,6 +289,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -314,7 +323,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 38e593d92920..32ff619e881c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -308,6 +308,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -333,7 +342,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) -- cgit From 3b678768c0458e6d8d45fadf61423e44effed4cb Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 29 Sep 2023 22:53:36 +0530 Subject: powerpc/pseries: Fix STK_PARAM access in the hcall tracing code In powerpc pseries system, below behaviour is observed while enabling tracing on hcall: # cd /sys/kernel/debug/tracing/ # cat events/powerpc/hcall_exit/enable 0 # echo 1 > events/powerpc/hcall_exit/enable # ls -bash: fork: Bad address Above is from power9 lpar with latest kernel. Past this, softlockup is observed. Initially while attempting via perf_event_open to use "PERF_TYPE_TRACEPOINT", kernel panic was observed. perf config used: ================ memset(&pe[1],0,sizeof(struct perf_event_attr)); pe[1].type=PERF_TYPE_TRACEPOINT; pe[1].size=96; pe[1].config=0x26ULL; /* 38 raw_syscalls/sys_exit */ pe[1].sample_type=0; /* 0 */ pe[1].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING|PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1f */ pe[1].inherit=1; pe[1].precise_ip=0; /* arbitrary skid */ pe[1].wakeup_events=0; pe[1].bp_type=HW_BREAKPOINT_EMPTY; pe[1].config1=0x1ULL; Kernel panic logs: ================== Kernel attempted to read user page (8) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000008 Faulting instruction address: 0xc0000000004c2814 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: nfnetlink bonding tls rfkill sunrpc dm_service_time dm_multipath pseries_rng xts vmx_crypto xfs libcrc32c sd_mod t10_pi crc64_rocksoft crc64 sg ibmvfc scsi_transport_fc ibmveth dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 0 PID: 1431 Comm: login Not tainted 6.4.0+ #1 Hardware name: IBM,8375-42A POWER9 (raw) 0x4e0202 0xf000005 of:IBM,FW950.30 (VL950_892) hv:phyp pSeries NIP page_remove_rmap+0x44/0x320 LR wp_page_copy+0x384/0xec0 Call Trace: 0xc00000001416e400 (unreliable) wp_page_copy+0x384/0xec0 __handle_mm_fault+0x9d4/0xfb0 handle_mm_fault+0xf0/0x350 ___do_page_fault+0x48c/0xc90 hash__do_page_fault+0x30/0x70 do_hash_fault+0x1a4/0x330 data_access_common_virt+0x198/0x1f0 --- interrupt: 300 at 0x7fffae971abc git bisect tracked this down to below commit: 'commit baa49d81a94b ("powerpc/pseries: hvcall stack frame overhead")' This commit changed STACK_FRAME_OVERHEAD (112 ) to STACK_FRAME_MIN_SIZE (32 ) since 32 bytes is the minimum size for ELFv2 stack. With the latest kernel, when running on ELFv2, STACK_FRAME_MIN_SIZE is used to allocate stack size. During plpar_hcall_trace, first call is made to HCALL_INST_PRECALL which saves the registers and allocates new stack frame. In the plpar_hcall_trace code, STK_PARAM is accessed at two places. 1. To save r4: std r4,STK_PARAM(R4)(r1) 2. To access r4 back: ld r12,STK_PARAM(R4)(r1) HCALL_INST_PRECALL precall allocates a new stack frame. So all the stack parameter access after the precall, needs to be accessed with +STACK_FRAME_MIN_SIZE. So the store instruction should be: std r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) If the "std" is not updated with STACK_FRAME_MIN_SIZE, we will end up with overwriting stack contents and cause corruption. But instead of updating 'std', we can instead remove it since HCALL_INST_PRECALL already saves it to the correct location. similarly load instruction should be: ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) Fix the load instruction to correctly access the stack parameter with +STACK_FRAME_MIN_SIZE and remove the store of r4 since the precall saves it correctly. Cc: stable@vger.kernel.org # v6.2+ Fixes: baa49d81a94b ("powerpc/pseries: hvcall stack frame overhead") Co-developed-by: Naveen N Rao Signed-off-by: Naveen N Rao Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://msgid.link/20230929172337.7906-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hvCall.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index bae45b358a09..2addf2ea03f0 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,7 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) mr r0,r4 mr r4,r5 @@ -196,7 +195,7 @@ plpar_hcall_trace: HVSC - ld r12,STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) @@ -296,7 +295,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) mr r0,r4 mr r4,r5 -- cgit From dfb5f8cbd5992d5769edfd3e059fad9e0b8bdafb Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 29 Sep 2023 22:53:37 +0530 Subject: powerpc/pseries: Remove unused r0 in the hcall tracing code In the plpar_hcall trace code, currently we use r0 to store the value of r4. But this value is not used subsequently in the code. Hence remove this unused save to r0 in plpar_hcall and plpar_hcall9 Suggested-by: Naveen N Rao Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://msgid.link/20230929172337.7906-2-atrajeev@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hvCall.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2addf2ea03f0..2b0cac6fb61f 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,8 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 @@ -295,8 +293,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 -- cgit From 885291ab687e83085b9f450ec1efaed44a8a7ab6 Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Wed, 20 Sep 2023 03:11:36 -0300 Subject: drm/tests: Fix kunit_release_action ctx argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kunit_action_platform_driver_unregister is added with &fake_platform_driver as ctx, but the kunit_release_action is called pdev as ctx. Fix that by replacing it with &fake_platform_driver. Fixes: 4f2b0b583baa ("drm/tests: helpers: Switch to kunit actions") Signed-off-by: Arthur Grillo Reviewed-by: Maíra Canal Acked-by: Maxime Ripard Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20230920-kunit-kasan-fixes-v1-1-1a0fc261832d@riseup.net --- drivers/gpu/drm/tests/drm_kunit_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index c1dfbfcaa000..bccb33b900f3 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -118,7 +118,7 @@ void drm_kunit_helper_free_device(struct kunit *test, struct device *dev) kunit_release_action(test, kunit_action_platform_driver_unregister, - pdev); + &fake_platform_driver); } EXPORT_SYMBOL_GPL(drm_kunit_helper_free_device); -- cgit From 9077fc228f09c9f975c498c55f5d2e882cd0da59 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 28 Sep 2023 18:15:58 +0800 Subject: bpf: Use kmalloc_size_roundup() to adjust size_index Commit d52b59315bf5 ("bpf: Adjust size_index according to the value of KMALLOC_MIN_SIZE") uses KMALLOC_MIN_SIZE to adjust size_index, but as reported by Nathan, the adjustment is not enough, because __kmalloc_minalign() also decides the minimal alignment of slab object as shown in new_kmalloc_cache() and its value may be greater than KMALLOC_MIN_SIZE (e.g., 64 bytes vs 8 bytes under a riscv QEMU VM). Instead of invoking __kmalloc_minalign() in bpf subsystem to find the maximal alignment, just using kmalloc_size_roundup() directly to get the corresponding slab object size for each allocation size. If these two sizes are unmatched, adjust size_index to select a bpf_mem_cache with unit_size equal to the object_size of the underlying slab cache for the allocation size. Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/bpf/20230914181407.GA1000274@dev-arch.thelio-3990X/ Signed-off-by: Hou Tao Tested-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20230928101558.2594068-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index cf1941516643..d93ddac283d4 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -965,37 +965,31 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } -/* Most of the logic is taken from setup_kmalloc_cache_index_table() */ static __init int bpf_mem_cache_adjust_size(void) { - unsigned int size, index; + unsigned int size; - /* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be - * up-to 256-bytes. + /* Adjusting the indexes in size_index() according to the object_size + * of underlying slab cache, so bpf_mem_alloc() will select a + * bpf_mem_cache with unit_size equal to the object_size of + * the underlying slab cache. + * + * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is + * 256-bytes, so only do adjustment for [8-bytes, 192-bytes]. */ - size = KMALLOC_MIN_SIZE; - if (size <= 192) - index = size_index[(size - 1) / 8]; - else - index = fls(size - 1) - 1; - for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8) - size_index[(size - 1) / 8] = index; + for (size = 192; size >= 8; size -= 8) { + unsigned int kmalloc_size, index; - /* The minimal alignment is 64-bytes, so disable 96-bytes cache and - * use 128-bytes cache instead. - */ - if (KMALLOC_MIN_SIZE >= 64) { - index = size_index[(128 - 1) / 8]; - for (size = 64 + 8; size <= 96; size += 8) - size_index[(size - 1) / 8] = index; - } + kmalloc_size = kmalloc_size_roundup(size); + if (kmalloc_size == size) + continue; - /* The minimal alignment is 128-bytes, so disable 192-bytes cache and - * use 256-bytes cache instead. - */ - if (KMALLOC_MIN_SIZE >= 128) { - index = fls(256 - 1) - 1; - for (size = 128 + 8; size <= 192; size += 8) + if (kmalloc_size <= 192) + index = size_index[(kmalloc_size - 1) / 8]; + else + index = fls(kmalloc_size - 1) - 1; + /* Only overwrite if necessary */ + if (size_index[(size - 1) / 8] != index) size_index[(size - 1) / 8] = index; } -- cgit From 383eba9f9a7f4cd639d367ea5daa6df2be392c54 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 19 Sep 2023 14:42:22 +0200 Subject: power: supply: qcom_battmgr: fix battery_id type qcom_battmgr_update_request.battery_id is written to using cpu_to_le32() and should be of type __le32, just like all other 32bit integer requests for qcom_battmgr. Cc: stable@vger.kernel.org # 6.3 Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309162149.4owm9iXc-lkp@intel.com/ Fixes: 29e8142b5623 ("power: supply: Introduce Qualcomm PMIC GLINK power supply") Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20230919124222.1155894-1-sebastian.reichel@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index de77df97b3a4..a05fd00711f6 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -105,7 +105,7 @@ struct qcom_battmgr_property_request { struct qcom_battmgr_update_request { struct pmic_glink_hdr hdr; - u32 battery_id; + __le32 battery_id; }; struct qcom_battmgr_charge_time_request { -- cgit From 8894b432548851f705f72ff135d3dcbd442a18d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Sep 2023 12:16:49 +0200 Subject: power: supply: qcom_battmgr: fix enable request endianness Add the missing endianness conversion when sending the enable request so that the driver will work also on a hypothetical big-endian machine. This issue was reported by sparse. Fixes: 29e8142b5623 ("power: supply: Introduce Qualcomm PMIC GLINK power supply") Cc: stable@vger.kernel.org # 6.3 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20230929101649.20206-1-johan+linaro@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index a05fd00711f6..ec163d1bcd18 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -1282,9 +1282,9 @@ static void qcom_battmgr_enable_worker(struct work_struct *work) { struct qcom_battmgr *battmgr = container_of(work, struct qcom_battmgr, enable_work); struct qcom_battmgr_enable_request req = { - .hdr.owner = PMIC_GLINK_OWNER_BATTMGR, - .hdr.type = PMIC_GLINK_NOTIFY, - .hdr.opcode = BATTMGR_REQUEST_NOTIFICATION, + .hdr.owner = cpu_to_le32(PMIC_GLINK_OWNER_BATTMGR), + .hdr.type = cpu_to_le32(PMIC_GLINK_NOTIFY), + .hdr.opcode = cpu_to_le32(BATTMGR_REQUEST_NOTIFICATION), }; int ret; -- cgit From 1e0cb399c7653462d9dadf8ab9425337c355d358 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 29 Sep 2023 18:01:13 -0400 Subject: ring-buffer: Update "shortest_full" in polling It was discovered that the ring buffer polling was incorrectly stating that read would not block, but that's because polling did not take into account that reads will block if the "buffer-percent" was set. Instead, the ring buffer polling would say reads would not block if there was any data in the ring buffer. This was incorrect behavior from a user space point of view. This was fixed by commit 42fb0a1e84ff by having the polling code check if the ring buffer had more data than what the user specified "buffer percent" had. The problem now is that the polling code did not register itself to the writer that it wanted to wait for a specific "full" value of the ring buffer. The result was that the writer would wake the polling waiter whenever there was a new event. The polling waiter would then wake up, see that there's not enough data in the ring buffer to notify user space and then go back to sleep. The next event would wake it up again. Before the polling fix was added, the code would wake up around 100 times for a hackbench 30 benchmark. After the "fix", due to the constant waking of the writer, it would wake up over 11,0000 times! It would never leave the kernel, so the user space behavior was still "correct", but this definitely is not the desired effect. To fix this, have the polling code add what it's waiting for to the "shortest_full" variable, to tell the writer not to wake it up if the buffer is not as full as it expects to be. Note, after this fix, it appears that the waiter is now woken up around 2x the times it was before (~200). This is a tremendous improvement from the 11,000 times, but I will need to spend some time to see why polling is more aggressive in its wakeups than the read blocking code. Link: https://lore.kernel.org/linux-trace-kernel/20230929180113.01c2cae3@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Fixes: 42fb0a1e84ff ("tracing/ring-buffer: Have polling block on watermark") Reported-by: Julia Lawall Tested-by: Julia Lawall Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 28daf0ce95c5..515cafdb18d9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1137,6 +1137,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, if (full) { poll_wait(filp, &work->full_waiters, poll_table); work->full_waiters_pending = true; + if (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full) + cpu_buffer->shortest_full = full; } else { poll_wait(filp, &work->waiters, poll_table); work->waiters_pending = true; -- cgit From 23cce5f25491968b23fb9c399bbfb25f13870cd9 Mon Sep 17 00:00:00 2001 From: Clément Léger Date: Fri, 29 Sep 2023 21:16:37 +0200 Subject: tracing: relax trace_event_eval_update() execution with cond_resched() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When kernel is compiled without preemption, the eval_map_work_func() (which calls trace_event_eval_update()) will not be preempted up to its complete execution. This can actually cause a problem since if another CPU call stop_machine(), the call will have to wait for the eval_map_work_func() function to finish executing in the workqueue before being able to be scheduled. This problem was observe on a SMP system at boot time, when the CPU calling the initcalls executed clocksource_done_booting() which in the end calls stop_machine(). We observed a 1 second delay because one CPU was executing eval_map_work_func() and was not preempted by the stop_machine() task. Adding a call to cond_resched() in trace_event_eval_update() allows other tasks to be executed and thus continue working asynchronously like before without blocking any pending task at boot time. Link: https://lore.kernel.org/linux-trace-kernel/20230929191637.416931-1-cleger@rivosinc.com Cc: Masami Hiramatsu Signed-off-by: Clément Léger Tested-by: Atish Patra Reviewed-by: Atish Patra Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 91951d038ba4..f49d6ddb6342 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2770,6 +2770,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) update_event_fields(call, map[i]); } } + cond_resched(); } up_write(&trace_event_sem); } -- cgit From 2de9ee94054263940122aee8720e902b30c27930 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Mon, 25 Sep 2023 23:08:28 +0000 Subject: tracing/user_events: Align set_bit() address for all archs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures should use a long aligned address passed to set_bit(). User processes can pass either a 32-bit or 64-bit sized value to be updated when tracing is enabled when on a 64-bit kernel. Both cases are ensured to be naturally aligned, however, that is not enough. The address must be long aligned without affecting checks on the value within the user process which require different adjustments for the bit for little and big endian CPUs. Add a compat flag to user_event_enabler that indicates when a 32-bit value is being used on a 64-bit kernel. Long align addresses and correct the bit to be used by set_bit() to account for this alignment. Ensure compat flags are copied during forks and used during deletion clears. Link: https://lore.kernel.org/linux-trace-kernel/20230925230829.341-2-beaub@linux.microsoft.com Link: https://lore.kernel.org/linux-trace-kernel/20230914131102.179100-1-cleger@rivosinc.com/ Cc: stable@vger.kernel.org Fixes: 7235759084a4 ("tracing/user_events: Use remote writes for event enablement") Reported-by: Clément Léger Suggested-by: Clément Léger Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 58 +++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 6f046650e527..b87f41187c6a 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -127,8 +127,13 @@ struct user_event_enabler { /* Bit 7 is for freeing status of enablement */ #define ENABLE_VAL_FREEING_BIT 7 -/* Only duplicate the bit value */ -#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK +/* Bit 8 is for marking 32-bit on 64-bit */ +#define ENABLE_VAL_32_ON_64_BIT 8 + +#define ENABLE_VAL_COMPAT_MASK (1 << ENABLE_VAL_32_ON_64_BIT) + +/* Only duplicate the bit and compat values */ +#define ENABLE_VAL_DUP_MASK (ENABLE_VAL_BIT_MASK | ENABLE_VAL_COMPAT_MASK) #define ENABLE_BITOPS(e) (&(e)->values) @@ -174,6 +179,30 @@ struct user_event_validator { int flags; }; +static inline void align_addr_bit(unsigned long *addr, int *bit, + unsigned long *flags) +{ + if (IS_ALIGNED(*addr, sizeof(long))) { +#ifdef __BIG_ENDIAN + /* 32 bit on BE 64 bit requires a 32 bit offset when aligned. */ + if (test_bit(ENABLE_VAL_32_ON_64_BIT, flags)) + *bit += 32; +#endif + return; + } + + *addr = ALIGN_DOWN(*addr, sizeof(long)); + + /* + * We only support 32 and 64 bit values. The only time we need + * to align is a 32 bit value on a 64 bit kernel, which on LE + * is always 32 bits, and on BE requires no change when unaligned. + */ +#ifdef __LITTLE_ENDIAN + *bit += 32; +#endif +} + typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, void *tpdata, bool *faulted); @@ -482,6 +511,7 @@ static int user_event_enabler_write(struct user_event_mm *mm, unsigned long *ptr; struct page *page; void *kaddr; + int bit = ENABLE_BIT(enabler); int ret; lockdep_assert_held(&event_mutex); @@ -497,6 +527,8 @@ static int user_event_enabler_write(struct user_event_mm *mm, test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) return -EBUSY; + align_addr_bit(&uaddr, &bit, ENABLE_BITOPS(enabler)); + ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, &page, NULL); @@ -515,9 +547,9 @@ static int user_event_enabler_write(struct user_event_mm *mm, /* Update bit atomically, user tracers must be atomic as well */ if (enabler->event && enabler->event->status) - set_bit(ENABLE_BIT(enabler), ptr); + set_bit(bit, ptr); else - clear_bit(ENABLE_BIT(enabler), ptr); + clear_bit(bit, ptr); kunmap_local(kaddr); unpin_user_pages_dirty_lock(&page, 1, true); @@ -849,6 +881,12 @@ static struct user_event_enabler enabler->event = user; enabler->addr = uaddr; enabler->values = reg->enable_bit; + +#if BITS_PER_LONG >= 64 + if (reg->enable_size == 4) + set_bit(ENABLE_VAL_32_ON_64_BIT, ENABLE_BITOPS(enabler)); +#endif + retry: /* Prevents state changes from racing with new enablers */ mutex_lock(&event_mutex); @@ -2377,7 +2415,8 @@ static long user_unreg_get(struct user_unreg __user *ureg, } static int user_event_mm_clear_bit(struct user_event_mm *user_mm, - unsigned long uaddr, unsigned char bit) + unsigned long uaddr, unsigned char bit, + unsigned long flags) { struct user_event_enabler enabler; int result; @@ -2385,7 +2424,7 @@ static int user_event_mm_clear_bit(struct user_event_mm *user_mm, memset(&enabler, 0, sizeof(enabler)); enabler.addr = uaddr; - enabler.values = bit; + enabler.values = bit | flags; retry: /* Prevents state changes from racing with new enablers */ mutex_lock(&event_mutex); @@ -2415,6 +2454,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) struct user_event_mm *mm = current->user_event_mm; struct user_event_enabler *enabler, *next; struct user_unreg reg; + unsigned long flags; long ret; ret = user_unreg_get(ureg, ®); @@ -2425,6 +2465,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) if (!mm) return -ENOENT; + flags = 0; ret = -ENOENT; /* @@ -2441,6 +2482,9 @@ static long user_events_ioctl_unreg(unsigned long uarg) ENABLE_BIT(enabler) == reg.disable_bit) { set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); + /* We must keep compat flags for the clear */ + flags |= enabler->values & ENABLE_VAL_COMPAT_MASK; + if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) user_event_enabler_destroy(enabler, true); @@ -2454,7 +2498,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) /* Ensure bit is now cleared for user, regardless of event status */ if (!ret) ret = user_event_mm_clear_bit(mm, reg.disable_addr, - reg.disable_bit); + reg.disable_bit, flags); return ret; } -- cgit From 2598bd3ca8dcf5bbca1161ee5b271b432398da37 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sat, 30 Sep 2023 09:01:06 -0400 Subject: eventfs: Test for dentries array allocated in eventfs_release() The dcache_dir_open_wrapper() could be called when a dynamic event is being deleted leaving a dentry with no children. In this case the dlist->dentries array will never be allocated. This needs to be checked for in eventfs_release(), otherwise it will trigger a NULL pointer dereference. Link: https://lore.kernel.org/linux-trace-kernel/20230930090106.1c3164e9@rorschach.local.home Cc: Mark Rutland Acked-by: Masami Hiramatsu (Google) Fixes: ef36b4f92868 ("eventfs: Remember what dentries were created on dir open") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5f1714089884..8c8d64e76103 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -421,7 +421,7 @@ static int eventfs_release(struct inode *inode, struct file *file) if (WARN_ON_ONCE(!dlist)) return -EINVAL; - for (i = 0; dlist->dentries[i]; i++) { + for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { dput(dlist->dentries[i]); } -- cgit From cbc3d00cf88fda95dbcafee3b38655b7a8f2650a Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 28 Sep 2023 17:28:07 -0300 Subject: modpost: add missing else to the "of" check Without this 'else' statement, an "usb" name goes into two handlers: the first/previous 'if' statement _AND_ the for-loop over 'devtable', but the latter is useless as it has no 'usb' device_id entry anyway. Tested with allmodconfig before/after patch; no changes to *.mod.c: git checkout v6.6-rc3 make -j$(nproc) allmodconfig make -j$(nproc) olddefconfig make -j$(nproc) find . -name '*.mod.c' | cpio -pd /tmp/before # apply patch make -j$(nproc) find . -name '*.mod.c' | cpio -pd /tmp/after diff -r /tmp/before/ /tmp/after/ # no difference Fixes: acbef7b76629 ("modpost: fix module autoloading for OF devices with generic compatible property") Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Masahiro Yamada --- scripts/mod/file2alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 38120f932b0d..7056751c29b1 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1604,7 +1604,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, /* First handle the "special" cases */ if (sym_is(name, namelen, "usb")) do_usb_table(symval, sym->st_size, mod); - if (sym_is(name, namelen, "of")) + else if (sym_is(name, namelen, "of")) do_of_table(symval, sym->st_size, mod); else if (sym_is(name, namelen, "pnp")) do_pnp_device_entry(symval, sym->st_size, mod); -- cgit From 15e86643d5b6df08ebd65c7b5aa607fd2ed2d9d1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 30 Sep 2023 16:13:35 +0900 Subject: vmlinux.lds.h: remove unused CPU_KEEP and CPU_DISCARD macros Remove the left-over of commit e24f6628811e ("modpost: remove all traces of cpuinit/cpuexit sections"). Signed-off-by: Masahiro Yamada Acked-by: Paul Gortmaker --- include/asm-generic/vmlinux.lds.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9c59409104f6..67d8dd2f1bde 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -138,13 +138,6 @@ * are handled as text/data or they can be discarded (which * often happens at runtime) */ -#ifdef CONFIG_HOTPLUG_CPU -#define CPU_KEEP(sec) *(.cpu##sec) -#define CPU_DISCARD(sec) -#else -#define CPU_KEEP(sec) -#define CPU_DISCARD(sec) *(.cpu##sec) -#endif #if defined(CONFIG_MEMORY_HOTPLUG) #define MEM_KEEP(sec) *(.mem##sec) -- cgit From f177cd0c15fcc7bdbb68d8d1a3166dead95314c8 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sat, 30 Sep 2023 18:52:04 +0200 Subject: modpost: Don't let "driver"s reference .exit.* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers must not reference functions marked with __exit as these likely are not available when the code is built-in. There are few creative offenders uncovered for example in ARCH=amd64 allmodconfig builds. So only trigger the section mismatch warning for W=1 builds. The dual rule that drivers must not reference .init.* is implemented since commit 0db252452378 ("modpost: don't allow *driver to reference .init.*") which however missed that .exit.* should be handled in the same way. Thanks to Masahiro Yamada and Arnd Bergmann who gave valuable hints to find this improvement. Signed-off-by: Uwe Kleine-König Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index de499dce5265..b3dee80497cb 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1015,9 +1015,20 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, "*_console"))) return 0; - /* symbols in data sections that may refer to meminit/exit sections */ + /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_EXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_XXXEXIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until + * these are fixed. + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && + match(tosec, PATTERNS(EXIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; -- cgit From 6b09edc1b31762af58d3d95754354ca6a92d39c0 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Thu, 21 Sep 2023 14:24:43 +0800 Subject: net: stmmac: platform: fix the incorrect parameter The second parameter of stmmac_pltfr_init() needs the pointer of "struct plat_stmmacenet_data". So, correct the parameter typo when calling the function. Otherwise, it may cause this alignment exception when doing suspend/resume. [ 49.067201] CPU1 is up [ 49.135258] Internal error: SP/PC alignment exception: 000000008a000000 [#1] PREEMPT SMP [ 49.143346] Modules linked in: soc_imx9 crct10dif_ce polyval_ce nvmem_imx_ocotp_fsb_s400 polyval_generic layerscape_edac_mod snd_soc_fsl_asoc_card snd_soc_imx_audmux snd_soc_imx_card snd_soc_wm8962 el_enclave snd_soc_fsl_micfil rtc_pcf2127 rtc_pcf2131 flexcan can_dev snd_soc_fsl_xcvr snd_soc_fsl_sai imx8_media_dev(C) snd_soc_fsl_utils fuse [ 49.173393] CPU: 0 PID: 565 Comm: sh Tainted: G C 6.5.0-rc4-next-20230804-05047-g5781a6249dae #677 [ 49.183721] Hardware name: NXP i.MX93 11X11 EVK board (DT) [ 49.189190] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 49.196140] pc : 0x80800052 [ 49.198931] lr : stmmac_pltfr_resume+0x34/0x50 [ 49.203368] sp : ffff800082f8bab0 [ 49.206670] x29: ffff800082f8bab0 x28: ffff0000047d0ec0 x27: ffff80008186c170 [ 49.213794] x26: 0000000b5e4ff1ba x25: ffff800081e5fa74 x24: 0000000000000010 [ 49.220918] x23: ffff800081fe0000 x22: 0000000000000000 x21: 0000000000000000 [ 49.228042] x20: ffff0000001b4010 x19: ffff0000001b4010 x18: 0000000000000006 [ 49.235166] x17: ffff7ffffe007000 x16: ffff800080000000 x15: 0000000000000000 [ 49.242290] x14: 00000000000000fc x13: 0000000000000000 x12: 0000000000000000 [ 49.249414] x11: 0000000000000001 x10: 0000000000000a60 x9 : ffff800082f8b8c0 [ 49.256538] x8 : 0000000000000008 x7 : 0000000000000001 x6 : 000000005f54a200 [ 49.263662] x5 : 0000000001000000 x4 : ffff800081b93680 x3 : ffff800081519be0 [ 49.270786] x2 : 0000000080800052 x1 : 0000000000000000 x0 : ffff0000001b4000 [ 49.277911] Call trace: [ 49.280346] 0x80800052 [ 49.282781] platform_pm_resume+0x2c/0x68 [ 49.286785] dpm_run_callback.constprop.0+0x74/0x134 [ 49.291742] device_resume+0x88/0x194 [ 49.295391] dpm_resume+0x10c/0x230 [ 49.298866] dpm_resume_end+0x18/0x30 [ 49.302515] suspend_devices_and_enter+0x2b8/0x624 [ 49.307299] pm_suspend+0x1fc/0x348 [ 49.310774] state_store+0x80/0x104 [ 49.314258] kobj_attr_store+0x18/0x2c [ 49.318002] sysfs_kf_write+0x44/0x54 [ 49.321659] kernfs_fop_write_iter+0x120/0x1ec [ 49.326088] vfs_write+0x1bc/0x300 [ 49.329485] ksys_write+0x70/0x104 [ 49.332874] __arm64_sys_write+0x1c/0x28 [ 49.336783] invoke_syscall+0x48/0x114 [ 49.340527] el0_svc_common.constprop.0+0xc4/0xe4 [ 49.345224] do_el0_svc+0x38/0x98 [ 49.348526] el0_svc+0x2c/0x84 [ 49.351568] el0t_64_sync_handler+0x100/0x12c [ 49.355910] el0t_64_sync+0x190/0x194 [ 49.359567] Code: ???????? ???????? ???????? ???????? (????????) [ 49.365644] ---[ end trace 0000000000000000 ]--- Fixes: 97117eb51ec8 ("net: stmmac: platform: provide stmmac_pltfr_init()") Signed-off-by: Clark Wang Reviewed-by: Jacob Keller Reviewed-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0f28795e581c..2f0678f15fb7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -901,7 +901,7 @@ static int __maybe_unused stmmac_pltfr_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); int ret; - ret = stmmac_pltfr_init(pdev, priv->plat->bsp_priv); + ret = stmmac_pltfr_init(pdev, priv->plat); if (ret) return ret; -- cgit From 2d7d1bc119a4d7f54cfe0b1be480c34e8c712d06 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 Oct 2023 23:03:39 +0900 Subject: kbuild: remove stale code for 'source' symlink in packaging scripts Since commit d8131c2965d5 ("kbuild: remove $(MODLIB)/source symlink"), modules_install does not create the 'source' symlink. Remove the stale code from builddeb and kernel.spec. Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 1 - scripts/package/kernel.spec | 3 --- 2 files changed, 4 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index bf3f8561aa68..d7dd0d04c70c 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -64,7 +64,6 @@ install_linux_image () { ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" modules_install rm -f "${pdir}/lib/modules/${KERNELRELEASE}/build" - rm -f "${pdir}/lib/modules/${KERNELRELEASE}/source" # Install the kernel if [ "${ARCH}" = um ] ; then diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index ac3f2ee6d7a0..3eee0143e0c5 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -68,7 +68,6 @@ cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEA cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} cp .config %{buildroot}/boot/config-%{KERNELRELEASE} ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build -ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/source %if %{with_devel} %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}' %endif @@ -101,7 +100,6 @@ fi %defattr (-, root, root) /lib/modules/%{KERNELRELEASE} %exclude /lib/modules/%{KERNELRELEASE}/build -%exclude /lib/modules/%{KERNELRELEASE}/source /boot/* %files headers @@ -113,5 +111,4 @@ fi %defattr (-, root, root) /usr/src/kernels/%{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/build -/lib/modules/%{KERNELRELEASE}/source %endif -- cgit From 25563b581ba3a1f263a00e8c9a97f5e7363be6fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 08:46:26 +0000 Subject: net: fix possible store tearing in neigh_periodic_work() While looking at a related syzbot report involving neigh_periodic_work(), I found that I forgot to add an annotation when deleting an RCU protected item from a list. Readers use rcu_deference(*np), we need to use either rcu_assign_pointer() or WRITE_ONCE() on writer side to prevent store tearing. I use rcu_assign_pointer() to have lockdep support, this was the choice made in neigh_flush_dev(). Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6b76cd103195..7212c7e521ef 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -988,7 +988,9 @@ static void neigh_periodic_work(struct work_struct *work) (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); -- cgit From 5baa0433a15eadd729625004c37463acb982eca7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 09:27:13 +0000 Subject: neighbour: fix data-races around n->output n->output field can be read locklessly, while a writer might change the pointer concurrently. Add missing annotations to prevent load-store tearing. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- net/bridge/br_netfilter_hooks.c | 2 +- net/core/neighbour.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6da68886fabb..07022bb0d44d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -539,7 +539,7 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - return n->output(n, skb); + return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 15186247b59a..033034d68f1f 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -294,7 +294,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ /* tell br_dev_xmit to continue with forwarding */ nf_bridge->bridged_dnat = 1; /* FIXME Need to refragment */ - ret = neigh->output(neigh, skb); + ret = READ_ONCE(neigh->output)(neigh, skb); } neigh_release(neigh); return ret; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7212c7e521ef..9c09f091cbff 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -410,7 +410,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, */ __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; - n->output = neigh_blackhole; + WRITE_ONCE(n->output, neigh_blackhole); if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; else @@ -920,7 +920,7 @@ static void neigh_suspect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->output = neigh->ops->output; + WRITE_ONCE(neigh->output, neigh->ops->output); } /* Neighbour state is OK; @@ -932,7 +932,7 @@ static void neigh_connect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is connected\n", neigh); - neigh->output = neigh->ops->connected_output; + WRITE_ONCE(neigh->output, neigh->ops->connected_output); } static void neigh_periodic_work(struct work_struct *work) @@ -1449,7 +1449,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, if (n2) n1 = n2; } - n1->output(n1, skb); + READ_ONCE(n1->output)(n1, skb); if (n2) neigh_release(n2); rcu_read_unlock(); @@ -3155,7 +3155,7 @@ int neigh_xmit(int index, struct net_device *dev, rcu_read_unlock(); goto out_kfree_skb; } - err = neigh->output(neigh, skb); + err = READ_ONCE(neigh->output)(neigh, skb); rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { -- cgit From 9d4c75800f61e5d75c1659ba201b6c0c7ead3070 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Sep 2023 11:41:19 +0100 Subject: ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data() Including the transhdrlen in length is a problem when the packet is partially filled (e.g. something like send(MSG_MORE) happened previously) when appending to an IPv4 or IPv6 packet as we don't want to repeat the transport header or account for it twice. This can happen under some circumstances, such as splicing into an L2TP socket. The symptom observed is a warning in __ip6_append_data(): WARNING: CPU: 1 PID: 5042 at net/ipv6/ip6_output.c:1800 __ip6_append_data.isra.0+0x1be8/0x47f0 net/ipv6/ip6_output.c:1800 that occurs when MSG_SPLICE_PAGES is used to append more data to an already partially occupied skbuff. The warning occurs when 'copy' is larger than the amount of data in the message iterator. This is because the requested length includes the transport header length when it shouldn't. This can be triggered by, for example: sfd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP); bind(sfd, ...); // ::1 connect(sfd, ...); // ::1 port 7 send(sfd, buffer, 4100, MSG_MORE); sendfile(sfd, dfd, NULL, 1024); Fix this by only adding transhdrlen into the length if the write queue is empty in l2tp_ip6_sendmsg(), analogously to how UDP does things. l2tp_ip_sendmsg() looks like it won't suffer from this problem as it builds the UDP packet itself. Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Reported-by: syzbot+62cbf263225ae13ff153@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/0000000000001c12b30605378ce8@google.com/ Suggested-by: Willem de Bruijn Signed-off-by: David Howells cc: Eric Dumazet cc: Willem de Bruijn cc: "David S. Miller" cc: David Ahern cc: Paolo Abeni cc: Jakub Kicinski cc: netdev@vger.kernel.org cc: bpf@vger.kernel.org cc: syzkaller-bugs@googlegroups.com Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ed8ebb6f5909..11f3d375cec0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -507,7 +507,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ if (len > INT_MAX - transhdrlen) return -EMSGSIZE; - ulen = len + transhdrlen; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) @@ -628,6 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); + ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, -- cgit From 26297b4ce1ce4ea40bc9a48ec99f45da3f64d2e2 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Thu, 21 Sep 2023 18:46:40 -0500 Subject: net: replace calls to sock->ops->connect() with kernel_connect() commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect") ensured that kernel_connect() will not overwrite the address parameter in cases where BPF connect hooks perform an address rewrite. This change replaces direct calls to sock->ops->connect() in net with kernel_connect() to make these call safe. Link: https://lore.kernel.org/netdev/20230912013332.2048422-1-jrife@google.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Signed-off-by: Jordan Rife Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/rds/tcp_connect.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index da5af28ff57b..6e4ed1e11a3b 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1505,8 +1505,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - salen, 0); + result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index f0c477c5d1db..d788c6d28986 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) * own the socket */ rds_tcp_set_callbacks(sock, cp); - ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); + ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK); rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) -- cgit From 86a7e0b69bd5b812e48a20c66c2161744f3caa16 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Thu, 21 Sep 2023 18:46:41 -0500 Subject: net: prevent rewrite of msg_name in sock_sendmsg() Callers of sock_sendmsg(), and similarly kernel_sendmsg(), in kernel space may observe their value of msg_name change in cases where BPF sendmsg hooks rewrite the send address. This has been confirmed to break NFS mounts running in UDP mode and has the potential to break other systems. This patch: 1) Creates a new function called __sock_sendmsg() with same logic as the old sock_sendmsg() function. 2) Replaces calls to sock_sendmsg() made by __sys_sendto() and __sys_sendmsg() with __sock_sendmsg() to avoid an unnecessary copy, as these system calls are already protected. 3) Modifies sock_sendmsg() so that it makes a copy of msg_name if present before passing it down the stack to insulate callers from changes to the send address. Link: https://lore.kernel.org/netdev/20230912013332.2048422-1-jrife@google.com/ Fixes: 1cedee13d25a ("bpf: Hooks for sys_sendmsg") Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Signed-off-by: Jordan Rife Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/socket.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/net/socket.c b/net/socket.c index c8b08b32f097..a39ec136f5cf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -737,6 +737,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) return ret; } +static int __sock_sendmsg(struct socket *sock, struct msghdr *msg) +{ + int err = security_socket_sendmsg(sock, msg, + msg_data_left(msg)); + + return err ?: sock_sendmsg_nosec(sock, msg); +} + /** * sock_sendmsg - send a message through @sock * @sock: socket @@ -747,10 +755,19 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) */ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - int err = security_socket_sendmsg(sock, msg, - msg_data_left(msg)); + struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; + struct sockaddr_storage address; + int ret; - return err ?: sock_sendmsg_nosec(sock, msg); + if (msg->msg_name) { + memcpy(&address, msg->msg_name, msg->msg_namelen); + msg->msg_name = &address; + } + + ret = __sock_sendmsg(sock, msg); + msg->msg_name = save_addr; + + return ret; } EXPORT_SYMBOL(sock_sendmsg); @@ -1138,7 +1155,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = sock_sendmsg(sock, &msg); + res = __sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -2174,7 +2191,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg); + err = __sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -2538,7 +2555,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys); + err = __sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. -- cgit From c889a99a21bf124c3db08d09df919f0eccc5ea4c Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Thu, 21 Sep 2023 18:46:42 -0500 Subject: net: prevent address rewrite in kernel_bind() Similar to the change in commit 0bdf399342c5("net: Avoid address overwrite in kernel_connect"), BPF hooks run on bind may rewrite the address passed to kernel_bind(). This change 1) Makes a copy of the bind address in kernel_bind() to insulate callers. 2) Replaces direct calls to sock->ops->bind() in net with kernel_bind() Link: https://lore.kernel.org/netdev/20230912013332.2048422-1-jrife@google.com/ Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind") Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Signed-off-by: Jordan Rife Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 2 +- net/socket.c | 7 ++++++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6e4ed1e11a3b..4174076c66fa 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1439,7 +1439,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) sin.sin_addr.s_addr = addr; sin.sin_port = 0; - return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); + return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, @@ -1546,7 +1546,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id, get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; - result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); + result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index d788c6d28986..a0046e99d6df 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -145,7 +145,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) addrlen = sizeof(sin); } - ret = sock->ops->bind(sock, addr, addrlen); + ret = kernel_bind(sock, addr, addrlen); if (ret) { rdsdebug("bind failed with %d at address %pI6c\n", ret, &conn->c_laddr); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 014fa24418c1..53b3535a1e4a 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -306,7 +306,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) addr_len = sizeof(*sin); } - ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len); + ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len); if (ret < 0) { rdsdebug("could not bind %s listener socket: %d\n", isv6 ? "IPv6" : "IPv4", ret); diff --git a/net/socket.c b/net/socket.c index a39ec136f5cf..c4a6f5532955 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3516,7 +3516,12 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { - return READ_ONCE(sock->ops)->bind(sock, addr, addrlen); + struct sockaddr_storage address; + + memcpy(&address, addr, addrlen); + + return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address, + addrlen); } EXPORT_SYMBOL(kernel_bind); -- cgit From 8a749fd1a8720d4619c91c8b6e7528c0a355c0aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Oct 2023 14:15:13 -0700 Subject: Linux 6.6-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3de08c780c74..373649c7374e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From caa0578c1d487d39e4bb947a1b4965417053b409 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 22 Sep 2023 17:40:44 +0800 Subject: ptp: ocp: Fix error handling in ptp_ocp_device_init When device_add() fails, ptp_ocp_dev_release() will be called after put_device(). Therefore, it seems that the ptp_ocp_dev_release() before put_device() is redundant. Fixes: 773bda964921 ("ptp: ocp: Expose various resources on the timecard.") Signed-off-by: Dinghao Liu Reviewed-by: Vadim Feodrenko Signed-off-by: David S. Miller --- drivers/ptp/ptp_ocp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 20a974ced8d6..a7a6947ab4bc 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3998,7 +3998,6 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) return 0; out: - ptp_ocp_dev_release(&bp->dev); put_device(&bp->dev); return err; } -- cgit From 6ccf50d4d4741e064ba35511a95402c63bbe21a8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 22 Sep 2023 09:47:41 -0300 Subject: net: dsa: mv88e6xxx: Avoid EEPROM timeout when EEPROM is absent Since commit 23d775f12dcd ("net: dsa: mv88e6xxx: Wait for EEPROM done before HW reset") the following error is seen on a imx8mn board with a 88E6320 switch: mv88e6085 30be0000.ethernet-1:00: Timeout waiting for EEPROM done This board does not have an EEPROM attached to the switch though. This problem is well explained by Andrew Lunn: "If there is an EEPROM, and the EEPROM contains a lot of data, it could be that when we perform a hardware reset towards the end of probe, it interrupts an I2C bus transaction, leaving the I2C bus in a bad state, and future reads of the EEPROM do not work. The work around for this was to poll the EEInt status and wait for it to go true before performing the hardware reset. However, we have discovered that for some boards which do not have an EEPROM, EEInt never indicates complete. As a result, mv88e6xxx_g1_wait_eeprom_done() spins for a second and then prints a warning. We probably need a different solution than calling mv88e6xxx_g1_wait_eeprom_done(). The datasheet for 6352 documents the EEPROM Command register: bit 15 is: EEPROM Unit Busy. This bit must be set to a one to start an EEPROM operation (see EEOp below). Only one EEPROM operation can be executing at one time so this bit must be zero before setting it to a one. When the requested EEPROM operation completes this bit will automatically be cleared to a zero. The transition of this bit from a one to a zero can be used to generate an interrupt (the EEInt in Global 1, offset 0x00). and more interesting is bit 11: Register Loader Running. This bit is set to one whenever the register loader is busy executing instructions contained in the EEPROM." Change to using mv88e6xxx_g2_eeprom_wait() to fix the timeout error when the EEPROM chip is not present. Fixes: 23d775f12dcd ("net: dsa: mv88e6xxx: Wait for EEPROM done before HW reset") Suggested-by: Andrew Lunn Signed-off-by: Fabio Estevam Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 6 ++++-- drivers/net/dsa/mv88e6xxx/global1.c | 31 ------------------------------- drivers/net/dsa/mv88e6xxx/global1.h | 1 - drivers/net/dsa/mv88e6xxx/global2.c | 2 +- drivers/net/dsa/mv88e6xxx/global2.h | 1 + 5 files changed, 6 insertions(+), 35 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 52a99d8bada0..ab434a77b059 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2958,14 +2958,16 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) * from the wrong location resulting in the switch booting * to wrong mode and inoperable. */ - mv88e6xxx_g1_wait_eeprom_done(chip); + if (chip->info->ops->get_eeprom) + mv88e6xxx_g2_eeprom_wait(chip); gpiod_set_value_cansleep(gpiod, 1); usleep_range(10000, 20000); gpiod_set_value_cansleep(gpiod, 0); usleep_range(10000, 20000); - mv88e6xxx_g1_wait_eeprom_done(chip); + if (chip->info->ops->get_eeprom) + mv88e6xxx_g2_eeprom_wait(chip); } } diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index 2fa55a643591..174c773b38c2 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -75,37 +75,6 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip) return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1); } -void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip) -{ - const unsigned long timeout = jiffies + 1 * HZ; - u16 val; - int err; - - /* Wait up to 1 second for the switch to finish reading the - * EEPROM. - */ - while (time_before(jiffies, timeout)) { - err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val); - if (err) { - dev_err(chip->dev, "Error reading status"); - return; - } - - /* If the switch is still resetting, it may not - * respond on the bus, and so MDIO read returns - * 0xffff. Differentiate between that, and waiting for - * the EEPROM to be done by bit 0 being set. - */ - if (val != 0xffff && - val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE)) - return; - - usleep_range(1000, 2000); - } - - dev_err(chip->dev, "Timeout waiting for EEPROM done"); -} - /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1 * Offset 0x02: Switch MAC Address Register Bytes 2 & 3 * Offset 0x03: Switch MAC Address Register Bytes 4 & 5 diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index c99ddd117fe6..1095261f5b49 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -282,7 +282,6 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip); -void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip); diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 937a01f2ba75..b2b5f6ba438f 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -340,7 +340,7 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) * Offset 0x15: EEPROM Addr (for 8-bit data access) */ -static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) +int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) { int bit = __bf_shf(MV88E6XXX_G2_EEPROM_CMD_BUSY); int err; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 7e091965582b..d9434f7cae53 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -365,6 +365,7 @@ int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, int port); +int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops; -- cgit From 6a70e5cbedaf8ad10528ac9ac114f3ec20f422df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 09:50:39 -0700 Subject: sky2: Make sure there is at least one frag_addr available In the pathological case of building sky2 with 16k PAGE_SIZE, the frag_addr[] array would never be used, so the original code was correct that size should be 0. But the compiler now gets upset with 0 size arrays in places where it hasn't eliminated the code that might access such an array (it can't figure out that in this case an rx skb with fragments would never be created). To keep the compiler happy, make sure there is at least 1 frag_addr in struct rx_ring_info: In file included from include/linux/skbuff.h:28, from include/net/net_namespace.h:43, from include/linux/netdevice.h:38, from drivers/net/ethernet/marvell/sky2.c:18: drivers/net/ethernet/marvell/sky2.c: In function 'sky2_rx_unmap_skb': include/linux/dma-mapping.h:416:36: warning: array subscript i is outside array bounds of 'dma_addr_t[0]' {aka 'long long unsigned int[]'} [-Warray-bounds=] 416 | #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/marvell/sky2.c:1257:17: note: in expansion of macro 'dma_unmap_page' 1257 | dma_unmap_page(&pdev->dev, re->frag_addr[i], | ^~~~~~~~~~~~~~ In file included from drivers/net/ethernet/marvell/sky2.c:41: drivers/net/ethernet/marvell/sky2.h:2198:25: note: while referencing 'frag_addr' 2198 | dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT]; | ^~~~~~~~~ With CONFIG_PAGE_SIZE_16KB=y, PAGE_SHIFT == 14, so: #define ETH_JUMBO_MTU 9000 causes "ETH_JUMBO_MTU >> PAGE_SHIFT" to be 0. Use "?: 1" to solve this build warning. Cc: Mirko Lindner Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309191958.UBw1cjXk-lkp@intel.com/ Reviewed-by: Alexander Lobakin Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.h b/drivers/net/ethernet/marvell/sky2.h index ddec1627f1a7..8d0bacf4e49c 100644 --- a/drivers/net/ethernet/marvell/sky2.h +++ b/drivers/net/ethernet/marvell/sky2.h @@ -2195,7 +2195,7 @@ struct rx_ring_info { struct sk_buff *skb; dma_addr_t data_addr; DEFINE_DMA_UNMAP_LEN(data_size); - dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT]; + dma_addr_t frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT ?: 1]; }; enum flow_control { -- cgit From f0575116507b981e6a810e78ce3c9040395b958b Mon Sep 17 00:00:00 2001 From: Duje Mihanović Date: Fri, 29 Sep 2023 17:41:57 +0200 Subject: gpio: pxa: disable pinctrl calls for MMP_GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to PXA3xx and MMP2, pinctrl-single isn't capable of setting pin direction on MMP either. Fixes: a770d946371e ("gpio: pxa: add pin control gpio direction and request") Signed-off-by: Duje Mihanović Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pxa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 7e9f7a32d3ee..cae9661862fe 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -237,6 +237,7 @@ static bool pxa_gpio_has_pinctrl(void) switch (gpio_type) { case PXA3XX_GPIO: case MMP2_GPIO: + case MMP_GPIO: return false; default: -- cgit From 760d03f90bf35a0724acd71a60c1ad753fdaa53e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 17 Sep 2023 15:47:16 +0200 Subject: media: pxa_camera: Fix an error handling path in pxa_camera_probe() The commit in Fixes has reordered the code and the error handling path. However one 'goto' was missed. Fix it and branch at the correct place in the error handling path. Fixes: 5073d10cbaba ("media: pxa_camera: Register V4L2 device early") Signed-off-by: Christophe JAILLET Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/platform/intel/pxa_camera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/intel/pxa_camera.c b/drivers/media/platform/intel/pxa_camera.c index 6e6caf50e11e..59b89e421dc2 100644 --- a/drivers/media/platform/intel/pxa_camera.c +++ b/drivers/media/platform/intel/pxa_camera.c @@ -2398,7 +2398,7 @@ static int pxa_camera_probe(struct platform_device *pdev) PXA_CAM_DRV_NAME, pcdev); if (err) { dev_err(&pdev->dev, "Camera interrupt register failed\n"); - goto exit_v4l2_device_unregister; + goto exit_deactivate; } pcdev->notifier.ops = &pxa_camera_sensor_ops; -- cgit From 4670c8c3fb04e0e83e809ee57f94d1a4a704d26c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 27 Sep 2023 11:53:12 +0300 Subject: media: ipu-bridge: Fix Kconfig dependencies The current Kconfig symbol dependencies allow having the following Kconfig symbol values CONFIG_IPU_BRIDGE=m CONFIG_VIDEO_IPU3_CIO2=y CONFIG_CIO2_BRIDGE=n This does not work as the IPU bridge API is conditional to IS_ENABLED(CONFIG_IPU_BRIDGE). Fix this by changing the dependencies so that CONFIG_IPU_BRIDGE can be separately selected. The CONFIG_CIO2_BRIDGE symbol becomes redundant as a result and is removed. Fixes: 2545a2c02ba1 ("media: ipu3-cio2: allow ipu_bridge to be a module again") Suggested-by: Arnd Bergmann Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/Kconfig | 20 ++++++++++++++------ drivers/media/pci/intel/ipu3/Kconfig | 21 +-------------------- drivers/staging/media/atomisp/Kconfig | 2 +- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/media/pci/intel/Kconfig b/drivers/media/pci/intel/Kconfig index e113902fa806..ee4684159d3d 100644 --- a/drivers/media/pci/intel/Kconfig +++ b/drivers/media/pci/intel/Kconfig @@ -1,11 +1,19 @@ # SPDX-License-Identifier: GPL-2.0-only + +source "drivers/media/pci/intel/ipu3/Kconfig" +source "drivers/media/pci/intel/ivsc/Kconfig" + config IPU_BRIDGE - tristate + tristate "Intel IPU Bridge" depends on I2C && ACPI help - This is a helper module for the IPU bridge, which can be - used by ipu3 and other drivers. In order to handle module - dependencies, this is selected by each driver that needs it. + The IPU bridge is a helper library for Intel IPU drivers to + function on systems shipped with Windows. -source "drivers/media/pci/intel/ipu3/Kconfig" -source "drivers/media/pci/intel/ivsc/Kconfig" + Currently used by the ipu3-cio2 and atomisp drivers. + + Supported systems include: + + - Microsoft Surface models (except Surface Pro 3) + - The Lenovo Miix line (for example the 510, 520, 710 and 720) + - Dell 7285 diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig index 0951545eab21..c0a250daa927 100644 --- a/drivers/media/pci/intel/ipu3/Kconfig +++ b/drivers/media/pci/intel/ipu3/Kconfig @@ -2,13 +2,13 @@ config VIDEO_IPU3_CIO2 tristate "Intel ipu3-cio2 driver" depends on VIDEO_DEV && PCI + depends on IPU_BRIDGE || !IPU_BRIDGE depends on ACPI || COMPILE_TEST depends on X86 select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_FWNODE select VIDEOBUF2_DMA_SG - select IPU_BRIDGE if CIO2_BRIDGE help This is the Intel IPU3 CIO2 CSI-2 receiver unit, found in Intel @@ -18,22 +18,3 @@ config VIDEO_IPU3_CIO2 Say Y or M here if you have a Skylake/Kaby Lake SoC with MIPI CSI-2 connected camera. The module will be called ipu3-cio2. - -config CIO2_BRIDGE - bool "IPU3 CIO2 Sensors Bridge" - depends on VIDEO_IPU3_CIO2 && ACPI - depends on I2C - help - This extension provides an API for the ipu3-cio2 driver to create - connections to cameras that are hidden in the SSDB buffer in ACPI. - It can be used to enable support for cameras in detachable / hybrid - devices that ship with Windows. - - Say Y here if your device is a detachable / hybrid laptop that comes - with Windows installed by the OEM, for example: - - - Microsoft Surface models (except Surface Pro 3) - - The Lenovo Miix line (for example the 510, 520, 710 and 720) - - Dell 7285 - - If in doubt, say N here. diff --git a/drivers/staging/media/atomisp/Kconfig b/drivers/staging/media/atomisp/Kconfig index 5d8917160d41..75c985da75b5 100644 --- a/drivers/staging/media/atomisp/Kconfig +++ b/drivers/staging/media/atomisp/Kconfig @@ -12,12 +12,12 @@ menuconfig INTEL_ATOMISP config VIDEO_ATOMISP tristate "Intel Atom Image Signal Processor Driver" depends on VIDEO_DEV && INTEL_ATOMISP + depends on IPU_BRIDGE depends on MEDIA_PCI_SUPPORT depends on PMIC_OPREGION depends on I2C select V4L2_FWNODE select IOSF_MBI - select IPU_BRIDGE select VIDEOBUF2_VMALLOC select VIDEO_V4L2_SUBDEV_API help -- cgit From 90d3c11af000a9ebeee7b0adb244a96174d8be4f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Sep 2023 09:21:19 +0200 Subject: media: pci: intel: ivsc: select V4L2_FWNODE Some missing select statements were already added back, but I ran into another one that is missing: ERROR: modpost: "v4l2_fwnode_endpoint_free" [drivers/media/pci/intel/ivsc/ivsc-csi.ko] undefined! ERROR: modpost: "v4l2_fwnode_endpoint_alloc_parse" [drivers/media/pci/intel/ivsc/ivsc-csi.ko] undefined! ERROR: modpost: "v4l2_fwnode_endpoint_parse" [drivers/media/pci/intel/ivsc/ivsc-csi.ko] undefined! Fixes: 29006e196a56 ("media: pci: intel: ivsc: Add CSI submodule") Signed-off-by: Arnd Bergmann [Sakari Ailus: Drop V4L2_ASYNC dependency, it is implied now.] Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ivsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 212753450576..a8cb981544f7 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -6,7 +6,7 @@ config INTEL_VSC depends on INTEL_MEI && ACPI && VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API - select V4L2_ASYNC + select V4L2_FWNODE help This adds support for Intel Visual Sensing Controller (IVSC). -- cgit From 6bd01c4299d4428738ab18f59334ce8b8207a531 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 26 Sep 2023 12:03:53 +0200 Subject: staging: media: tegra-video: fix infinite recursion regression Since commit 9bf19fbf0c8b ("media: v4l: async: Rework internal lists"), aka v6.6-rc1~97^2~198, probing the tegra-video VI driver causes infinite recursion due tegra_vi_graph_parse_one() calling itself until: [ 1.571168] Insufficient stack space to handle exception! ... [ 1.591416] Internal error: kernel stack overflow: 0 [#1] PREEMPT SMP ARM ... [ 3.861013] of_phandle_iterator_init from __of_parse_phandle_with_args+0x40/0xf0 [ 3.868497] __of_parse_phandle_with_args from of_fwnode_graph_get_remote_endpoint+0x68/0xa8 [ 3.876938] of_fwnode_graph_get_remote_endpoint from fwnode_graph_get_remote_port_parent+0x30/0x7c [ 3.885984] fwnode_graph_get_remote_port_parent from tegra_vi_graph_parse_one+0x7c/0x224 [ 3.894158] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 [ 3.901459] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 [ 3.908760] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 [ 3.916061] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 ... [ 4.857892] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 [ 4.865193] tegra_vi_graph_parse_one from tegra_vi_graph_parse_one+0x144/0x224 [ 4.872494] tegra_vi_graph_parse_one from tegra_vi_init+0x574/0x6d4 [ 4.878842] tegra_vi_init from host1x_device_init+0x84/0x15c [ 4.884594] host1x_device_init from host1x_video_probe+0xa0/0x114 [ 4.890770] host1x_video_probe from really_probe+0xe0/0x400 The reason is the mentioned commit changed tegra_vi_graph_find_entity() to search for an entity in the done notifier list: > @@ -1464,7 +1464,7 @@ tegra_vi_graph_find_entity(struct tegra_vi_channel *chan, > struct tegra_vi_graph_entity *entity; > struct v4l2_async_connection *asd; > > - list_for_each_entry(asd, &chan->notifier.asc_list, asc_entry) { > + list_for_each_entry(asd, &chan->notifier.done_list, asc_entry) { > entity = to_tegra_vi_graph_entity(asd); > if (entity->asd.match.fwnode == fwnode) > return entity; This is not always correct, being tegra_vi_graph_find_entity() called in three locations, in this order: 1. tegra_vi_graph_parse_one() -- called while probing 2. tegra_vi_graph_notify_bound() -- the .bound notifier op 3. tegra_vi_graph_build() -- called in the .complete notifier op Locations 1 and 2 are called before moving the entity from waiting_list to done_list, thus they won't find what they are looking for in done_list. Location 3 happens afterwards and thus it is not broken, however it means tegra_vi_graph_find_entity() should not search in the same list every time. The error appears at step 1: tegra_vi_graph_parse_one() iterates recursively until it finds the entity already notified, which now never happens. Fix by passing the specific notifier list pointer to tegra_vi_graph_find_entity() instead of the channel, so each caller can search in whatever list is correct. Also improve the tegra_vi_graph_find_entity() comment. Fixes: 9bf19fbf0c8b ("media: v4l: async: Rework internal lists") Cc: Thierry Reding Cc: Jonathan Hunter Cc: Sowjanya Komatineni Signed-off-by: Luca Ceresoli [Sakari Ailus: Wrapped some long lines.] Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/staging/media/tegra-video/vi.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c index e98b3010520e..94171e62dee9 100644 --- a/drivers/staging/media/tegra-video/vi.c +++ b/drivers/staging/media/tegra-video/vi.c @@ -1455,17 +1455,18 @@ static int __maybe_unused vi_runtime_suspend(struct device *dev) } /* - * Graph Management + * Find the entity matching a given fwnode in an v4l2_async_notifier list */ static struct tegra_vi_graph_entity * -tegra_vi_graph_find_entity(struct tegra_vi_channel *chan, +tegra_vi_graph_find_entity(struct list_head *list, const struct fwnode_handle *fwnode) { struct tegra_vi_graph_entity *entity; struct v4l2_async_connection *asd; - list_for_each_entry(asd, &chan->notifier.done_list, asc_entry) { + list_for_each_entry(asd, list, asc_entry) { entity = to_tegra_vi_graph_entity(asd); + if (entity->asd.match.fwnode == fwnode) return entity; } @@ -1532,7 +1533,8 @@ static int tegra_vi_graph_build(struct tegra_vi_channel *chan, } /* find the remote entity from notifier list */ - ent = tegra_vi_graph_find_entity(chan, link.remote_node); + ent = tegra_vi_graph_find_entity(&chan->notifier.done_list, + link.remote_node); if (!ent) { dev_err(vi->dev, "no entity found for %pOF\n", to_of_node(link.remote_node)); @@ -1664,7 +1666,8 @@ static int tegra_vi_graph_notify_bound(struct v4l2_async_notifier *notifier, * Locate the entity corresponding to the bound subdev and store the * subdev pointer. */ - entity = tegra_vi_graph_find_entity(chan, subdev->fwnode); + entity = tegra_vi_graph_find_entity(&chan->notifier.waiting_list, + subdev->fwnode); if (!entity) { dev_err(vi->dev, "no entity for subdev %s\n", subdev->name); return -EINVAL; @@ -1713,7 +1716,8 @@ static int tegra_vi_graph_parse_one(struct tegra_vi_channel *chan, /* skip entities that are already processed */ if (device_match_fwnode(vi->dev, remote) || - tegra_vi_graph_find_entity(chan, remote)) { + tegra_vi_graph_find_entity(&chan->notifier.waiting_list, + remote)) { fwnode_handle_put(remote); continue; } -- cgit From 8542f1712074f070ae90b64e6082d10d8e912e32 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 2 Oct 2023 13:04:45 +0300 Subject: ovl: fix file reference leak when submitting aio Commit 724768a39374 ("ovl: fix incorrect fdput() on aio completion") took a refcount on real file before submitting aio, but forgot to avoid clearing FDPUT_FPUT from real.flags stack variable. This can result in a file reference leak. Fixes: 724768a39374 ("ovl: fix incorrect fdput() on aio completion") Reported-by: Gil Lev Signed-off-by: Amir Goldstein --- fs/overlayfs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 693971d20280..8be4dc050d1e 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -341,7 +341,6 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (!aio_req) goto out; - real.flags = 0; aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); aio_req->iocb.ki_complete = ovl_aio_rw_complete; @@ -413,7 +412,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!aio_req) goto out; - real.flags = 0; aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); aio_req->iocb.ki_flags = ifl; -- cgit From e0fe97efdb00f0f32b038a4836406a82886aec9c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 27 Sep 2023 12:05:11 +0300 Subject: RDMA/cma: Initialize ib_sa_multicast structure to 0 when join Initialize the structure to 0 so that it's fields won't have random values. For example fields like rec.traffic_class (as well as rec.flow_label and rec.sl) is used to generate the user AH through: cma_iboe_join_multicast cma_make_mc_event ib_init_ah_from_mcmember And a random traffic_class causes a random IP DSCP in RoCEv2. Fixes: b5de0c60cc30 ("RDMA/cma: Fix use after free race in roce multicast join") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/20230927090511.603595-1-markzhang@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c343edf2f664..1e2cd7c8716e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4968,7 +4968,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; - struct ib_sa_multicast ib; + struct ib_sa_multicast ib = {}; enum ib_gid_type gid_type; bool send_only; -- cgit From 34f08eb0ba6e4869bbfb682bf3d7d0494ffd2f87 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 13 Jul 2023 04:14:29 -0400 Subject: usb: cdnsp: Fixes issue with dequeuing not queued requests Gadget ACM while unloading module try to dequeue not queued usb request which causes the kernel to crash. Patch adds extra condition to check whether usb request is processed by CDNSP driver. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/20230713081429.326660-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index fff9ec9c391f..4b67749edb99 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1125,6 +1125,9 @@ static int cdnsp_gadget_ep_dequeue(struct usb_ep *ep, unsigned long flags; int ret; + if (request->status != -EINPROGRESS) + return 0; + if (!pep->endpoint.desc) { dev_err(pdev->dev, "%s: can't dequeue to disabled endpoint\n", -- cgit From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 28 Sep 2023 20:20:47 +0300 Subject: RDMA/mlx5: Remove not-used cache disable flag During execution of mlx5_mkey_cache_cleanup(), there is a guarantee that MR are not registered and/or destroyed. It means that we don't need newly introduced cache disable flag. Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d1ff98aad162..16713baf0d06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -798,7 +798,6 @@ struct mlx5_mkey_cache { struct dentry *fs_root; unsigned long last_add; struct delayed_work remove_ent_dwork; - u8 disable: 1; }; struct mlx5_ib_port_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 433f96459246..8a3762d9ff58 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1026,7 +1026,6 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) return; mutex_lock(&dev->cache.rb_lock); - dev->cache.disable = true; for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); @@ -1830,10 +1829,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, } mutex_lock(&cache->rb_lock); - if (cache->disable) { - mutex_unlock(&cache->rb_lock); - return 0; - } ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); if (ent) { if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -- cgit From 6658a62e1ddf726483cb2d8bf45ea3f9bd533074 Mon Sep 17 00:00:00 2001 From: Xingxing Luo Date: Fri, 22 Sep 2023 15:59:29 +0800 Subject: usb: musb: Modify the "HWVers" register address musb HWVers rgister address is not 0x69, if we operate the wrong address 0x69, it will cause a kernel crash, because there is no register corresponding to this address in the additional control register of musb. In fact, HWVers has been defined in musb_register.h, and the name is "MUSB_HWVERS", so We need to use this macro instead of 0x69. Fixes: c2365ce5d5a0 ("usb: musb: replace hard coded registers with defines") Cc: stable@vger.kernel.org Signed-off-by: Xingxing Luo Link: https://lore.kernel.org/r/20230922075929.31074-1-xingxing.luo@unisoc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 78c726a71b17..2d623284edf6 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -39,7 +39,7 @@ static const struct musb_register_map musb_regmap[] = { { "IntrUsbE", MUSB_INTRUSBE, 8 }, { "DevCtl", MUSB_DEVCTL, 8 }, { "VControl", 0x68, 32 }, - { "HWVers", 0x69, 16 }, + { "HWVers", MUSB_HWVERS, 16 }, { "LinkInfo", MUSB_LINKINFO, 8 }, { "VPLen", MUSB_VPLEN, 8 }, { "HS_EOF1", MUSB_HS_EOF1, 8 }, -- cgit From 33d7e37232155aadebe4145dcc592f00dabd7a2b Mon Sep 17 00:00:00 2001 From: Xingxing Luo Date: Tue, 19 Sep 2023 11:30:55 +0800 Subject: usb: musb: Get the musb_qh poniter after musb_giveback When multiple threads are performing USB transmission, musb->lock will be unlocked when musb_giveback is executed. At this time, qh may be released in the dequeue process in other threads, resulting in a wild pointer, so it needs to be here get qh again, and judge whether qh is NULL, and when dequeue, you need to set qh to NULL. Fixes: dbac5d07d13e ("usb: musb: host: don't start next rx urb if current one failed") Cc: stable@vger.kernel.org Signed-off-by: Xingxing Luo Link: https://lore.kernel.org/r/20230919033055.14085-1-xingxing.luo@unisoc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index a02c29216955..bc4507781167 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -321,10 +321,16 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb, musb_giveback(musb, urb, status); qh->is_ready = ready; + /* + * musb->lock had been unlocked in musb_giveback, so qh may + * be freed, need to get it again + */ + qh = musb_ep_get_qh(hw_ep, is_in); + /* reclaim resources (and bandwidth) ASAP; deschedule it, and * invalidate qh as soon as list_empty(&hep->urb_list) */ - if (list_empty(&qh->hep->urb_list)) { + if (qh && list_empty(&qh->hep->urb_list)) { struct list_head *head; struct dma_controller *dma = musb->dma_controller; @@ -2398,6 +2404,7 @@ static int musb_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * and its URB list has emptied, recycle this qh. */ if (ready && list_empty(&qh->hep->urb_list)) { + musb_ep_set_qh(qh->hw_ep, is_in, NULL); qh->hep->hcpriv = NULL; list_del(&qh->ring); kfree(qh); -- cgit From 427694cfaafa565a3db5c5ea71df6bc095dca92f Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 27 Sep 2023 16:28:58 +0530 Subject: usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When NCM is used with hosts like Windows PC, it is observed that there are multiple NTB's contained in one usb request giveback. Since the driver unwraps the obtained request data assuming only one NTB is present, we loose the subsequent NTB's present resulting in data loss. Fix this by checking the parsed block length with the obtained data length in usb request and continue parsing after the last byte of current NTB. Cc: stable@vger.kernel.org Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20230927105858.12950-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index feccf4c8cc4f..e6ab8cc225ff 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1156,7 +1156,8 @@ static int ncm_unwrap_ntb(struct gether *port, struct sk_buff_head *list) { struct f_ncm *ncm = func_to_ncm(&port->func); - __le16 *tmp = (void *) skb->data; + unsigned char *ntb_ptr = skb->data; + __le16 *tmp; unsigned index, index2; int ndp_index; unsigned dg_len, dg_len2; @@ -1169,6 +1170,10 @@ static int ncm_unwrap_ntb(struct gether *port, const struct ndp_parser_opts *opts = ncm->parser_opts; unsigned crc_len = ncm->is_crc ? sizeof(uint32_t) : 0; int dgram_counter; + int to_process = skb->len; + +parse_ntb: + tmp = (__le16 *)ntb_ptr; /* dwSignature */ if (get_unaligned_le32(tmp) != opts->nth_sign) { @@ -1215,7 +1220,7 @@ static int ncm_unwrap_ntb(struct gether *port, * walk through NDP * dwSignature */ - tmp = (void *)(skb->data + ndp_index); + tmp = (__le16 *)(ntb_ptr + ndp_index); if (get_unaligned_le32(tmp) != ncm->ndp_sign) { INFO(port->func.config->cdev, "Wrong NDP SIGN\n"); goto err; @@ -1272,11 +1277,11 @@ static int ncm_unwrap_ntb(struct gether *port, if (ncm->is_crc) { uint32_t crc, crc2; - crc = get_unaligned_le32(skb->data + + crc = get_unaligned_le32(ntb_ptr + index + dg_len - crc_len); crc2 = ~crc32_le(~0, - skb->data + index, + ntb_ptr + index, dg_len - crc_len); if (crc != crc2) { INFO(port->func.config->cdev, @@ -1303,7 +1308,7 @@ static int ncm_unwrap_ntb(struct gether *port, dg_len - crc_len); if (skb2 == NULL) goto err; - skb_put_data(skb2, skb->data + index, + skb_put_data(skb2, ntb_ptr + index, dg_len - crc_len); skb_queue_tail(list, skb2); @@ -1316,10 +1321,17 @@ static int ncm_unwrap_ntb(struct gether *port, } while (ndp_len > 2 * (opts->dgram_item_len * 2)); } while (ndp_index); - dev_consume_skb_any(skb); - VDBG(port->func.config->cdev, "Parsed NTB with %d frames\n", dgram_counter); + + to_process -= block_len; + if (to_process != 0) { + ntb_ptr = (unsigned char *)(ntb_ptr + block_len); + goto parse_ntb; + } + + dev_consume_skb_any(skb); + return 0; err: skb_queue_purge(list); -- cgit From 76750f1dcad3e1af2295cdf2f9434e06e3178ef3 Mon Sep 17 00:00:00 2001 From: Hui Liu Date: Thu, 31 Aug 2023 18:19:45 +0800 Subject: usb: typec: qcom: Update the logic of regulator enable and disable Removed the call logic of disable and enable regulator in reset function. Enable the regulator in qcom_pmic_typec_start function and disable it in qcom_pmic_typec_stop function to avoid unbalanced regulator disable warnings. Fixes: a4422ff22142 ("usb: typec: qcom: Add Qualcomm PMIC Type-C driver") Cc: stable Reviewed-by: Bryan O'Donoghue Acked-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue # rb5 Signed-off-by: Hui Liu Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230831-qcom-tcpc-v5-1-5e2661dc6c1d@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c index bb0b8479d80f..52c81378e36e 100644 --- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c +++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c @@ -381,10 +381,6 @@ static int qcom_pmic_typec_pdphy_enable(struct pmic_typec_pdphy *pmic_typec_pdph struct device *dev = pmic_typec_pdphy->dev; int ret; - ret = regulator_enable(pmic_typec_pdphy->vdd_pdphy); - if (ret) - return ret; - /* PD 2.0, DR=TYPEC_DEVICE, PR=TYPEC_SINK */ ret = regmap_update_bits(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_MSG_CONFIG_REG, @@ -422,8 +418,6 @@ static int qcom_pmic_typec_pdphy_disable(struct pmic_typec_pdphy *pmic_typec_pdp ret = regmap_write(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_EN_CONTROL_REG, 0); - regulator_disable(pmic_typec_pdphy->vdd_pdphy); - return ret; } @@ -447,6 +441,10 @@ int qcom_pmic_typec_pdphy_start(struct pmic_typec_pdphy *pmic_typec_pdphy, int i; int ret; + ret = regulator_enable(pmic_typec_pdphy->vdd_pdphy); + if (ret) + return ret; + pmic_typec_pdphy->tcpm_port = tcpm_port; ret = pmic_typec_pdphy_reset(pmic_typec_pdphy); @@ -467,6 +465,8 @@ void qcom_pmic_typec_pdphy_stop(struct pmic_typec_pdphy *pmic_typec_pdphy) disable_irq(pmic_typec_pdphy->irq_data[i].irq); qcom_pmic_typec_pdphy_reset_on(pmic_typec_pdphy); + + regulator_disable(pmic_typec_pdphy->vdd_pdphy); } struct pmic_typec_pdphy *qcom_pmic_typec_pdphy_alloc(struct device *dev) -- cgit From f74a7afc224acd5e922c7a2e52244d891bbe44ee Mon Sep 17 00:00:00 2001 From: Ricardo Cañuelo Date: Wed, 30 Aug 2023 12:04:18 +0200 Subject: usb: hub: Guard against accesses to uninitialized BOS descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many functions in drivers/usb/core/hub.c and drivers/usb/core/hub.h access fields inside udev->bos without checking if it was allocated and initialized. If usb_get_bos_descriptor() fails for whatever reason, udev->bos will be NULL and those accesses will result in a crash: BUG: kernel NULL pointer dereference, address: 0000000000000018 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 5 PID: 17818 Comm: kworker/5:1 Tainted: G W 5.15.108-18910-gab0e1cb584e1 #1 Hardware name: Google Kindred/Kindred, BIOS Google_Kindred.12672.413.0 02/03/2021 Workqueue: usb_hub_wq hub_event RIP: 0010:hub_port_reset+0x193/0x788 Code: 89 f7 e8 20 f7 15 00 48 8b 43 08 80 b8 96 03 00 00 03 75 36 0f b7 88 92 03 00 00 81 f9 10 03 00 00 72 27 48 8b 80 a8 03 00 00 <48> 83 78 18 00 74 19 48 89 df 48 8b 75 b0 ba 02 00 00 00 4c 89 e9 RSP: 0018:ffffab740c53fcf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffa1bc5f678000 RCX: 0000000000000310 RDX: fffffffffffffdff RSI: 0000000000000286 RDI: ffffa1be9655b840 RBP: ffffab740c53fd70 R08: 00001b7d5edaa20c R09: ffffffffb005e060 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: ffffab740c53fd3e R14: 0000000000000032 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffa1be96540000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 000000022e80c005 CR4: 00000000003706e0 Call Trace: hub_event+0x73f/0x156e ? hub_activate+0x5b7/0x68f process_one_work+0x1a2/0x487 worker_thread+0x11a/0x288 kthread+0x13a/0x152 ? process_one_work+0x487/0x487 ? kthread_associate_blkcg+0x70/0x70 ret_from_fork+0x1f/0x30 Fall back to a default behavior if the BOS descriptor isn't accessible and skip all the functionalities that depend on it: LPM support checks, Super Speed capabilitiy checks, U1/U2 states setup. Signed-off-by: Ricardo Cañuelo Cc: stable Link: https://lore.kernel.org/r/20230830100418.1952143-1-ricardo.canuelo@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 25 ++++++++++++++++++++++--- drivers/usb/core/hub.h | 2 +- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3c54b218301c..0ff47eeffb49 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -151,6 +151,10 @@ int usb_device_supports_lpm(struct usb_device *udev) if (udev->quirks & USB_QUIRK_NO_LPM) return 0; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return 0; + /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. */ @@ -327,6 +331,10 @@ static void usb_set_lpm_parameters(struct usb_device *udev) if (!udev->lpm_capable || udev->speed < USB_SPEED_SUPER) return; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + hub = usb_hub_to_struct_hub(udev->parent); /* It doesn't take time to transition the roothub into U0, since it * doesn't have an upstream link. @@ -2704,13 +2712,17 @@ out_authorized: static enum usb_ssp_rate get_port_ssp_rate(struct usb_device *hdev, u32 ext_portstatus) { - struct usb_ssp_cap_descriptor *ssp_cap = hdev->bos->ssp_cap; + struct usb_ssp_cap_descriptor *ssp_cap; u32 attr; u8 speed_id; u8 ssac; u8 lanes; int i; + if (!hdev->bos) + goto out; + + ssp_cap = hdev->bos->ssp_cap; if (!ssp_cap) goto out; @@ -4215,8 +4227,15 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { int timeout; - __u8 u1_mel = udev->bos->ss_cap->bU1devExitLat; - __le16 u2_mel = udev->bos->ss_cap->bU2DevExitLat; + __u8 u1_mel; + __le16 u2_mel; + + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + + u1_mel = udev->bos->ss_cap->bU1devExitLat; + u2_mel = udev->bos->ss_cap->bU2DevExitLat; /* If the device says it doesn't have *any* exit latency to come out of * U1 or U2, it's probably lying. Assume it doesn't implement that link diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 37897afd1b64..d44dd7f6623e 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -153,7 +153,7 @@ static inline int hub_is_superspeedplus(struct usb_device *hdev) { return (hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS && le16_to_cpu(hdev->descriptor.bcdUSB) >= 0x0310 && - hdev->bos->ssp_cap); + hdev->bos && hdev->bos->ssp_cap); } static inline unsigned hub_power_on_good_delay(struct usb_hub *hub) -- cgit From 8bea147dfdf823eaa8d3baeccc7aeb041b41944b Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 13 Sep 2023 00:52:15 +0000 Subject: usb: dwc3: Soft reset phy on probe for host When there's phy initialization, we need to initiate a soft-reset sequence. That's done through USBCMD.HCRST in the xHCI driver and its initialization, However, the dwc3 driver may modify core configs before the soft-reset. This may result in some connection instability. So, ensure the phy is ready before the controller updates the GCTL.PRTCAPDIR or other settings by issuing phy soft-reset. Note that some host-mode configurations may not expose device registers to initiate the controller soft-reset (via DCTL.CoreSftRst). So we reset through GUSB3PIPECTL and GUSB2PHYCFG instead. Cc: stable@vger.kernel.org Fixes: e835c0a4e23c ("usb: dwc3: don't reset device side if dwc3 was configured as host-only") Reported-by: Kenta Sato Closes: https://lore.kernel.org/linux-usb/ZPUciRLUcjDywMVS@debian.me/ Signed-off-by: Thinh Nguyen Tested-by: Kenta Sato Link: https://lore.kernel.org/r/70aea513215d273669152696cc02b20ddcdb6f1a.1694564261.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9c6bf054f15d..343d2570189f 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -279,9 +279,46 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) * XHCI driver will reset the host block. If dwc3 was configured for * host-only mode or current role is host, then we can return early. */ - if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) + if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) return 0; + /* + * If the dr_mode is host and the dwc->current_dr_role is not the + * corresponding DWC3_GCTL_PRTCAP_HOST, then the dwc3_core_init_mode + * isn't executed yet. Ensure the phy is ready before the controller + * updates the GCTL.PRTCAPDIR or other settings by soft-resetting + * the phy. + * + * Note: GUSB3PIPECTL[n] and GUSB2PHYCFG[n] are port settings where n + * is port index. If this is a multiport host, then we need to reset + * all active ports. + */ + if (dwc->dr_mode == USB_DR_MODE_HOST) { + u32 usb3_port; + u32 usb2_port; + + usb3_port = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + usb3_port |= DWC3_GUSB3PIPECTL_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), usb3_port); + + usb2_port = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + usb2_port |= DWC3_GUSB2PHYCFG_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), usb2_port); + + /* Small delay for phy reset assertion */ + usleep_range(1000, 2000); + + usb3_port &= ~DWC3_GUSB3PIPECTL_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), usb3_port); + + usb2_port &= ~DWC3_GUSB2PHYCFG_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), usb2_port); + + /* Wait for clock synchronization */ + msleep(50); + return 0; + } + reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg |= DWC3_DCTL_CSFTRST; reg &= ~DWC3_DCTL_RUN_STOP; -- cgit From 9f35d612da5592f1bf1cae44ec1e023df37bea12 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 26 Sep 2023 15:53:33 +0800 Subject: usb: cdns3: Modify the return value of cdns_set_active () to void when CONFIG_PM_SLEEP is disabled The return type of cdns_set_active () is inconsistent depending on whether CONFIG_PM_SLEEP is enabled, so the return value is modified to void type. Reported-by: Pavel Machek Closes: https://lore.kernel.org/all/ZP7lIKUzD68XA91j@duo.ucw.cz/ Fixes: 2319b9c87fe2 ("usb: cdns3: Put the cdns set active part outside the spin lock") Cc: stable@vger.kernel.org Signed-off-by: Xiaolei Wang Reviewed-by: Pavel Machek Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20230926075333.1791011-1-xiaolei.wang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 4a4dbc2c1561..81a9c9d6be08 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -131,8 +131,7 @@ void cdns_set_active(struct cdns *cdns, u8 set_active); #else /* CONFIG_PM_SLEEP */ static inline int cdns_resume(struct cdns *cdns) { return 0; } -static inline int cdns_set_active(struct cdns *cdns, u8 set_active) -{ return 0; } +static inline void cdns_set_active(struct cdns *cdns, u8 set_active) { } static inline int cdns_suspend(struct cdns *cdns) { return 0; } #endif /* CONFIG_PM_SLEEP */ -- cgit From 3061b6491f491197a35e14e49f805d661b02acd4 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Fri, 29 Sep 2023 17:45:14 +0530 Subject: usb: gadget: udc-xilinx: replace memcpy with memcpy_toio For ARM processor, unaligned access to device memory is not allowed. Method memcpy does not take care of alignment. USB detection failure with the unalingned address of memory, with below kernel crash. To fix the unalingned address kernel panic, replace memcpy with memcpy_toio method. Kernel crash: Unable to handle kernel paging request at virtual address ffff80000c05008a Mem abort info: ESR = 0x96000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061 CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=000000000143b000 [ffff80000c05008a] pgd=100000087ffff003, p4d=100000087ffff003, pud=100000087fffe003, pmd=1000000800bcc003, pte=00680000a0010713 Internal error: Oops: 96000061 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.19-xilinx-v2022.1 #1 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __memcpy+0x30/0x260 lr : __xudc_ep0_queue+0xf0/0x110 sp : ffff800008003d00 x29: ffff800008003d00 x28: ffff800009474e80 x27: 00000000000000a0 x26: 0000000000000100 x25: 0000000000000012 x24: ffff000800bc8080 x23: 0000000000000001 x22: 0000000000000012 x21: ffff000800bc8080 x20: 0000000000000012 x19: ffff000800bc8080 x18: 0000000000000000 x17: ffff800876482000 x16: ffff800008004000 x15: 0000000000004000 x14: 00001f09785d0400 x13: 0103020101005567 x12: 0781400000000200 x11: 00000000c5672a10 x10: 00000000000008d0 x9 : ffff800009463cf0 x8 : ffff8000094757b0 x7 : 0201010055670781 x6 : 4000000002000112 x5 : ffff80000c05009a x4 : ffff000800a15012 x3 : ffff00080362ad80 x2 : 0000000000000012 x1 : ffff000800a15000 x0 : ffff80000c050088 Call trace: __memcpy+0x30/0x260 xudc_ep0_queue+0x3c/0x60 usb_ep_queue+0x38/0x44 composite_ep0_queue.constprop.0+0x2c/0xc0 composite_setup+0x8d0/0x185c configfs_composite_setup+0x74/0xb0 xudc_irq+0x570/0xa40 __handle_irq_event_percpu+0x58/0x170 handle_irq_event+0x60/0x120 handle_fasteoi_irq+0xc0/0x220 handle_domain_irq+0x60/0x90 gic_handle_irq+0x74/0xa0 call_on_irq_stack+0x2c/0x60 do_interrupt_handler+0x54/0x60 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x78/0x7c arch_cpu_idle+0x18/0x2c do_idle+0xdc/0x15c cpu_startup_entry+0x28/0x60 rest_init+0xc8/0xe0 arch_call_rest_init+0x10/0x1c start_kernel+0x694/0x6d4 __primary_switched+0xa4/0xac Fixes: 1f7c51660034 ("usb: gadget: Add xilinx usb2 device support") Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202209020044.CX2PfZzM-lkp@intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Piyush Mehta Link: https://lore.kernel.org/r/20230929121514.13475-1-piyush.mehta@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/udc-xilinx.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 56b8286a8009..74590f93ea61 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -497,11 +497,13 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, /* Get the Buffer address and copy the transmit data.*/ eprambase = (u32 __force *)(udc->addr + ep->rambase); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio((void __iomem *)eprambase, bufferptr, + bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF0COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio((void __iomem *)bufferptr, eprambase, + bytestosend); } /* * Enable the buffer for transmission. @@ -515,11 +517,13 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, eprambase = (u32 __force *)(udc->addr + ep->rambase + ep->ep_usb.maxpacket); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio((void __iomem *)eprambase, bufferptr, + bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF1COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio((void __iomem *)bufferptr, eprambase, + bytestosend); } /* * Enable the buffer for transmission. @@ -1021,7 +1025,7 @@ static int __xudc_ep0_queue(struct xusb_ep *ep0, struct xusb_req *req) udc->addr); length = req->usb_req.actual = min_t(u32, length, EP0_MAX_PACKET); - memcpy(corebuf, req->usb_req.buf, length); + memcpy_toio((void __iomem *)corebuf, req->usb_req.buf, length); udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, length); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); } else { @@ -1752,7 +1756,7 @@ static void xudc_handle_setup(struct xusb_udc *udc) /* Load up the chapter 9 command buffer.*/ ep0rambase = (u32 __force *) (udc->addr + XUSB_SETUP_PKT_ADDR_OFFSET); - memcpy(&setup, ep0rambase, 8); + memcpy_toio((void __iomem *)&setup, ep0rambase, 8); udc->setup = setup; udc->setup.wValue = cpu_to_le16((u16 __force)setup.wValue); @@ -1839,7 +1843,7 @@ static void xudc_ep0_out(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + bytes_to_rx; - memcpy(buffer, ep0rambase, bytes_to_rx); + memcpy_toio((void __iomem *)buffer, ep0rambase, bytes_to_rx); if (req->usb_req.length == req->usb_req.actual) { /* Data transfer completed get ready for Status stage */ @@ -1915,7 +1919,7 @@ static void xudc_ep0_in(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + length; - memcpy(ep0rambase, buffer, length); + memcpy_toio((void __iomem *)ep0rambase, buffer, length); } udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, count); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); -- cgit From 6bc6f7d9d7ac3cdbe9e8b0495538b4a0cc11f032 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 28 Jul 2023 16:09:26 -0500 Subject: x86/sev: Use the GHCB protocol when available for SNP CPUID requests SNP retrieves the majority of CPUID information from the SNP CPUID page. But there are times when that information needs to be supplemented by the hypervisor, for example, obtaining the initial APIC ID of the vCPU from leaf 1. The current implementation uses the MSR protocol to retrieve the data from the hypervisor, even when a GHCB exists. The problem arises when an NMI arrives on return from the VMGEXIT. The NMI will be immediately serviced and may generate a #VC requiring communication with the hypervisor. Since a GHCB exists in this case, it will be used. As part of using the GHCB, the #VC handler will write the GHCB physical address into the GHCB MSR and the #VC will be handled. When the NMI completes, processing resumes at the site of the VMGEXIT which is expecting to read the GHCB MSR and find a CPUID MSR protocol response. Since the NMI handling overwrote the GHCB MSR response, the guest will see an invalid reply from the hypervisor and self-terminate. Fix this problem by using the GHCB when it is available. Any NMI received is properly handled because the GHCB contents are copied into a backup page and restored on NMI exit, thus preserving the active GHCB request or result. [ bp: Touchups. ] Fixes: ee0bfa08a345 ("x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/a5856fa1ebe3879de91a8f6298b6bbd901c61881.1690578565.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 69 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb..dcf325b7b022 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -256,7 +256,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) return 0; } -static int sev_cpuid_hv(struct cpuid_leaf *leaf) +static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf) { int ret; @@ -279,6 +279,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf) return ret; } +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + u32 cr4 = native_read_cr4(); + int ret; + + ghcb_set_rax(ghcb, leaf->fn); + ghcb_set_rcx(ghcb, leaf->subfn); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #UD - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + leaf->eax = ghcb->save.rax; + leaf->ebx = ghcb->save.rbx; + leaf->ecx = ghcb->save.rcx; + leaf->edx = ghcb->save.rdx; + + return ES_OK; +} + +static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf) + : __sev_cpuid_hv_msr(leaf); +} + /* * This may be called early while still running on the initial identity * mapping. Use RIP-relative addressing to obtain the correct address @@ -388,19 +427,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) return false; } -static void snp_cpuid_hv(struct cpuid_leaf *leaf) +static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { - if (sev_cpuid_hv(leaf)) + if (sev_cpuid_hv(ghcb, ctxt, leaf)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); } -static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) +static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + struct cpuid_leaf *leaf) { struct cpuid_leaf leaf_hv = *leaf; switch (leaf->fn) { case 0x1: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* initial APIC ID */ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); @@ -419,7 +459,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) break; case 0xB: leaf_hv.subfn = 0; - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->edx = leaf_hv.edx; @@ -467,7 +507,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) } break; case 0x8000001E: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->eax = leaf_hv.eax; @@ -488,7 +528,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct cpuid_leaf *leaf) +static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -522,7 +562,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf) return 0; } - return snp_cpuid_postprocess(leaf); + return snp_cpuid_postprocess(ghcb, ctxt, leaf); } /* @@ -544,14 +584,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(NULL, NULL, &leaf); if (!ret) goto cpuid_done; if (ret != -EOPNOTSUPP) goto fail; - if (sev_cpuid_hv(&leaf)) + if (__sev_cpuid_hv_msr(&leaf)) goto fail; cpuid_done: @@ -848,14 +888,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } -static int vc_handle_cpuid_snp(struct pt_regs *regs) +static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { + struct pt_regs *regs = ctxt->regs; struct cpuid_leaf leaf; int ret; leaf.fn = regs->ax; leaf.subfn = regs->cx; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(ghcb, ctxt, &leaf); if (!ret) { regs->ax = leaf.eax; regs->bx = leaf.ebx; @@ -874,7 +915,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, enum es_result ret; int snp_cpuid_ret; - snp_cpuid_ret = vc_handle_cpuid_snp(regs); + snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt); if (!snp_cpuid_ret) return ES_OK; if (snp_cpuid_ret != -EOPNOTSUPP) -- cgit From 62d5e970d022ef4bde18948dd67247c3194384c1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 22 Jun 2023 08:45:05 -0500 Subject: x86/sev: Change npages to unsigned long in snp_accept_memory() In snp_accept_memory(), the npages variables value is calculated from phys_addr_t variables but is an unsigned int. A very large range passed into snp_accept_memory() could lead to truncating npages to zero. This doesn't happen at the moment but let's be prepared. Fixes: 6c3211796326 ("x86/sev: Add SNP-specific unaccepted memory support") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/6d511c25576494f682063c9fb6c705b526a3757e.1687441505.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f60..d8c1e3be74c0 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -868,8 +868,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned long npages) void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - unsigned long vaddr; - unsigned int npages; + unsigned long vaddr, npages; if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; -- cgit From b84b53149476b22cc3b8677b771fb4cf06d1d455 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Fri, 29 Sep 2023 21:50:28 +0200 Subject: ASoC: hdmi-codec: Fix broken channel map reporting Commit 4e0871333661 ("ASoC: hdmi-codec: fix channel info for compressed formats") accidentally changed hcp->chmap_idx from ca_id, the CEA channel allocation ID, to idx, the index to the table of channel mappings ordered by preference. This resulted in wrong channel maps being reported to userspace, eg for 5.1 "FL,FR,LFE,FC" was reported instead of the expected "FL,FR,LFE,FC,RL,RR": ~ # speaker-test -c 6 -t sine ... 0 - Front Left 3 - Front Center 1 - Front Right 2 - LFE 4 - Unknown 5 - Unknown ~ # amixer cget iface=PCM,name='Playback Channel Map' | grep ': values' : values=3,4,8,7,0,0,0,0 Switch this back to ca_id in case of PCM audio so the correct channel map is reported again and set it to HDMI_CODEC_CHMAP_IDX_UNKNOWN in case of non-PCM audio so the PCM channel map control returns "Unknown" channels (value 0). Fixes: 4e0871333661 ("ASoC: hdmi-codec: fix channel info for compressed formats") Cc: stable@vger.kernel.org Signed-off-by: Matthias Reichl Link: https://lore.kernel.org/r/20230929195027.97136-1-hias@horus.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 13689e718d36..09eef6042aad 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -531,7 +531,10 @@ static int hdmi_codec_fill_codec_params(struct snd_soc_dai *dai, hp->sample_rate = sample_rate; hp->channels = channels; - hcp->chmap_idx = idx; + if (pcm_audio) + hcp->chmap_idx = ca_id; + else + hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN; return 0; } -- cgit From 892fbdb203945d887ad2a109a3700b091a8e3b97 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Sat, 30 Sep 2023 17:55:50 +0800 Subject: ASoC: rt5682: Fix regulator enable/disable sequence This will attempt to disable the regulators if the initial enable fails which is a bug. Fix this bug by modifying the code to the correct sequence. Signed-off-by: Zhang Shurong Link: https://lore.kernel.org/r/tencent_4F37C9B5315B7960041E8E0ADDA869128F08@qq.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682-i2c.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index b05b4f73d8aa..fbad1ed06626 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -157,11 +157,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c) return ret; } - ret = devm_add_action_or_reset(&i2c->dev, rt5682_i2c_disable_regulators, - rt5682); - if (ret) - return ret; - ret = regulator_bulk_enable(ARRAY_SIZE(rt5682->supplies), rt5682->supplies); if (ret) { @@ -169,6 +164,11 @@ static int rt5682_i2c_probe(struct i2c_client *i2c) return ret; } + ret = devm_add_action_or_reset(&i2c->dev, rt5682_i2c_disable_regulators, + rt5682); + if (ret) + return ret; + ret = rt5682_get_ldo1(rt5682, &i2c->dev); if (ret) return ret; -- cgit From 2ec8b010979036c2fe79a64adb6ecc0bd11e91d1 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Fri, 22 Sep 2023 11:28:12 -0700 Subject: spi: npcm-fiu: Fix UMA reads when dummy.nbytes == 0 We don't want to use the value of ilog2(0) as dummy.buswidth is 0 when dummy.nbytes is 0. Since we have no dummy bytes, we don't need to configure the dummy byte bits per clock register value anyway. Signed-off-by: "William A. Kennington III" Link: https://lore.kernel.org/r/20230922182812.2728066-1-william@wkennington.com Signed-off-by: Mark Brown --- drivers/spi/spi-npcm-fiu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c index 0ca21ff0e9cc..e42248519688 100644 --- a/drivers/spi/spi-npcm-fiu.c +++ b/drivers/spi/spi-npcm-fiu.c @@ -353,8 +353,9 @@ static int npcm_fiu_uma_read(struct spi_mem *mem, uma_cfg |= ilog2(op->cmd.buswidth); uma_cfg |= ilog2(op->addr.buswidth) << NPCM_FIU_UMA_CFG_ADBPCK_SHIFT; - uma_cfg |= ilog2(op->dummy.buswidth) - << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; + if (op->dummy.nbytes) + uma_cfg |= ilog2(op->dummy.buswidth) + << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; uma_cfg |= ilog2(op->data.buswidth) << NPCM_FIU_UMA_CFG_RDBPCK_SHIFT; uma_cfg |= op->dummy.nbytes << NPCM_FIU_UMA_CFG_DBSIZ_SHIFT; -- cgit From e59e38158c61162f2e8beb4620df21a1585117df Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 11 Sep 2023 10:22:38 +0200 Subject: usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub The USB2412 is a 2-Port USB 2.0 hub controller that provides a reset pin and a single 3v3 powre source, which makes it suitable to be controlled by the onboard_hub driver. This hub has the same reset timings as USB2514/2517 and the same onboard hub specific-data can be reused for USB2412. Signed-off-by: Javier Carrasco Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230911-topic-2412_onboard_hub-v1-1-7704181ddfff@wolfvision.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 1 + drivers/usb/misc/onboard_usb_hub.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 3da1a4659c5f..57bbe1309094 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -434,6 +434,7 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 4026ba64c592..2a4ab5ac0ebe 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -47,6 +47,7 @@ static const struct onboard_hub_pdata vialab_vl817_data = { }; static const struct of_device_id onboard_hub_match[] = { + { .compatible = "usb424,2412", .data = µchip_usb424_data, }, { .compatible = "usb424,2514", .data = µchip_usb424_data, }, { .compatible = "usb424,2517", .data = µchip_usb424_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, -- cgit From a00e197daec52bcd955e118f5f57d706da5bfe50 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 11 Sep 2023 14:34:15 +0530 Subject: usb: typec: ucsi: Clear EVENT_PENDING bit if ucsi_send_command fails Currently if ucsi_send_command() fails, then we bail out without clearing EVENT_PENDING flag. So when the next connector change event comes, ucsi_connector_change() won't queue the con->work, because of which none of the new events will be processed. Fix this by clearing EVENT_PENDING flag if ucsi_send_command() fails. Cc: stable@vger.kernel.org # 5.16 Fixes: 512df95b9432 ("usb: typec: ucsi: Better fix for missing unplug events issue") Signed-off-by: Prashanth K Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/1694423055-8440-1-git-send-email-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index c6dfe3dff346..509c67c94a70 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -884,6 +884,7 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (ret < 0) { dev_err(ucsi->dev, "%s: GET_CONNECTOR_STATUS failed (%d)\n", __func__, ret); + clear_bit(EVENT_PENDING, &con->ucsi->flags); goto out_unlock; } -- cgit From 41a43013d2366db5b88b42bbcd8e8f040b6ccf21 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Fri, 15 Sep 2023 17:31:05 +0300 Subject: usb: xhci: xhci-ring: Use sysdev for mapping bounce buffer As mentioned in: commit 474ed23a6257 ("xhci: align the last trb before link if it is easily splittable.") A bounce buffer is utilized for ensuring that transfers that span across ring segments are aligned to the EP's max packet size. However, the device that is used to map the DMA buffer to is currently using the XHCI HCD, which does not carry any DMA operations in certain configrations. Migration to using the sysdev entry was introduced for DWC3 based implementations where the IOMMU operations are present. Replace the reference to the controller device to sysdev instead. This allows the bounce buffer to be properly mapped to any implementations that have an IOMMU involved. cc: stable@vger.kernel.org Fixes: 4c39d4b949d3 ("usb: xhci: use bus->sysdev for DMA configuration") Signed-off-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1dde53f6eb31..98389b568633 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -798,7 +798,7 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, struct xhci_ring *ring, struct xhci_td *td) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_segment *seg = td->bounce_seg; struct urb *urb = td->urb; size_t len; @@ -3469,7 +3469,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, u32 *trb_buff_len, struct xhci_segment *seg) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; unsigned int unalign; unsigned int max_pkt; u32 new_buff_len; -- cgit From d7cdfc319b2bcf6899ab0a05eec0958bc802a9a1 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 15 Sep 2023 17:31:06 +0300 Subject: xhci: track port suspend state correctly in unsuccessful resume cases xhci-hub.c tracks suspended ports in a suspended_port bitfield. This is checked when responding to a Get_Status(PORT) request to see if a port in running U0 state was recently resumed, and adds the required USB_PORT_STAT_C_SUSPEND change bit in those cases. The suspended_port bit was left uncleared if a device is disconnected during suspend. The bit remained set even when a new device was connected and enumerated. The set bit resulted in a incorrect Get_Status(PORT) response with a bogus USB_PORT_STAT_C_SUSPEND change bit set once the new device reached U0 link state. USB_PORT_STAT_C_SUSPEND change bit is only used for USB2 ports, but xhci-hub keeps track of both USB2 and USB3 suspended ports. Cc: stable@vger.kernel.org Reported-by: Wesley Cheng Closes: https://lore.kernel.org/linux-usb/d68aa806-b26a-0e43-42fb-b8067325e967@quicinc.com/ Fixes: 1d5810b6923c ("xhci: Rework port suspend structures for limited ports.") Tested-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0054d02239e2..0df5d807a77e 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1062,19 +1062,19 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status, *status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; /* USB3 specific wPortStatus bits */ - if (portsc & PORT_POWER) { + if (portsc & PORT_POWER) *status |= USB_SS_PORT_STAT_POWER; - /* link state handling */ - if (link_state == XDEV_U0) - bus_state->suspended_ports &= ~(1 << portnum); - } - /* remote wake resume signaling complete */ - if (bus_state->port_remote_wakeup & (1 << portnum) && + /* no longer suspended or resuming */ + if (link_state != XDEV_U3 && link_state != XDEV_RESUME && link_state != XDEV_RECOVERY) { - bus_state->port_remote_wakeup &= ~(1 << portnum); - usb_hcd_end_port_resume(&hcd->self, portnum); + /* remote wake resume signaling complete */ + if (bus_state->port_remote_wakeup & (1 << portnum)) { + bus_state->port_remote_wakeup &= ~(1 << portnum); + usb_hcd_end_port_resume(&hcd->self, portnum); + } + bus_state->suspended_ports &= ~(1 << portnum); } xhci_hub_report_usb3_link_state(xhci, status, portsc); @@ -1131,6 +1131,7 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, usb_hcd_end_port_resume(&port->rhub->hcd->self, portnum); } port->rexit_active = 0; + bus_state->suspended_ports &= ~(1 << portnum); } } -- cgit From 15f3ef070933817fac2bcbdb9c85bff9e54e9f80 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Sep 2023 17:31:07 +0300 Subject: xhci: Clear EHB bit only at end of interrupt handler The Event Handler Busy bit shall be cleared by software when the Event Ring is empty. The xHC is thereby informed that it may raise another interrupt once it has enqueued new events (sec 4.17.2). However since commit dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose"), the EHB bit is already cleared after half a segment has been processed. As a result, spurious interrupts may occur: - xhci_irq() processes half a segment, clears EHB, continues processing remaining events. - xHC enqueues new events. Because EHB has been cleared, xHC sets Interrupt Pending bit. Interrupt moderation countdown begins. - Meanwhile xhci_irq() continues processing events. Interrupt moderation countdown reaches zero, so an MSI interrupt is signaled. - xhci_irq() empties the Event Ring, clears EHB again and is done. - Because an MSI interrupt has been signaled, xhci_irq() is run again. It discovers there's nothing to do and returns IRQ_NONE. Avoid by clearing the EHB bit only at the end of xhci_irq(). Fixes: dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v5.5+ Cc: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 98389b568633..3e5dc0723a8f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2996,7 +2996,8 @@ static int xhci_handle_event(struct xhci_hcd *xhci, struct xhci_interrupter *ir) */ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, struct xhci_interrupter *ir, - union xhci_trb *event_ring_deq) + union xhci_trb *event_ring_deq, + bool clear_ehb) { u64 temp_64; dma_addr_t deq; @@ -3017,12 +3018,13 @@ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, return; /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_PTR_MASK; + temp_64 &= ERST_DESI_MASK; temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); } /* Clear the event handler busy flag (RW1C) */ - temp_64 |= ERST_EHB; + if (clear_ehb) + temp_64 |= ERST_EHB; xhci_write_64(xhci, temp_64, &ir->ir_set->erst_dequeue); } @@ -3103,7 +3105,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) while (xhci_handle_event(xhci, ir) > 0) { if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; - xhci_update_erst_dequeue(xhci, ir, event_ring_deq); + xhci_update_erst_dequeue(xhci, ir, event_ring_deq, false); event_ring_deq = ir->event_ring->dequeue; /* ring is half-full, force isoc trbs to interrupt more often */ @@ -3113,7 +3115,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) event_loop = 0; } - xhci_update_erst_dequeue(xhci, ir, event_ring_deq); + xhci_update_erst_dequeue(xhci, ir, event_ring_deq, true); ret = IRQ_HANDLED; out: -- cgit From cf97c5e0f7dda2edc15ecd96775fe6c355823784 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Sep 2023 17:31:08 +0300 Subject: xhci: Preserve RsvdP bits in ERSTBA register correctly xhci_add_interrupter() erroneously preserves only the lowest 4 bits when writing the ERSTBA register, not the lowest 6 bits. Fix it. Migrate the ERST_BASE_RSVDP macro to the modern GENMASK_ULL() syntax to avoid a u64 cast. This was previously fixed by commit 8c1cbec9db1a ("xhci: fix event ring segment table related masks and variables in header"), but immediately undone by commit b17a57f89f69 ("xhci: Refactor interrupter code for initial multi interrupter support."). Fixes: b17a57f89f69 ("xhci: Refactor interrupter code for initial multi interrupter support.") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.3+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++-- drivers/usb/host/xhci.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8714ab5bf04d..0a37f0d511cf 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2285,8 +2285,8 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, writel(erst_size, &ir->ir_set->erst_size); erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); - erst_base &= ERST_PTR_MASK; - erst_base |= (ir->erst.erst_dma_addr & (u64) ~ERST_PTR_MASK); + erst_base &= ERST_BASE_RSVDP; + erst_base |= ir->erst.erst_dma_addr & ~ERST_BASE_RSVDP; xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); /* Set the event ring dequeue address of this interrupter */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 7e282b4522c0..5df370482521 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -514,7 +514,7 @@ struct xhci_intr_reg { #define ERST_SIZE_MASK (0xffff << 16) /* erst_base bitmasks */ -#define ERST_BASE_RSVDP (0x3f) +#define ERST_BASE_RSVDP (GENMASK_ULL(5, 0)) /* erst_dequeue bitmasks */ /* Dequeue ERST Segment Index (DESI) - Segment number (or alias) -- cgit From d9e8319a6e3538b430f692b5625a76ffa0758adc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Oct 2023 03:36:13 +0100 Subject: ovl: move freeing ovl_entry past rcu delay ... into ->free_inode(), that is. Fixes: 0af950f57fef "ovl: move ovl_entry into ovl_inode" Signed-off-by: Al Viro Signed-off-by: Amir Goldstein --- fs/overlayfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index def266b5e2a3..f09184b865ec 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -167,6 +167,7 @@ static void ovl_free_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); kfree(oi->redirect); + kfree(oi->oe); mutex_destroy(&oi->lock); kmem_cache_free(ovl_inode_cachep, oi); } @@ -176,7 +177,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); - ovl_free_entry(oi->oe); + ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe)); if (S_ISDIR(inode->i_mode)) ovl_dir_cache_free(inode); else -- cgit From c54719c92aa3129f330cce81b88cf34f1627f756 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Oct 2023 03:36:43 +0100 Subject: ovl: fetch inode once in ovl_dentry_revalidate_common() d_inode_rcu() is right - we might be in rcu pathwalk; however, OVL_E() hides plain d_inode() on the same dentry... Fixes: a6ff2bc0be17 ("ovl: use OVL_E() and OVL_E_FLAGS() accessors") Signed-off-by: Al Viro Signed-off-by: Amir Goldstein --- fs/overlayfs/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f09184b865ec..905d3aaf4e55 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -104,8 +104,8 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) static int ovl_dentry_revalidate_common(struct dentry *dentry, unsigned int flags, bool weak) { - struct ovl_entry *oe = OVL_E(dentry); - struct ovl_path *lowerstack = ovl_lowerstack(oe); + struct ovl_entry *oe; + struct ovl_path *lowerstack; struct inode *inode = d_inode_rcu(dentry); struct dentry *upper; unsigned int i; @@ -115,6 +115,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, if (!inode) return -ECHILD; + oe = OVL_I_E(inode); + lowerstack = ovl_lowerstack(oe); upper = ovl_i_dentry_upper(inode); if (upper) ret = ovl_revalidate_real(upper, flags, weak); -- cgit From a535116d80339dbfe50b9b81b2f808c69eefbbc3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 2 Oct 2023 14:21:49 +0300 Subject: ovl: make use of ->layers safe in rcu pathwalk ovl_permission() accesses ->layers[...].mnt; we can't have ->layers freed without an RCU delay on fs shutdown. Fortunately, kern_unmount_array() that is used to drop those mounts does include an RCU delay, so freeing is delayed; unfortunately, the array passed to kern_unmount_array() is formed by mangling ->layers contents and that happens without any delays. The ->layers[...].name string entries are used to store the strings to display in "lowerdir=..." by ovl_show_options(). Those entries are not accessed in RCU walk. Move the name strings into a separate array ofs->config.lowerdirs and reuse the ofs->config.lowerdirs array as the temporary mount array to pass to kern_unmount_array(). Reported-by: Al Viro Link: https://lore.kernel.org/r/20231002023711.GP3389589@ZenIV/ Acked-by: Miklos Szeredi Signed-off-by: Amir Goldstein --- fs/overlayfs/ovl_entry.h | 10 +--------- fs/overlayfs/params.c | 17 +++++++++-------- fs/overlayfs/super.c | 18 +++++++++++------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index e9539f98e86a..d82d2a043da2 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -8,6 +8,7 @@ struct ovl_config { char *upperdir; char *workdir; + char **lowerdirs; bool default_permissions; int redirect_mode; int verity_mode; @@ -39,17 +40,8 @@ struct ovl_layer { int idx; /* One fsid per unique underlying sb (upper fsid == 0) */ int fsid; - char *name; }; -/* - * ovl_free_fs() relies on @mnt being the first member when unmounting - * the private mounts created for each layer. Let's check both the - * offset and type. - */ -static_assert(offsetof(struct ovl_layer, mnt) == 0); -static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *)); - struct ovl_path { const struct ovl_layer *layer; struct dentry *dentry; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index b9355bb6d75a..95b751507ac8 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -752,12 +752,12 @@ void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); - /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ - mounts = (struct vfsmount **) ofs->layers; + /* Reuse ofs->config.lowerdirs as a vfsmount array before freeing it */ + mounts = (struct vfsmount **) ofs->config.lowerdirs; for (i = 0; i < ofs->numlayer; i++) { iput(ofs->layers[i].trap); + kfree(ofs->config.lowerdirs[i]); mounts[i] = ofs->layers[i].mnt; - kfree(ofs->layers[i].name); } kern_unmount_array(mounts, ofs->numlayer); kfree(ofs->layers); @@ -765,6 +765,7 @@ void ovl_free_fs(struct ovl_fs *ofs) free_anon_bdev(ofs->fs[i].pseudo_dev); kfree(ofs->fs); + kfree(ofs->config.lowerdirs); kfree(ofs->config.upperdir); kfree(ofs->config.workdir); if (ofs->creator_cred) @@ -949,16 +950,16 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; - const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower]; + char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower]; - /* ofs->layers[0] is the upper layer */ - seq_printf(m, ",lowerdir=%s", ofs->layers[1].name); + /* lowerdirs[] starts from offset 1 */ + seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]); /* dump regular lower layers */ for (nr = 2; nr < nr_merged_lower; nr++) - seq_printf(m, ":%s", ofs->layers[nr].name); + seq_printf(m, ":%s", ofs->config.lowerdirs[nr]); /* dump data lower layers */ for (nr = 0; nr < ofs->numdatalayer; nr++) - seq_printf(m, "::%s", data_layers[nr].name); + seq_printf(m, "::%s", lowerdatadirs[nr]); if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); seq_show_option(m, "workdir", ofs->config.workdir); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 905d3aaf4e55..3fa2416264a4 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -572,11 +572,6 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, upper_layer->idx = 0; upper_layer->fsid = 0; - err = -ENOMEM; - upper_layer->name = kstrdup(ofs->config.upperdir, GFP_KERNEL); - if (!upper_layer->name) - goto out; - /* * Inherit SB_NOSEC flag from upperdir. * @@ -1125,7 +1120,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, layers[ofs->numlayer].idx = ofs->numlayer; layers[ofs->numlayer].fsid = fsid; layers[ofs->numlayer].fs = &ofs->fs[fsid]; - layers[ofs->numlayer].name = l->name; + /* Store for printing lowerdir=... in ovl_show_options() */ + ofs->config.lowerdirs[ofs->numlayer] = l->name; l->name = NULL; ofs->numlayer++; ofs->fs[fsid].is_lower = true; @@ -1370,8 +1366,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) if (!layers) goto out_err; + ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL); + if (!ofs->config.lowerdirs) { + kfree(layers); + goto out_err; + } ofs->layers = layers; - /* Layer 0 is reserved for upper even if there's no upper */ + /* + * Layer 0 is reserved for upper even if there's no upper. + * For consistency, config.lowerdirs[0] is NULL. + */ ofs->numlayer = 1; sb->s_stack_depth = 0; -- cgit From 57db57ae15a97c8a67993a799e17d73a9945921f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 30 Sep 2023 10:01:02 -0300 Subject: dt-bindings: display: fsl,imx6-hdmi: Change to 'unevaluatedProperties: false' fsl,imx6-hdmi.yaml makes a reference to synopsys,dw-hdmi.yaml. The 'interrupts' and 'reg' properties are described in synopsys,dw-hdmi.yaml, so use 'unevaluatedProperties: false' so that these two properties can be accepted. This fixes the following schema warnings: hdmi@120000: 'interrupts', 'reg' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/display/imx/fsl,imx6-hdmi.yaml# Signed-off-by: Fabio Estevam Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230930130102.798822-1-festevam@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml index af7fe9c4d196..7979cf07f119 100644 --- a/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml @@ -87,7 +87,7 @@ required: - interrupts - ports -additionalProperties: false +unevaluatedProperties: false examples: - | -- cgit From e930bea4124b8a4a47ba4092d99da30099b9242d Mon Sep 17 00:00:00 2001 From: Antoine Gennart Date: Fri, 29 Sep 2023 15:01:17 +0200 Subject: ASoC: tlv320adc3xxx: BUG: Correct micbias setting The micbias setting for tlv320adc can also have the value '3' which means that the micbias ouput pin is connected to the input pin AVDD. Signed-off-by: Antoine Gennart Link: https://lore.kernel.org/r/20230929130117.77661-1-gennartan@disroot.org Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320adc3xxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index b976c1946286..420bbf588efe 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -293,7 +293,7 @@ #define ADC3XXX_BYPASS_RPGA 0x80 /* MICBIAS control bits */ -#define ADC3XXX_MICBIAS_MASK 0x2 +#define ADC3XXX_MICBIAS_MASK 0x3 #define ADC3XXX_MICBIAS1_SHIFT 5 #define ADC3XXX_MICBIAS2_SHIFT 3 @@ -1099,7 +1099,7 @@ static int adc3xxx_parse_dt_micbias(struct adc3xxx *adc3xxx, unsigned int val; if (!of_property_read_u32(np, propname, &val)) { - if (val >= ADC3XXX_MICBIAS_AVDD) { + if (val > ADC3XXX_MICBIAS_AVDD) { dev_err(dev, "Invalid property value for '%s'\n", propname); return -EINVAL; } -- cgit From 1948fa64727685ac3f6584755212e2e738b6b051 Mon Sep 17 00:00:00 2001 From: Sven Frotscher Date: Thu, 28 Sep 2023 00:36:07 +0200 Subject: ASoC: amd: yc: Fix non-functional mic on Lenovo 82YM Like the Lenovo 82TL, 82V2, 82QF and 82UG, the 82YM (Yoga 7 14ARP8) requires an entry in the quirk list to enable the internal microphone. The latter two received similar fixes in commit 1263cc0f414d ("ASoC: amd: yc: Fix non-functional mic on Lenovo 82QF and 82UG"). Fixes: c008323fe361 ("ASoC: amd: yc: Fix a non-functional mic on Lenovo 82SJ") Cc: stable@vger.kernel.org Signed-off-by: Sven Frotscher Link: https://lore.kernel.org/r/20230927223758.18870-1-sven.frotscher@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 94e9eb8e73f2..15a864dcd7bd 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -241,6 +241,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82V2"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit From 5d007ffdf6025fe83e497c44ed7c8aa8f150c4d1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 11:35:39 +0200 Subject: of: overlay: Reorder struct fragment fields kerneldoc The fields of the fragment structure were reordered, but the kerneldoc was not updated. Fixes: 81225ea682f45629 ("of: overlay: reorder fields in struct fragment") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/cfa36d2bb95e3c399c415dbf58057302c70ef375.1695893695.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- drivers/of/overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index dfb6fb962fc7..a9a292d6d59b 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; -- cgit From c7242a45cb8cad5b6cd840fd4661315b45b1e841 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 3 Oct 2023 09:21:27 +0300 Subject: ovl: fix NULL pointer defer when encoding non-decodable lower fid A wrong return value from ovl_check_encode_origin() would cause ovl_dentry_to_fid() to try to encode fid from NULL upper dentry. Reported-by: syzbot+2208f82282740c1c8915@syzkaller.appspotmail.com Fixes: 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles") Signed-off-by: Amir Goldstein --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index c8c8588bd98c..26b782c53910 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -188,7 +188,7 @@ static int ovl_check_encode_origin(struct dentry *dentry) /* Lower file handle for non-upper non-decodable */ if (!ovl_dentry_upper(dentry) && !decodable) - return 0; + return 1; /* Upper file handle for pure upper */ if (!ovl_dentry_lower(dentry)) -- cgit From 4b2b606075e50cdae62ab2356b0a1e206947c354 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 22 Sep 2023 15:55:08 +0800 Subject: ipv4/fib: send notify when delete source address routes After deleting an interface address in fib_del_ifaddr(), the function scans the fib_info list for stray entries and calls fib_flush() and fib_table_flush(). Then the stray entries will be deleted silently and no RTM_DELROUTE notification will be sent. This lack of notification can make routing daemons, or monitor like `ip monitor route` miss the routing changes. e.g. + ip link add dummy1 type dummy + ip link add dummy2 type dummy + ip link set dummy1 up + ip link set dummy2 up + ip addr add 192.168.5.5/24 dev dummy1 + ip route add 7.7.7.0/24 dev dummy2 src 192.168.5.5 + ip -4 route 7.7.7.0/24 dev dummy2 scope link src 192.168.5.5 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 + ip monitor route + ip addr del 192.168.5.5/24 dev dummy1 Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5 Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5 As Ido reminded, fib_table_flush() isn't only called when an address is deleted, but also when an interface is deleted or put down. The lack of notification in these cases is deliberate. And commit 7c6bb7d2faaf ("net/ipv6: Add knob to skip DELROUTE message on device down") introduced a sysctl to make IPv6 behave like IPv4 in this regard. So we can't send the route delete notify blindly in fib_table_flush(). To fix this issue, let's add a new flag in "struct fib_info" to track the deleted prefer source address routes, and only send notify for them. After update: + ip monitor route + ip addr del 192.168.5.5/24 dev dummy1 Deleted 192.168.5.0/24 dev dummy1 proto kernel scope link src 192.168.5.5 Deleted broadcast 192.168.5.255 dev dummy1 table local proto kernel scope link src 192.168.5.5 Deleted local 192.168.5.5 dev dummy1 table local proto kernel scope host src 192.168.5.5 Deleted 7.7.7.0/24 dev dummy2 scope link src 192.168.5.5 Suggested-by: Thomas Haller Signed-off-by: Hangbin Liu Acked-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230922075508.848925-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 1 + net/ipv4/fib_trie.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f0c13864180e..15de07d36540 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -154,6 +154,7 @@ struct fib_info { int fib_nhs; bool fib_nh_is_v6; bool nh_updated; + bool pfsrc_removed; struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[]; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index eafa4a033515..1ea82bc33ef1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1887,6 +1887,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; + fi->pfsrc_removed = true; ret++; } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d13fb9e76b97..9bdfdab906fe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2027,6 +2027,7 @@ void fib_table_flush_external(struct fib_table *tb) int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; + struct nl_info info = { .nl_net = net }; struct key_vector *pn = t->kv; unsigned long cindex = 1; struct hlist_node *tmp; @@ -2089,6 +2090,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) fib_notify_alias_delete(net, n->key, &n->leaf, fa, NULL); + if (fi->pfsrc_removed) + rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa, + KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); -- cgit From 9593c7cb6cf670ef724d17f7f9affd7a8d2ad0c5 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 22 Sep 2023 23:04:58 +0200 Subject: ipv6: tcp: add a missing nf_reset_ct() in 3WHS handling Commit b0e214d21203 ("netfilter: keep conntrack reference until IPsecv6 policy checks are done") is a direct copy of the old commit b59c270104f0 ("[NETFILTER]: Keep conntrack reference until IPsec policy checks are done") but for IPv6. However, it also copies a bug that this old commit had. That is: when the third packet of 3WHS connection establishment contains payload, it is added into socket receive queue without the XFRM check and the drop of connection tracking context. That leads to nf_conntrack module being impossible to unload as it waits for all the conntrack references to be dropped while the packet release is deferred in per-cpu cache indefinitely, if not consumed by the application. The issue for IPv4 was fixed in commit 6f0012e35160 ("tcp: add a missing nf_reset_ct() in 3WHS handling") by adding a missing XFRM check and correctly dropping the conntrack context. However, the issue was introduced to IPv6 code afterwards. Fixing it the same way for IPv6 now. Fixes: b0e214d21203 ("netfilter: keep conntrack reference until IPsecv6 policy checks are done") Link: https://lore.kernel.org/netdev/d589a999-d4dd-2768-b2d5-89dec64a4a42@ovn.org/ Signed-off-by: Ilya Maximets Acked-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230922210530.2045146-1-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- net/ipv6/tcp_ipv6.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3a88545a265d..44b6949d72b2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1640,9 +1640,12 @@ process: struct sock *nsk; sk = req->rsk_listener; - drop_reason = tcp_inbound_md5_hash(sk, skb, - &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + drop_reason = SKB_DROP_REASON_XFRM_POLICY; + else + drop_reason = tcp_inbound_md5_hash(sk, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1689,6 +1692,7 @@ process: } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); -- cgit From e9c65989920f7c28775ec4e0c11b483910fb67b8 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 24 Sep 2023 02:35:49 +0900 Subject: net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg syzbot reported the following uninit-value access issue: ===================================================== BUG: KMSAN: uninit-value in smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:975 [inline] BUG: KMSAN: uninit-value in smsc75xx_bind+0x5c9/0x11e0 drivers/net/usb/smsc75xx.c:1482 CPU: 0 PID: 8696 Comm: kworker/0:3 Not tainted 5.8.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:975 [inline] smsc75xx_bind+0x5c9/0x11e0 drivers/net/usb/smsc75xx.c:1482 usbnet_probe+0x1152/0x3f90 drivers/net/usb/usbnet.c:1737 usb_probe_interface+0xece/0x1550 drivers/usb/core/driver.c:374 really_probe+0xf20/0x20b0 drivers/base/dd.c:529 driver_probe_device+0x293/0x390 drivers/base/dd.c:701 __device_attach_driver+0x63f/0x830 drivers/base/dd.c:807 bus_for_each_drv+0x2ca/0x3f0 drivers/base/bus.c:431 __device_attach+0x4e2/0x7f0 drivers/base/dd.c:873 device_initial_probe+0x4a/0x60 drivers/base/dd.c:920 bus_probe_device+0x177/0x3d0 drivers/base/bus.c:491 device_add+0x3b0e/0x40d0 drivers/base/core.c:2680 usb_set_configuration+0x380f/0x3f10 drivers/usb/core/message.c:2032 usb_generic_driver_probe+0x138/0x300 drivers/usb/core/generic.c:241 usb_probe_device+0x311/0x490 drivers/usb/core/driver.c:272 really_probe+0xf20/0x20b0 drivers/base/dd.c:529 driver_probe_device+0x293/0x390 drivers/base/dd.c:701 __device_attach_driver+0x63f/0x830 drivers/base/dd.c:807 bus_for_each_drv+0x2ca/0x3f0 drivers/base/bus.c:431 __device_attach+0x4e2/0x7f0 drivers/base/dd.c:873 device_initial_probe+0x4a/0x60 drivers/base/dd.c:920 bus_probe_device+0x177/0x3d0 drivers/base/bus.c:491 device_add+0x3b0e/0x40d0 drivers/base/core.c:2680 usb_new_device+0x1bd4/0x2a30 drivers/usb/core/hub.c:2554 hub_port_connect drivers/usb/core/hub.c:5208 [inline] hub_port_connect_change drivers/usb/core/hub.c:5348 [inline] port_event drivers/usb/core/hub.c:5494 [inline] hub_event+0x5e7b/0x8a70 drivers/usb/core/hub.c:5576 process_one_work+0x1688/0x2140 kernel/workqueue.c:2269 worker_thread+0x10bc/0x2730 kernel/workqueue.c:2415 kthread+0x551/0x590 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Local variable ----buf.i87@smsc75xx_bind created at: __smsc75xx_read_reg drivers/net/usb/smsc75xx.c:83 [inline] smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:968 [inline] smsc75xx_bind+0x485/0x11e0 drivers/net/usb/smsc75xx.c:1482 __smsc75xx_read_reg drivers/net/usb/smsc75xx.c:83 [inline] smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:968 [inline] smsc75xx_bind+0x485/0x11e0 drivers/net/usb/smsc75xx.c:1482 This issue is caused because usbnet_read_cmd() reads less bytes than requested (zero byte in the reproducer). In this case, 'buf' is not properly filled. This patch fixes the issue by returning -ENODATA if usbnet_read_cmd() reads less bytes than requested. Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Reported-and-tested-by: syzbot+6966546b78d050bb0b5d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6966546b78d050bb0b5d Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230923173549.3284502-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/usb/smsc75xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 5d6454fedb3f..78ad2da3ee29 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -90,7 +90,9 @@ static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (unlikely(ret < 0)) { + if (unlikely(ret < 4)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); return ret; -- cgit From eea03d18af9c44235865a4bc9bec4d780ef6cf21 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 23 Sep 2023 19:15:59 -0600 Subject: qed/red_ll2: Fix undefined behavior bug in struct qed_ll2_info The flexible structure (a structure that contains a flexible-array member at the end) `qed_ll2_tx_packet` is nested within the second layer of `struct qed_ll2_info`: struct qed_ll2_tx_packet { ... /* Flexible Array of bds_set determined by max_bds_per_packet */ struct { struct core_tx_bd *txq_bd; dma_addr_t tx_frag; u16 frag_len; } bds_set[]; }; struct qed_ll2_tx_queue { ... struct qed_ll2_tx_packet cur_completing_packet; }; struct qed_ll2_info { ... struct qed_ll2_tx_queue tx_queue; struct qed_ll2_cbs cbs; }; The problem is that member `cbs` in `struct qed_ll2_info` is placed just after an object of type `struct qed_ll2_tx_queue`, which is in itself an implicit flexible structure, which by definition ends in a flexible array member, in this case `bds_set`. This causes an undefined behavior bug at run-time when dynamic memory is allocated for `bds_set`, which could lead to a serious issue if `cbs` in `struct qed_ll2_info` is overwritten by the contents of `bds_set`. Notice that the type of `cbs` is a structure full of function pointers (and a cookie :) ): include/linux/qed/qed_ll2_if.h: 107 typedef 108 void (*qed_ll2_complete_rx_packet_cb)(void *cxt, 109 struct qed_ll2_comp_rx_data *data); 110 111 typedef 112 void (*qed_ll2_release_rx_packet_cb)(void *cxt, 113 u8 connection_handle, 114 void *cookie, 115 dma_addr_t rx_buf_addr, 116 bool b_last_packet); 117 118 typedef 119 void (*qed_ll2_complete_tx_packet_cb)(void *cxt, 120 u8 connection_handle, 121 void *cookie, 122 dma_addr_t first_frag_addr, 123 bool b_last_fragment, 124 bool b_last_packet); 125 126 typedef 127 void (*qed_ll2_release_tx_packet_cb)(void *cxt, 128 u8 connection_handle, 129 void *cookie, 130 dma_addr_t first_frag_addr, 131 bool b_last_fragment, bool b_last_packet); 132 133 typedef 134 void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle, 135 u32 opaque_data_0, u32 opaque_data_1); 136 137 struct qed_ll2_cbs { 138 qed_ll2_complete_rx_packet_cb rx_comp_cb; 139 qed_ll2_release_rx_packet_cb rx_release_cb; 140 qed_ll2_complete_tx_packet_cb tx_comp_cb; 141 qed_ll2_release_tx_packet_cb tx_release_cb; 142 qed_ll2_slowpath_cb slowpath_cb; 143 void *cookie; 144 }; Fix this by moving the declaration of `cbs` to the middle of its containing structure `qed_ll2_info`, preventing it from being overwritten by the contents of `bds_set` at run-time. This bug was introduced in 2017, when `bds_set` was converted to a one-element array, and started to be used as a Variable Length Object (VLO) at run-time. Fixes: f5823fe6897c ("qed: Add ll2 option to limit the number of bds per packet") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZQ+Nz8DfPg56pIzr@work Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 0bfc375161ed..a174c6fc626a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -110,9 +110,9 @@ struct qed_ll2_info { enum core_tx_dest tx_dest; u8 tx_stats_en; bool main_func_queue; + struct qed_ll2_cbs cbs; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; - struct qed_ll2_cbs cbs; }; extern const struct qed_ll2_ops qed_ll2_ops_pass; -- cgit From 2f2fc17bab0011430ceb6f2dc1959e7d1f981444 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Sep 2023 00:48:55 +0200 Subject: sched/eevdf: Also update slice on placement Tasks that never consume their full slice would not update their slice value. This means that tasks that are spawned before the sysctl scaling keep their original (UP) slice length. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230915124822.847197830@noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cb225921bbca..7d73652acbb2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4919,10 +4919,12 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - u64 vslice = calc_delta_fair(se->slice, se); - u64 vruntime = avg_vruntime(cfs_rq); + u64 vslice, vruntime = avg_vruntime(cfs_rq); s64 lag = 0; + se->slice = sysctl_sched_base_slice; + vslice = calc_delta_fair(se->slice, se); + /* * Due to how V is constructed as the weighted average of entities, * adding tasks with positive lag, or removing tasks with negative lag -- cgit From 650cad561cce04b62a8c8e0446b685ef171bc3bb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Sep 2023 14:29:50 +0200 Subject: sched/eevdf: Fix avg_vruntime() The expectation is that placing a task at avg_vruntime() makes it eligible. Turns out there is a corner case where this is not the case. Specifically, avg_vruntime() relies on the fact that integer division is a flooring function (eg. it discards the remainder). By this property the value returned is slightly left of the true average. However! when the average is a negative (relative to min_vruntime) the effect is flipped and it becomes a ceil, with the result that the returned value is just right of the average and thus not eligible. Fixes: af4cf40470c2 ("sched/fair: Add cfs_rq::avg_vruntime") Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7d73652acbb2..ef7490c4b8b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -664,6 +664,10 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta; } +/* + * Specifically: avg_runtime() + 0 must result in entity_eligible() := true + * For this to be so, the result of this function must have a left bias. + */ u64 avg_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; @@ -677,8 +681,12 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) load += weight; } - if (load) + if (load) { + /* sign flips effective floor / ceil */ + if (avg < 0) + avg -= (load - 1); avg = div_s64(avg, load); + } return cfs_rq->min_vruntime + avg; } -- cgit From cbb7eb2dbd9472816e42a1b0fdb51af49abbf812 Mon Sep 17 00:00:00 2001 From: Kai Uwe Broulik Date: Sun, 1 Oct 2023 13:47:10 +0200 Subject: drm: panel-orientation-quirks: Add quirk for One Mix 2S The One Mix 2S is a mini laptop with a 1200x1920 portrait screen mounted in a landscape oriented clamshell case. Because of the too generic DMI strings this entry is also doing bios-date matching. Signed-off-by: Kai Uwe Broulik Reviewed-by: Hans de Goede Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20231001114710.336172-1-foss-linux@broulik.de --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 0cb646cb04ee..d5c15292ae93 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -38,6 +38,14 @@ static const struct drm_dmi_panel_orientation_data gpd_micropc = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data gpd_onemix2s = { + .width = 1200, + .height = 1920, + .bios_dates = (const char * const []){ "05/21/2018", "10/26/2018", + "03/04/2019", NULL }, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + static const struct drm_dmi_panel_orientation_data gpd_pocket = { .width = 1200, .height = 1920, @@ -401,6 +409,14 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* One Mix 2S (generic strings, also match on bios date) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), + }, + .driver_data = (void *)&gpd_onemix2s, }, {} }; -- cgit From 8679328eb859d06a1984ab48d90ac35d11bbcaf1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 21 Sep 2023 16:52:33 +0200 Subject: serial: Reduce spinlocked portion of uart_rs485_config() Commit 44b27aec9d96 ("serial: core, 8250: set RS485 termination GPIO in serial core") enabled support for RS485 termination GPIOs behind i2c expanders by setting the GPIO outside of the critical section protected by the port spinlock. Access to the i2c expander may sleep, which caused a splat with the port spinlock held. Commit 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way") erroneously regressed that by spinlocking the GPIO manipulation again. Fix by moving uart_rs485_config() (the function manipulating the GPIO) outside of the spinlocked section and acquiring the spinlock inside of uart_rs485_config() for the invocation of ->rs485_config() only. This gets us one step closer to pushing the spinlock down into the ->rs485_config() callbacks which actually need it. (Some callbacks do not want to be spinlocked because they perform sleepable register accesses, see e.g. sc16is7xx_config_rs485().) Stack trace for posterity: Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 56 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch Call trace: rcu_note_context_switch __schedule schedule schedule_timeout wait_for_completion_timeout bcm2835_i2c_xfer __i2c_transfer i2c_transfer i2c_transfer_buffer_flags regmap_i2c_write _regmap_raw_write_impl _regmap_bus_raw_write _regmap_write _regmap_update_bits regmap_update_bits_base pca953x_gpio_set_value gpiod_set_raw_value_commit gpiod_set_value_nocheck gpiod_set_value_cansleep uart_rs485_config uart_add_one_port pl011_register_port pl011_probe Fixes: 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way") Suggested-by: Lino Sanfilippo Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.1+ Link: https://lore.kernel.org/r/f3a35967c28b32f3c6432d0aa5936e6a9908282d.1695307688.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 7bdc21d5e13b..ca26a8aef2cb 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1404,12 +1404,18 @@ static void uart_set_rs485_termination(struct uart_port *port, static int uart_rs485_config(struct uart_port *port) { struct serial_rs485 *rs485 = &port->rs485; + unsigned long flags; int ret; + if (!(rs485->flags & SER_RS485_ENABLED)) + return 0; + uart_sanitize_serial_rs485(port, rs485); uart_set_rs485_termination(port, rs485); + spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, NULL, rs485); + spin_unlock_irqrestore(&port->lock, flags); if (ret) memset(rs485, 0, sizeof(*rs485)); @@ -2474,11 +2480,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) if (ret == 0) { if (tty) uart_change_line_settings(tty, state, NULL); + uart_rs485_config(uport); spin_lock_irq(&uport->lock); if (!(uport->rs485.flags & SER_RS485_ENABLED)) ops->set_mctrl(uport, uport->mctrl); - else - uart_rs485_config(uport); ops->start_tx(uport); spin_unlock_irq(&uport->lock); tty_port_set_initialized(port, true); @@ -2587,10 +2592,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->mctrl &= TIOCM_DTR; if (!(port->rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); - else - uart_rs485_config(port); spin_unlock_irqrestore(&port->lock, flags); + uart_rs485_config(port); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. -- cgit From 560706eff7c8e5621b0d63afe0866e0e1906e87e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Sep 2023 09:13:17 +0300 Subject: serial: 8250_omap: Fix errors with no_console_suspend We now get errors on system suspend if no_console_suspend is set as reported by Thomas. The errors started with commit 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend"). Let's fix the issue by checking for console_suspend_enabled in the system suspend and resume path. Note that with this fix the checks for console_suspend_enabled in omap8250_runtime_suspend() become useless. We now keep runtime PM usage count for an attached kernel console starting with commit bedb404e91bb ("serial: 8250_port: Don't use power management for kernel console"). Fixes: 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend") Cc: stable Cc: Udit Kumar Reported-by: Thomas Richard Signed-off-by: Tony Lindgren Tested-by: Thomas Richard Reviewed-by: Dhruva Gole Link: https://lore.kernel.org/r/20230926061319.15140-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 26dd089d8e82..ca972fd37725 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1617,7 +1617,7 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - int err; + int err = 0; serial8250_suspend_port(priv->line); @@ -1627,7 +1627,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - err = pm_runtime_force_suspend(dev); + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1636,11 +1637,15 @@ static int omap8250_suspend(struct device *dev) static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); + struct uart_8250_port *up = serial8250_get_port(priv->line); int err; - err = pm_runtime_force_resume(dev); - if (err) - return err; + if (uart_console(&up->port) && console_suspend_enabled) { + err = pm_runtime_force_resume(dev); + if (err) + return err; + } + serial8250_resume_port(priv->line); /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */ pm_runtime_mark_last_busy(dev); @@ -1717,16 +1722,6 @@ static int omap8250_runtime_suspend(struct device *dev) if (priv->line >= 0) up = serial8250_get_port(priv->line); - /* - * When using 'no_console_suspend', the console UART must not be - * suspended. Since driver suspend is managed by runtime suspend, - * preventing runtime suspend (by returning error) will keep device - * active during suspend. - */ - if (priv->is_suspending && !console_suspend_enabled) { - if (up && uart_console(&up->port)) - return -EBUSY; - } if (priv->habit & UART_ERRATA_CLOCK_DISABLE) { int ret; -- cgit From f8024f1f36a30a082b0457d5779c8847cea57f57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Oct 2023 18:14:08 -0600 Subject: io_uring/kbuf: don't allow registered buffer rings on highmem pages syzbot reports that registering a mapped buffer ring on arm32 can trigger an OOPS. Registered buffer rings have two modes, one of them is the application passing in the memory that the buffer ring should reside in. Once those pages are mapped, we use page_address() to get a virtual address. This will obviously fail on highmem pages, which aren't mapped. Add a check if we have any highmem pages after mapping, and fail the attempt to register a provided buffer ring if we do. This will return the same error as kernels that don't support provided buffer rings to begin with. Link: https://lore.kernel.org/io-uring/000000000000af635c0606bcb889@google.com/ Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") Cc: stable@vger.kernel.org Reported-by: syzbot+2113e61b8848fa7951d8@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 556f4df25b0f..9123138aa9f4 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -477,7 +477,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, { struct io_uring_buf_ring *br; struct page **pages; - int nr_pages; + int i, nr_pages; pages = io_pin_pages(reg->ring_addr, flex_array_size(br, bufs, reg->ring_entries), @@ -485,6 +485,17 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, if (IS_ERR(pages)) return PTR_ERR(pages); + /* + * Apparently some 32-bit boxes (ARM) will return highmem pages, + * which then need to be mapped. We could support that, but it'd + * complicate the code and slowdown the common cases quite a bit. + * So just error out, returning -EINVAL just like we did on kernels + * that didn't support mapped buffer rings. + */ + for (i = 0; i < nr_pages; i++) + if (PageHighMem(pages[i])) + goto error_unpin; + br = page_address(pages[0]); #ifdef SHM_COLOUR /* @@ -496,13 +507,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, * should use IOU_PBUF_RING_MMAP instead, and liburing will handle * this transparently. */ - if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) { - int i; - - for (i = 0; i < nr_pages; i++) - unpin_user_page(pages[i]); - return -EINVAL; - } + if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) + goto error_unpin; #endif bl->buf_pages = pages; bl->buf_nr_pages = nr_pages; @@ -510,6 +516,11 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, bl->is_mapped = 1; bl->is_mmap = 0; return 0; +error_unpin: + for (i = 0; i < nr_pages; i++) + unpin_user_page(pages[i]); + kvfree(pages); + return -EINVAL; } static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg, -- cgit From 1658633c04653578429ff5dfc62fdc159203a8f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Oct 2023 19:51:38 -0600 Subject: io_uring: ensure io_lockdep_assert_cq_locked() handles disabled rings io_lockdep_assert_cq_locked() checks that locking is correctly done when a CQE is posted. If the ring is setup in a disabled state with IORING_SETUP_R_DISABLED, then ctx->submitter_task isn't assigned until the ring is later enabled. We generally don't post CQEs in this state, as no SQEs can be submitted. However it is possible to generate a CQE if tagged resources are being updated. If this happens and PROVE_LOCKING is enabled, then the locking check helper will dereference ctx->submitter_task, which hasn't been set yet. Fixup io_lockdep_assert_cq_locked() to handle this case correctly. While at it, convert it to a static inline as well, so that generated line offsets will actually reflect which condition failed, rather than just the line offset for io_lockdep_assert_cq_locked() itself. Reported-and-tested-by: syzbot+efc45d4e7ba6ab4ef1eb@syzkaller.appspotmail.com Fixes: f26cc9593581 ("io_uring: lockdep annotate CQ locking") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 547c30582fb8..0bc145614a6e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -86,20 +86,33 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); -#define io_lockdep_assert_cq_locked(ctx) \ - do { \ - lockdep_assert(in_task()); \ - \ - if (ctx->flags & IORING_SETUP_IOPOLL) { \ - lockdep_assert_held(&ctx->uring_lock); \ - } else if (!ctx->task_complete) { \ - lockdep_assert_held(&ctx->completion_lock); \ - } else if (ctx->submitter_task->flags & PF_EXITING) { \ - lockdep_assert(current_work()); \ - } else { \ - lockdep_assert(current == ctx->submitter_task); \ - } \ - } while (0) +#if defined(CONFIG_PROVE_LOCKING) +static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) +{ + lockdep_assert(in_task()); + + if (ctx->flags & IORING_SETUP_IOPOLL) { + lockdep_assert_held(&ctx->uring_lock); + } else if (!ctx->task_complete) { + lockdep_assert_held(&ctx->completion_lock); + } else if (ctx->submitter_task) { + /* + * ->submitter_task may be NULL and we can still post a CQE, + * if the ring has been setup with IORING_SETUP_R_DISABLED. + * Not from an SQE, as those cannot be submitted, but via + * updating tagged resources. + */ + if (ctx->submitter_task->flags & PF_EXITING) + lockdep_assert(current_work()); + else + lockdep_assert(current == ctx->submitter_task); + } +} +#else +static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) +{ +} +#endif static inline void io_req_task_work_add(struct io_kiocb *req) { -- cgit From 8957261cd8149ed9d0738c01c0320bcbff989407 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Fri, 8 Sep 2023 10:15:48 +0530 Subject: ethtool: plca: fix plca enable data type while parsing the value The ETHTOOL_A_PLCA_ENABLED data type is u8. But while parsing the value from the attribute, nla_get_u32() is used in the plca_update_sint() function instead of nla_get_u8(). So plca_cfg.enabled variable is updated with some garbage value instead of 0 or 1 and always enables plca even though plca is disabled through ethtool application. This bug has been fixed by parsing the values based on the attributes type in the policy. Fixes: 8580e16c28f3 ("net/ethtool: add netlink interface for the PLCA RS") Signed-off-by: Parthiban Veerasooran Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230908044548.5878-1-Parthiban.Veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- net/ethtool/plca.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index b238a1afe9ae..b1e2e3b5027f 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -21,16 +21,6 @@ struct plca_reply_data { #define PLCA_REPDATA(__reply_base) \ container_of(__reply_base, struct plca_reply_data, base) -static void plca_update_sint(int *dst, const struct nlattr *attr, - bool *mod) -{ - if (!attr) - return; - - *dst = nla_get_u32(attr); - *mod = true; -} - // PLCA get configuration message ------------------------------------------- // const struct nla_policy ethnl_plca_get_cfg_policy[] = { @@ -38,6 +28,29 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = { NLA_POLICY_NESTED(ethnl_header_policy), }; +static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid, + bool *mod) +{ + const struct nlattr *attr = tb[attrid]; + + if (!attr || + WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy))) + return; + + switch (ethnl_plca_set_cfg_policy[attrid].type) { + case NLA_U8: + *dst = nla_get_u8(attr); + break; + case NLA_U32: + *dst = nla_get_u32(attr); + break; + default: + WARN_ON_ONCE(1); + } + + *mod = true; +} + static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) @@ -144,13 +157,13 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) return -EOPNOTSUPP; memset(&plca_cfg, 0xff, sizeof(plca_cfg)); - plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod); - plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod); - plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod); - plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod); - plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT], + plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod); + plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod); + plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod); + plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod); + plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT, &mod); - plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR], + plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR, &mod); if (!mod) return 0; -- cgit From dfc7f7a988dad34c3bf4c053124fb26aa6c5f916 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 8 Sep 2023 19:58:53 -0400 Subject: net: nfc: llcp: Add lock when modifying device list The device list needs its associated lock held when modifying it, or the list could become corrupted, as syzbot discovered. Reported-and-tested-by: syzbot+c1d0a03d305972dbbe14@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c1d0a03d305972dbbe14 Signed-off-by: Jeremy Cline Reviewed-by: Simon Horman Fixes: 6709d4b7bc2e ("net: nfc: Fix use-after-free caused by nfc_llcp_find_local") Link: https://lore.kernel.org/r/20230908235853.1319596-1-jeremy@jcline.org Signed-off-by: Jakub Kicinski --- net/nfc/llcp_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index f60e424e0607..6705bb895e23 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1636,7 +1636,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); + spin_lock(&llcp_devices_lock); list_add(&local->list, &llcp_devices); + spin_unlock(&llcp_devices_lock); return 0; } -- cgit From 2fd7b0f6d5ad655b1d947d3acdd82f687c31465e Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 2 Oct 2023 14:32:29 -0400 Subject: md/raid5: release batch_last before waiting for another stripe_head When raid5_get_active_stripe is called with a ctx containing a stripe_head in its batch_last pointer, it can cause a deadlock if the task sleeps waiting on another stripe_head to become available. The stripe_head held by batch_last can be blocking the advancement of other stripe_heads, leading to no stripe_heads being released so raid5_get_active_stripe waits forever. Like with the quiesce state handling earlier in the function, batch_last needs to be released by raid5_get_active_stripe before it waits for another stripe_head. Fixes: 3312e6c887fe ("md/raid5: Keep a reference to last stripe_head for batch") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: David Jeffery Reviewed-by: Logan Gunthorpe Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231002183422.13047-1-djeffery@redhat.com --- drivers/md/raid5.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4cb9c608ee19..284cd71bcc68 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -854,6 +854,13 @@ struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); r5l_wake_reclaim(conf->log, 0); + + /* release batch_last before wait to avoid risk of deadlock */ + if (ctx && ctx->batch_last) { + raid5_release_stripe(ctx->batch_last); + ctx->batch_last = NULL; + } + wait_event_lock_irq(conf->wait_for_stripe, is_inactive_blocked(conf, hash), *(conf->hash_locks + hash)); -- cgit From 223ef474316466e9f61f6e0064f3a6fe4923a2c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 3 Oct 2023 09:59:58 -0600 Subject: io_uring: don't allow IORING_SETUP_NO_MMAP rings on highmem pages On at least arm32, but presumably any arch with highmem, if the application passes in memory that resides in highmem for the rings, then we should fail that ring creation. We fail it with -EINVAL, which is what kernels that don't support IORING_SETUP_NO_MMAP will do as well. Cc: stable@vger.kernel.org Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 783ed0fff71b..d839a80a6751 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2686,7 +2686,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, { struct page **page_array; unsigned int nr_pages; - int ret; + int ret, i; *npages = 0; @@ -2716,6 +2716,20 @@ err: */ if (page_array[0] != page_array[ret - 1]) goto err; + + /* + * Can't support mapping user allocated ring memory on 32-bit archs + * where it could potentially reside in highmem. Just fail those with + * -EINVAL, just like we did on kernels that didn't support this + * feature. + */ + for (i = 0; i < nr_pages; i++) { + if (PageHighMem(page_array[i])) { + ret = -EINVAL; + goto err; + } + } + *pages = page_array; *npages = nr_pages; return page_to_virt(page_array[0]); -- cgit From f4384b3e54ea813868bb81a861bf5b2406e15d8f Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Fri, 29 Sep 2023 12:30:24 +0530 Subject: tee: amdtee: fix use-after-free vulnerability in amdtee_close_session There is a potential race condition in amdtee_close_session that may cause use-after-free in amdtee_open_session. For instance, if a session has refcount == 1, and one thread tries to free this session via: kref_put(&sess->refcount, destroy_session); the reference count will get decremented, and the next step would be to call destroy_session(). However, if in another thread, amdtee_open_session() is called before destroy_session() has completed execution, alloc_session() may return 'sess' that will be freed up later in destroy_session() leading to use-after-free in amdtee_open_session. To fix this issue, treat decrement of sess->refcount and removal of 'sess' from session list in destroy_session() as a critical section, so that it is executed atomically. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Cc: stable@vger.kernel.org Signed-off-by: Rijo Thomas Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 372d64756ed6..3c15f6a9e91c 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -217,12 +217,12 @@ unlock: return rc; } +/* mutex must be held by caller */ static void destroy_session(struct kref *ref) { struct amdtee_session *sess = container_of(ref, struct amdtee_session, refcount); - mutex_lock(&session_list_mutex); list_del(&sess->list_node); mutex_unlock(&session_list_mutex); kfree(sess); @@ -272,7 +272,8 @@ int amdtee_open_session(struct tee_context *ctx, if (arg->ret != TEEC_SUCCESS) { pr_err("open_session failed %d\n", arg->ret); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); goto out; } @@ -290,7 +291,8 @@ int amdtee_open_session(struct tee_context *ctx, pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); rc = -ENOMEM; goto out; } @@ -331,7 +333,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, &session_list_mutex); return 0; } -- cgit From 152be54224de182730cf4ee2fe073391623c97f9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 2 Oct 2023 15:46:46 +0200 Subject: drm/nouveau: chan: use struct nvif_mclass Use actual struct nvif_mclass instead of identical anonymous struct. Reviewed-by: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231002135008.10651-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_chan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index bb3d6e5c122f..f44d19a50d39 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -257,10 +257,7 @@ static int nouveau_channel_ctor(struct nouveau_drm *drm, struct nvif_device *device, bool priv, u64 runm, struct nouveau_channel **pchan) { - static const struct { - s32 oclass; - int version; - } hosts[] = { + const struct nvif_mclass hosts[] = { { AMPERE_CHANNEL_GPFIFO_B, 0 }, { AMPERE_CHANNEL_GPFIFO_A, 0 }, { TURING_CHANNEL_GPFIFO_A, 0 }, -- cgit From bbe08a0e11ae76fc466c11b9fa6dd6eb52544a46 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 2 Oct 2023 15:46:47 +0200 Subject: drm/nouveau: chan: use channel class definitions Use channel class definitions instead of magic numbers. Reviewed-by: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231002135008.10651-2-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_chan.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index f44d19a50d39..68e9b08a4803 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -440,9 +440,11 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) } /* initialise dma tracking parameters */ - switch (chan->user.oclass & 0x00ff) { - case 0x006b: - case 0x006e: + switch (chan->user.oclass) { + case NV03_CHANNEL_DMA: + case NV10_CHANNEL_DMA: + case NV17_CHANNEL_DMA: + case NV40_CHANNEL_DMA: chan->user_put = 0x40; chan->user_get = 0x44; chan->dma.max = (0x10000 / 4) - 2; -- cgit From d59e75eef52d89201aaf5342a3ac23ddf3e9b112 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 2 Oct 2023 15:46:48 +0200 Subject: drm/nouveau: exec: report max pushs through getparam Report the maximum number of IBs that can be pushed with a single DRM_IOCTL_NOUVEAU_EXEC through DRM_IOCTL_NOUVEAU_GETPARAM. While the maximum number of IBs per ring might vary between chipsets, the kernel will make sure that userspace can only push a fraction of the maximum number of IBs per ring per job, such that we avoid a situation where there's only a single job occupying the ring, which could potentially lead to the ring run dry. Using DRM_IOCTL_NOUVEAU_GETPARAM to report the maximum number of IBs that can be pushed with a single DRM_IOCTL_NOUVEAU_EXEC implies that all channels of a given device have the same ring size. Reviewed-by: Dave Airlie Reviewed-by: Lyude Paul Acked-by: Faith Ekstrand Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231002135008.10651-3-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 21 +++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- drivers/gpu/drm/nouveau/nouveau_dma.h | 3 +++ drivers/gpu/drm/nouveau/nouveau_exec.c | 7 ++++--- drivers/gpu/drm/nouveau/nouveau_exec.h | 10 ++++++++++ include/uapi/drm/nouveau_drm.h | 10 ++++++++++ 6 files changed, 49 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 30afbec9e3b1..2edd7bb13fae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -31,6 +31,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" +#include "nouveau_exec.h" #include "nouveau_gem.h" #include "nouveau_chan.h" #include "nouveau_abi16.h" @@ -183,6 +184,20 @@ nouveau_abi16_fini(struct nouveau_abi16 *abi16) cli->abi16 = NULL; } +static inline int +getparam_dma_ib_max(struct nvif_device *device) +{ + const struct nvif_mclass dmas[] = { + { NV03_CHANNEL_DMA, 0 }, + { NV10_CHANNEL_DMA, 0 }, + { NV17_CHANNEL_DMA, 0 }, + { NV40_CHANNEL_DMA, 0 }, + {} + }; + + return nvif_mclass(&device->object, dmas) < 0 ? NV50_DMA_IB_MAX : 0; +} + int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) { @@ -247,6 +262,12 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) case NOUVEAU_GETPARAM_GRAPH_UNITS: getparam->value = nvkm_gr_units(gr); break; + case NOUVEAU_GETPARAM_EXEC_PUSH_MAX: { + int ib_max = getparam_dma_ib_max(device); + + getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 68e9b08a4803..7c97b2886807 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -454,7 +454,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) chan->user_get = 0x44; chan->user_get_hi = 0x60; chan->dma.ib_base = 0x10000 / 4; - chan->dma.ib_max = (0x02000 / 8) - 1; + chan->dma.ib_max = NV50_DMA_IB_MAX; chan->dma.ib_put = 0; chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put; chan->dma.max = chan->dma.ib_base; diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 1744d95b233e..c52cda82353e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -49,6 +49,9 @@ void nv50_dma_push(struct nouveau_channel *, u64 addr, u32 length, /* Maximum push buffer size. */ #define NV50_DMA_PUSH_MAX_LENGTH 0x7fffff +/* Maximum IBs per ring. */ +#define NV50_DMA_IB_MAX ((0x02000 / 8) - 1) + /* Object handles - for stuff that's doesn't use handle == oclass. */ enum { NvDmaFB = 0x80000002, diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 5dda94e1318c..c1837ba95fb5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -379,7 +379,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, struct nouveau_channel *chan = NULL; struct nouveau_exec_job_args args = {}; struct drm_nouveau_exec *req = data; - int ret = 0; + int push_max, ret = 0; if (unlikely(!abi16)) return -ENOMEM; @@ -404,9 +404,10 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (!chan->dma.ib_max) return nouveau_abi16_put(abi16, -ENOSYS); - if (unlikely(req->push_count > NOUVEAU_GEM_MAX_PUSH)) { + push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max); + if (unlikely(req->push_count > push_max)) { NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n", - req->push_count, NOUVEAU_GEM_MAX_PUSH); + req->push_count, push_max); return nouveau_abi16_put(abi16, -EINVAL); } diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h index 778cacd90f65..5488d337bcc0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.h +++ b/drivers/gpu/drm/nouveau/nouveau_exec.h @@ -51,4 +51,14 @@ int nouveau_exec_job_init(struct nouveau_exec_job **job, int nouveau_exec_ioctl_exec(struct drm_device *dev, void *data, struct drm_file *file_priv); +static inline unsigned int +nouveau_exec_push_max_from_ib_max(int ib_max) +{ + /* Limit the number of IBs per job to half the size of the ring in order + * to avoid the ring running dry between submissions and preserve one + * more slot for the job's HW fence. + */ + return ib_max > 1 ? ib_max / 2 - 1 : 0; +} + #endif diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 8d7402c13e56..eaf9f248619f 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -44,6 +44,16 @@ extern "C" { #define NOUVEAU_GETPARAM_PTIMER_TIME 14 #define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 #define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 + +/** + * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX + * + * Query the maximum amount of IBs that can be pushed through a single + * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC + * ioctl(). + */ +#define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- cgit From 428c4435b063bebc7eddcd7d311546d6933efa62 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Oct 2023 09:24:02 +1100 Subject: xfs: move log discard work to xfs_discard.c Because we are going to use the same list-based discard submission interface for fstrim-based discards, too. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_discard.c | 77 ++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_discard.h | 6 ++-- fs/xfs/xfs_extent_busy.h | 20 +++++++++-- fs/xfs/xfs_log_cil.c | 93 +++++++----------------------------------------- fs/xfs/xfs_log_priv.h | 5 +-- 5 files changed, 113 insertions(+), 88 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index afc4c78b9eed..3f45c7bb94f2 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2010 Red Hat, Inc. + * Copyright (C) 2010, 2023 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" @@ -19,6 +19,81 @@ #include "xfs_log.h" #include "xfs_ag.h" +struct workqueue_struct *xfs_discard_wq; + +static void +xfs_discard_endio_work( + struct work_struct *work) +{ + struct xfs_busy_extents *extents = + container_of(work, struct xfs_busy_extents, endio_work); + + xfs_extent_busy_clear(extents->mount, &extents->extent_list, false); + kmem_free(extents->owner); +} + +/* + * Queue up the actual completion to a thread to avoid IRQ-safe locking for + * pagb_lock. + */ +static void +xfs_discard_endio( + struct bio *bio) +{ + struct xfs_busy_extents *extents = bio->bi_private; + + INIT_WORK(&extents->endio_work, xfs_discard_endio_work); + queue_work(xfs_discard_wq, &extents->endio_work); + bio_put(bio); +} + +/* + * Walk the discard list and issue discards on all the busy extents in the + * list. We plug and chain the bios so that we only need a single completion + * call to clear all the busy extents once the discards are complete. + */ +int +xfs_discard_extents( + struct xfs_mount *mp, + struct xfs_busy_extents *extents) +{ + struct xfs_extent_busy *busyp; + struct bio *bio = NULL; + struct blk_plug plug; + int error = 0; + + blk_start_plug(&plug); + list_for_each_entry(busyp, &extents->extent_list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, &bio); + if (error && error != -EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llx,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + break; + } + } + + if (bio) { + bio->bi_private = extents; + bio->bi_end_io = xfs_discard_endio; + submit_bio(bio); + } else { + xfs_discard_endio_work(&extents->endio_work); + } + blk_finish_plug(&plug); + + return error; +} + + STATIC int xfs_trim_extents( struct xfs_perag *pag, diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h index de92d9cc958f..2b1a85223a56 100644 --- a/fs/xfs/xfs_discard.h +++ b/fs/xfs/xfs_discard.h @@ -3,8 +3,10 @@ #define XFS_DISCARD_H 1 struct fstrim_range; -struct list_head; +struct xfs_mount; +struct xfs_busy_extents; -extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +int xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy); +int xfs_ioc_trim(struct xfs_mount *mp, struct fstrim_range __user *fstrim); #endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index c37bf87e6781..71c28d031e3b 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -16,9 +16,6 @@ struct xfs_alloc_arg; /* * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that * have been freed but whose transactions aren't committed to disk yet. - * - * Note that we use the transaction ID to record the transaction, not the - * transaction structure itself. See xfs_extent_busy_insert() for details. */ struct xfs_extent_busy { struct rb_node rb_node; /* ag by-bno indexed search tree */ @@ -31,6 +28,23 @@ struct xfs_extent_busy { #define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */ }; +/* + * List used to track groups of related busy extents all the way through + * to discard completion. + */ +struct xfs_busy_extents { + struct xfs_mount *mount; + struct list_head extent_list; + struct work_struct endio_work; + + /* + * Owner is the object containing the struct xfs_busy_extents to free + * once the busy extents have been processed. If only the + * xfs_busy_extents object needs freeing, then point this at itself. + */ + void *owner; +}; + void xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ebc70aaa299c..67a99d94701e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -16,8 +16,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_trace.h" - -struct workqueue_struct *xfs_discard_wq; +#include "xfs_discard.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -103,7 +102,7 @@ xlog_cil_ctx_alloc(void) ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); INIT_LIST_HEAD(&ctx->committing); - INIT_LIST_HEAD(&ctx->busy_extents); + INIT_LIST_HEAD(&ctx->busy_extents.extent_list); INIT_LIST_HEAD(&ctx->log_items); INIT_LIST_HEAD(&ctx->lv_chain); INIT_WORK(&ctx->push_work, xlog_cil_push_work); @@ -132,7 +131,7 @@ xlog_cil_push_pcp_aggregate( if (!list_empty(&cilpcp->busy_extents)) { list_splice_init(&cilpcp->busy_extents, - &ctx->busy_extents); + &ctx->busy_extents.extent_list); } if (!list_empty(&cilpcp->log_items)) list_splice_init(&cilpcp->log_items, &ctx->log_items); @@ -708,76 +707,6 @@ xlog_cil_free_logvec( } } -static void -xlog_discard_endio_work( - struct work_struct *work) -{ - struct xfs_cil_ctx *ctx = - container_of(work, struct xfs_cil_ctx, discard_endio_work); - struct xfs_mount *mp = ctx->cil->xc_log->l_mp; - - xfs_extent_busy_clear(mp, &ctx->busy_extents, false); - kmem_free(ctx); -} - -/* - * Queue up the actual completion to a thread to avoid IRQ-safe locking for - * pagb_lock. Note that we need a unbounded workqueue, otherwise we might - * get the execution delayed up to 30 seconds for weird reasons. - */ -static void -xlog_discard_endio( - struct bio *bio) -{ - struct xfs_cil_ctx *ctx = bio->bi_private; - - INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); - queue_work(xfs_discard_wq, &ctx->discard_endio_work); - bio_put(bio); -} - -static void -xlog_discard_busy_extents( - struct xfs_mount *mp, - struct xfs_cil_ctx *ctx) -{ - struct list_head *list = &ctx->busy_extents; - struct xfs_extent_busy *busyp; - struct bio *bio = NULL; - struct blk_plug plug; - int error = 0; - - ASSERT(xfs_has_discard(mp)); - - blk_start_plug(&plug); - list_for_each_entry(busyp, list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); - - error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), - XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, &bio); - if (error && error != -EOPNOTSUPP) { - xfs_info(mp, - "discard failed for extent [0x%llx,%u], error %d", - (unsigned long long)busyp->bno, - busyp->length, - error); - break; - } - } - - if (bio) { - bio->bi_private = ctx; - bio->bi_end_io = xlog_discard_endio; - submit_bio(bio); - } else { - xlog_discard_endio_work(&ctx->discard_endio_work); - } - blk_finish_plug(&plug); -} - /* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as @@ -807,8 +736,8 @@ xlog_cil_committed( xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, ctx->start_lsn, abort); - xfs_extent_busy_sort(&ctx->busy_extents); - xfs_extent_busy_clear(mp, &ctx->busy_extents, + xfs_extent_busy_sort(&ctx->busy_extents.extent_list); + xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list, xfs_has_discard(mp) && !abort); spin_lock(&ctx->cil->xc_push_lock); @@ -817,10 +746,14 @@ xlog_cil_committed( xlog_cil_free_logvec(&ctx->lv_chain); - if (!list_empty(&ctx->busy_extents)) - xlog_discard_busy_extents(mp, ctx); - else - kmem_free(ctx); + if (!list_empty(&ctx->busy_extents.extent_list)) { + ctx->busy_extents.mount = mp; + ctx->busy_extents.owner = ctx; + xfs_discard_extents(mp, &ctx->busy_extents); + return; + } + + kmem_free(ctx); } void diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index af87648331d5..fa3ad1d7b31c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -6,6 +6,8 @@ #ifndef __XFS_LOG_PRIV_H__ #define __XFS_LOG_PRIV_H__ +#include "xfs_extent_busy.h" /* for struct xfs_busy_extents */ + struct xfs_buf; struct xlog; struct xlog_ticket; @@ -223,12 +225,11 @@ struct xfs_cil_ctx { struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ atomic_t space_used; /* aggregate size of regions */ - struct list_head busy_extents; /* busy extents in chkpt */ + struct xfs_busy_extents busy_extents; struct list_head log_items; /* log items in chkpt */ struct list_head lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ - struct work_struct discard_endio_work; struct work_struct push_work; atomic_t order_id; -- cgit From 89cfa899608fc28d018bdf9551a6ff508ea6207e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Oct 2023 09:24:52 +1100 Subject: xfs: reduce AGF hold times during fstrim operations fstrim will hold the AGF lock for as long as it takes to walk and discard all the free space in the AG that meets the userspace trim criteria. For AGs with lots of free space extents (e.g. millions) or the underlying device is really slow at processing discard requests (e.g. Ceph RBD), this means the AGF hold time is often measured in minutes to hours, not a few milliseconds as we normal see with non-discard based operations. This can result in the entire filesystem hanging whilst the long-running fstrim is in progress. We can have transactions get stuck waiting for the AGF lock (data or metadata extent allocation and freeing), and then more transactions get stuck waiting on the locks those transactions hold. We can get to the point where fstrim blocks an extent allocation or free operation long enough that it ends up pinning the tail of the log and the log then runs out of space. At this point, every modification in the filesystem gets blocked. This includes read operations, if atime updates need to be made. To fix this problem, we need to be able to discard free space extents safely without holding the AGF lock. Fortunately, we already do this with online discard via busy extents. We can mark free space extents as "busy being discarded" under the AGF lock and then unlock the AGF, knowing that nobody will be able to allocate that free space extent until we remove it from the busy tree. Modify xfs_trim_extents to use the same asynchronous discard mechanism backed by busy extents as is used with online discard. This results in the AGF only needing to be held for short periods of time and it is never held while we issue discards. Hence if discard submission gets throttled because it is slow and/or there are lots of them, we aren't preventing other operations from being performed on AGF while we wait for discards to complete... Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_discard.c | 174 ++++++++++++++++++++++++++++++++++++++++++----- fs/xfs/xfs_extent_busy.c | 34 +++++++-- fs/xfs/xfs_extent_busy.h | 4 ++ 3 files changed, 188 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 3f45c7bb94f2..f16b254b5eaa 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -19,6 +19,56 @@ #include "xfs_log.h" #include "xfs_ag.h" +/* + * Notes on an efficient, low latency fstrim algorithm + * + * We need to walk the filesystem free space and issue discards on the free + * space that meet the search criteria (size and location). We cannot issue + * discards on extents that might be in use, or are so recently in use they are + * still marked as busy. To serialise against extent state changes whilst we are + * gathering extents to trim, we must hold the AGF lock to lock out other + * allocations and extent free operations that might change extent state. + * + * However, we cannot just hold the AGF for the entire AG free space walk whilst + * we issue discards on each free space that is found. Storage devices can have + * extremely slow discard implementations (e.g. ceph RBD) and so walking a + * couple of million free extents and issuing synchronous discards on each + * extent can take a *long* time. Whilst we are doing this walk, nothing else + * can access the AGF, and we can stall transactions and hence the log whilst + * modifications wait for the AGF lock to be released. This can lead hung tasks + * kicking the hung task timer and rebooting the system. This is bad. + * + * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI + * lock, gathers a range of inode cluster buffers that are allocated, drops the + * AGI lock and then reads all the inode cluster buffers and processes them. It + * loops doing this, using a cursor to keep track of where it is up to in the AG + * for each iteration to restart the INOBT lookup from. + * + * We can't do this exactly with free space - once we drop the AGF lock, the + * state of the free extent is out of our control and we cannot run a discard + * safely on it in this situation. Unless, of course, we've marked the free + * extent as busy and undergoing a discard operation whilst we held the AGF + * locked. + * + * This is exactly how online discard works - free extents are marked busy when + * they are freed, and once the extent free has been committed to the journal, + * the busy extent record is marked as "undergoing discard" and the discard is + * then issued on the free extent. Once the discard completes, the busy extent + * record is removed and the extent is able to be allocated again. + * + * In the context of fstrim, if we find a free extent we need to discard, we + * don't have to discard it immediately. All we need to do it record that free + * extent as being busy and under discard, and all the allocation routines will + * now avoid trying to allocate it. Hence if we mark the extent as busy under + * the AGF lock, we can safely discard it without holding the AGF lock because + * nothing will attempt to allocate that free space until the discard completes. + * + * This also allows us to issue discards asynchronously like we do with online + * discard, and so for fast devices fstrim will run much faster as we can have + * multiple discard operations in flight at once, as well as pipeline the free + * extent search so that it overlaps in flight discard IO. + */ + struct workqueue_struct *xfs_discard_wq; static void @@ -94,21 +144,22 @@ xfs_discard_extents( } -STATIC int -xfs_trim_extents( +static int +xfs_trim_gather_extents( struct xfs_perag *pag, xfs_daddr_t start, xfs_daddr_t end, xfs_daddr_t minlen, + struct xfs_alloc_rec_incore *tcur, + struct xfs_busy_extents *extents, uint64_t *blocks_trimmed) { struct xfs_mount *mp = pag->pag_mount; - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct xfs_btree_cur *cur; struct xfs_buf *agbp; - struct xfs_agf *agf; int error; int i; + int batch = 100; /* * Force out the log. This means any transactions that might have freed @@ -120,20 +171,28 @@ xfs_trim_extents( error = xfs_alloc_read_agf(pag, NULL, 0, &agbp); if (error) return error; - agf = agbp->b_addr; cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT); /* - * Look up the longest btree in the AGF and start with it. + * Look up the extent length requested in the AGF and start with it. */ - error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i); + if (tcur->ar_startblock == NULLAGBLOCK) + error = xfs_alloc_lookup_ge(cur, 0, tcur->ar_blockcount, &i); + else + error = xfs_alloc_lookup_le(cur, tcur->ar_startblock, + tcur->ar_blockcount, &i); if (error) goto out_del_cursor; + if (i == 0) { + /* nothing of that length left in the AG, we are done */ + tcur->ar_blockcount = 0; + goto out_del_cursor; + } /* * Loop until we are done with all extents that are large - * enough to be worth discarding. + * enough to be worth discarding or we hit batch limits. */ while (i) { xfs_agblock_t fbno; @@ -148,7 +207,16 @@ xfs_trim_extents( error = -EFSCORRUPTED; break; } - ASSERT(flen <= be32_to_cpu(agf->agf_longest)); + + if (--batch <= 0) { + /* + * Update the cursor to point at this extent so we + * restart the next batch from this extent. + */ + tcur->ar_startblock = fbno; + tcur->ar_blockcount = flen; + break; + } /* * use daddr format for all range/len calculations as that is @@ -163,6 +231,7 @@ xfs_trim_extents( */ if (dlen < minlen) { trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen); + tcur->ar_blockcount = 0; break; } @@ -185,29 +254,98 @@ xfs_trim_extents( goto next_extent; } - trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen); - error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); - if (error) - break; + xfs_extent_busy_insert_discard(pag, fbno, flen, + &extents->extent_list); *blocks_trimmed += flen; - next_extent: error = xfs_btree_decrement(cur, 0, &i); if (error) break; - if (fatal_signal_pending(current)) { - error = -ERESTARTSYS; - break; - } + /* + * If there's no more records in the tree, we are done. Set the + * cursor block count to 0 to indicate to the caller that there + * is no more extents to search. + */ + if (i == 0) + tcur->ar_blockcount = 0; } + /* + * If there was an error, release all the gathered busy extents because + * we aren't going to issue a discard on them any more. + */ + if (error) + xfs_extent_busy_clear(mp, &extents->extent_list, false); out_del_cursor: xfs_btree_del_cursor(cur, error); xfs_buf_relse(agbp); return error; } +/* + * Iterate the free list gathering extents and discarding them. We need a cursor + * for the repeated iteration of gather/discard loop, so use the longest extent + * we found in the last batch as the key to start the next. + */ +static int +xfs_trim_extents( + struct xfs_perag *pag, + xfs_daddr_t start, + xfs_daddr_t end, + xfs_daddr_t minlen, + uint64_t *blocks_trimmed) +{ + struct xfs_alloc_rec_incore tcur = { + .ar_blockcount = pag->pagf_longest, + .ar_startblock = NULLAGBLOCK, + }; + int error = 0; + + do { + struct xfs_busy_extents *extents; + + extents = kzalloc(sizeof(*extents), GFP_KERNEL); + if (!extents) { + error = -ENOMEM; + break; + } + + extents->mount = pag->pag_mount; + extents->owner = extents; + INIT_LIST_HEAD(&extents->extent_list); + + error = xfs_trim_gather_extents(pag, start, end, minlen, + &tcur, extents, blocks_trimmed); + if (error) { + kfree(extents); + break; + } + + /* + * We hand the extent list to the discard function here so the + * discarded extents can be removed from the busy extent list. + * This allows the discards to run asynchronously with gathering + * the next round of extents to discard. + * + * However, we must ensure that we do not reference the extent + * list after this function call, as it may have been freed by + * the time control returns to us. + */ + error = xfs_discard_extents(pag->pag_mount, extents); + if (error) + break; + + if (fatal_signal_pending(current)) { + error = -ERESTARTSYS; + break; + } + } while (tcur.ar_blockcount != 0); + + return error; + +} + /* * trim a range of the filesystem. * diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 7c2fdc71e42d..746814815b1d 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -19,13 +19,13 @@ #include "xfs_log.h" #include "xfs_ag.h" -void -xfs_extent_busy_insert( - struct xfs_trans *tp, +static void +xfs_extent_busy_insert_list( struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, - unsigned int flags) + unsigned int flags, + struct list_head *busy_list) { struct xfs_extent_busy *new; struct xfs_extent_busy *busyp; @@ -40,7 +40,7 @@ xfs_extent_busy_insert( new->flags = flags; /* trace before insert to be able to see failed inserts */ - trace_xfs_extent_busy(tp->t_mountp, pag->pag_agno, bno, len); + trace_xfs_extent_busy(pag->pag_mount, pag->pag_agno, bno, len); spin_lock(&pag->pagb_lock); rbp = &pag->pagb_tree.rb_node; @@ -62,10 +62,32 @@ xfs_extent_busy_insert( rb_link_node(&new->rb_node, parent, rbp); rb_insert_color(&new->rb_node, &pag->pagb_tree); - list_add(&new->list, &tp->t_busy); + list_add(&new->list, busy_list); spin_unlock(&pag->pagb_lock); } +void +xfs_extent_busy_insert( + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_agblock_t bno, + xfs_extlen_t len, + unsigned int flags) +{ + xfs_extent_busy_insert_list(pag, bno, len, flags, &tp->t_busy); +} + +void +xfs_extent_busy_insert_discard( + struct xfs_perag *pag, + xfs_agblock_t bno, + xfs_extlen_t len, + struct list_head *busy_list) +{ + xfs_extent_busy_insert_list(pag, bno, len, XFS_EXTENT_BUSY_DISCARDED, + busy_list); +} + /* * Search for a busy extent within the range of the extent we are about to * allocate. You need to be holding the busy extent tree lock when calling diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 71c28d031e3b..0639aab336f3 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -49,6 +49,10 @@ void xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); +void +xfs_extent_busy_insert_discard(struct xfs_perag *pag, xfs_agblock_t bno, + xfs_extlen_t len, struct list_head *busy_list); + void xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list, bool do_discard); -- cgit From e78a40b851712b422d7d4ae345f25511d47a9a38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Oct 2023 09:25:04 +1100 Subject: xfs: abort fstrim if kernel is suspending A recent ext4 patch posting from Jan Kara reminded me of a discussion a year ago about fstrim in progress preventing kernels from suspending. The fix is simple, we should do the same for XFS. This removes the -ERESTARTSYS error return from this code, replacing it with either the last error seen or the number of blocks successfully trimmed up to the point where we detected the stop condition. References: https://bugzilla.kernel.org/show_bug.cgi?id=216322 Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_discard.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f16b254b5eaa..d5787991bb5b 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -283,6 +283,12 @@ out_del_cursor: return error; } +static bool +xfs_trim_should_stop(void) +{ + return fatal_signal_pending(current) || freezing(current); +} + /* * Iterate the free list gathering extents and discarding them. We need a cursor * for the repeated iteration of gather/discard loop, so use the longest extent @@ -336,10 +342,9 @@ xfs_trim_extents( if (error) break; - if (fatal_signal_pending(current)) { - error = -ERESTARTSYS; + if (xfs_trim_should_stop()) break; - } + } while (tcur.ar_blockcount != 0); return error; @@ -408,12 +413,12 @@ xfs_ioc_trim( for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) { error = xfs_trim_extents(pag, start, end, minlen, &blocks_trimmed); - if (error) { + if (error) last_error = error; - if (error == -ERESTARTSYS) { - xfs_perag_rele(pag); - break; - } + + if (xfs_trim_should_stop()) { + xfs_perag_rele(pag); + break; } } -- cgit From a0c55bba0d0d0b5591083f65f830940d8ae63f31 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 26 Sep 2023 21:30:54 +0900 Subject: rswitch: Fix PHY station management clock setting Fix the MPIC.PSMCS value following the programming example in the section 6.4.2 Management Data Clock (MDC) Setting, Ethernet MAC IP, S4 Hardware User Manual Rev.1.00. The value is calculated by MPIC.PSMCS = clk[MHz] / (MDC frequency[MHz] * 2) - 1 with the input clock frequency from clk_get_rate() and MDC frequency of 2.5MHz. Otherwise, this driver cannot communicate PHYs on the R-Car S4 Starter Kit board. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Reported-by: Tam Nguyen Signed-off-by: Yoshihiro Shimoda Tested-by: Kuninori Morimoto Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230926123054.3976752-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 13 ++++++++++++- drivers/net/ethernet/renesas/rswitch.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index ea9186178091..fc01ad3f340d 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -4,6 +4,7 @@ * Copyright (C) 2022 Renesas Electronics Corporation */ +#include #include #include #include @@ -1049,7 +1050,7 @@ static void rswitch_rmac_setting(struct rswitch_etha *etha, const u8 *mac) static void rswitch_etha_enable_mii(struct rswitch_etha *etha) { rswitch_modify(etha->addr, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, - MPIC_PSMCS(0x05) | MPIC_PSMHT(0x06)); + MPIC_PSMCS(etha->psmcs) | MPIC_PSMHT(0x06)); rswitch_modify(etha->addr, MPSM, 0, MPSM_MFF_C45); } @@ -1693,6 +1694,12 @@ static void rswitch_etha_init(struct rswitch_private *priv, int index) etha->index = index; etha->addr = priv->addr + RSWITCH_ETHA_OFFSET + index * RSWITCH_ETHA_SIZE; etha->coma_addr = priv->addr; + + /* MPIC.PSMCS = (clk [MHz] / (MDC frequency [MHz] * 2) - 1. + * Calculating PSMCS value as MDC frequency = 2.5MHz. So, multiply + * both the numerator and the denominator by 10. + */ + etha->psmcs = clk_get_rate(priv->clk) / 100000 / (25 * 2) - 1; } static int rswitch_device_alloc(struct rswitch_private *priv, int index) @@ -1900,6 +1907,10 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&priv->lock); + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) + return PTR_ERR(priv->clk); + attr = soc_device_match(rswitch_soc_no_speed_change); if (attr) priv->etha_no_runtime_change = true; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index f0c16a37ea55..04f49a7a5843 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -915,6 +915,7 @@ struct rswitch_etha { bool external_phy; struct mii_bus *mii; phy_interface_t phy_interface; + u32 psmcs; u8 mac_addr[MAX_ADDR_LEN]; int link; int speed; @@ -1012,6 +1013,7 @@ struct rswitch_private { struct rswitch_mfwd mfwd; spinlock_t lock; /* lock interrupt registers' control */ + struct clk *clk; bool etha_no_runtime_change; bool gwca_halt; -- cgit From 9147b9ded499d9853bdf0e9804b7eaa99c4429ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Sep 2023 15:47:27 -0400 Subject: btrfs: fix some -Wmaybe-uninitialized warnings in ioctl.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jens reported the following warnings from -Wmaybe-uninitialized recent Linus' branch. In file included from ./include/asm-generic/rwonce.h:26, from ./arch/arm64/include/asm/rwonce.h:71, from ./include/linux/compiler.h:246, from ./include/linux/export.h:5, from ./include/linux/linkage.h:7, from ./include/linux/kernel.h:17, from fs/btrfs/ioctl.c:6: In function ‘instrument_copy_from_user_before’, inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3, inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7, inlined from ‘btrfs_ioctl_space_info’ at fs/btrfs/ioctl.c:2999:6, inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4616:10: ./include/linux/kasan-checks.h:38:27: warning: ‘space_args’ may be used uninitialized [-Wmaybe-uninitialized] 38 | #define kasan_check_write __kasan_check_write ./include/linux/instrumented.h:129:9: note: in expansion of macro ‘kasan_check_write’ 129 | kasan_check_write(to, n); | ^~~~~~~~~~~~~~~~~ ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’: ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const volatile void *’ to ‘__kasan_check_write’ declared here 20 | bool __kasan_check_write(const volatile void *p, unsigned int size); | ^~~~~~~~~~~~~~~~~~~ fs/btrfs/ioctl.c:2981:39: note: ‘space_args’ declared here 2981 | struct btrfs_ioctl_space_args space_args; | ^~~~~~~~~~ In function ‘instrument_copy_from_user_before’, inlined from ‘_copy_from_user’ at ./include/linux/uaccess.h:148:3, inlined from ‘copy_from_user’ at ./include/linux/uaccess.h:183:7, inlined from ‘_btrfs_ioctl_send’ at fs/btrfs/ioctl.c:4343:9, inlined from ‘btrfs_ioctl’ at fs/btrfs/ioctl.c:4658:10: ./include/linux/kasan-checks.h:38:27: warning: ‘args32’ may be used uninitialized [-Wmaybe-uninitialized] 38 | #define kasan_check_write __kasan_check_write ./include/linux/instrumented.h:129:9: note: in expansion of macro ‘kasan_check_write’ 129 | kasan_check_write(to, n); | ^~~~~~~~~~~~~~~~~ ./include/linux/kasan-checks.h: In function ‘btrfs_ioctl’: ./include/linux/kasan-checks.h:20:6: note: by argument 1 of type ‘const volatile void *’ to ‘__kasan_check_write’ declared here 20 | bool __kasan_check_write(const volatile void *p, unsigned int size); | ^~~~~~~~~~~~~~~~~~~ fs/btrfs/ioctl.c:4341:49: note: ‘args32’ declared here 4341 | struct btrfs_ioctl_send_args_32 args32; | ^~~~~~ This was due to his config options and having KASAN turned on, which adds some extra checks around copy_from_user(), which then triggered the -Wmaybe-uninitialized checker for these cases. Fix the warnings by initializing the different structs we're copying into. Reported-by: Jens Axboe Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d27b0d86b8e2..bf35b6fce8f0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2978,7 +2978,7 @@ static void get_block_group_info(struct list_head *groups_list, static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, void __user *arg) { - struct btrfs_ioctl_space_args space_args; + struct btrfs_ioctl_space_args space_args = { 0 }; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest_orig; @@ -4338,7 +4338,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat if (compat) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) - struct btrfs_ioctl_send_args_32 args32; + struct btrfs_ioctl_send_args_32 args32 = { 0 }; ret = copy_from_user(&args32, argp, sizeof(args32)); if (ret) -- cgit From 5f521494cc73520ffac18ede0758883b9aedd018 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 27 Sep 2023 10:43:15 +0930 Subject: btrfs: reject unknown mount options early [BUG] The following script would allow invalid mount options to be specified (although such invalid options would just be ignored): # mkfs.btrfs -f $dev # mount $dev $mnt1 <<< Successful mount expected # mount $dev $mnt2 -o junk <<< Failed mount expected # echo $? 0 [CAUSE] For the 2nd mount, since the fs is already mounted, we won't go through open_ctree() thus no btrfs_parse_options(), but only through btrfs_parse_subvol_options(). However we do not treat unrecognized options from valid but irrelevant options, thus those invalid options would just be ignored by btrfs_parse_subvol_options(). [FIX] Add the handling for Opt_err to handle invalid options and error out, while still ignore other valid options inside btrfs_parse_subvol_options(). Reported-by: Anand Jain CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1a093ec0f7e3..f49e597e197f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -954,6 +954,10 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name, *subvol_objectid = subvolid; break; + case Opt_err: + btrfs_err(NULL, "unrecognized mount option '%s'", p); + error = -EINVAL; + goto out; default: break; } -- cgit From f8d1b011ca8c9d64ce32da431a8420635a96958a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 26 Sep 2023 19:31:19 +0100 Subject: btrfs: always print transaction aborted messages with an error level Commit b7af0635c87f ("btrfs: print transaction aborted messages with an error level") changed the log level of transaction aborted messages from a debug level to an error level, so that such messages are always visible even on production systems where the log level is normally above the debug level (and also on some syzbot reports). Later, commit fccf0c842ed4 ("btrfs: move btrfs_abort_transaction to transaction.c") changed the log level back to debug level when the error number for a transaction abort should not have a stack trace printed. This happened for absolutely no reason. It's always useful to print transaction abort messages with an error level, regardless of whether the error number should cause a stack trace or not. So change back the log level to error level. Fixes: fccf0c842ed4 ("btrfs: move btrfs_abort_transaction to transaction.c") CC: stable@vger.kernel.org # 6.5+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/transaction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 6b309f8a99a8..93869cda6af9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -219,8 +219,8 @@ do { \ (errno))) { \ /* Stack trace printed. */ \ } else { \ - btrfs_debug((trans)->fs_info, \ - "Transaction aborted (error %d)", \ + btrfs_err((trans)->fs_info, \ + "Transaction aborted (error %d)", \ (errno)); \ } \ } \ -- cgit From 48774f3bf8b4dd3b1a0e155825c9ce48483db14c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Sep 2023 12:09:21 +0100 Subject: btrfs: error out when COWing block using a stale transaction At btrfs_cow_block() we have these checks to verify we are not using a stale transaction (a past transaction with an unblocked state or higher), and the only thing we do is to trigger a WARN with a message and a stack trace. This however is a critical problem, highly unexpected and if it happens it's most likely due to a bug, so we should error out and turn the fs into error state so that such issue is much more easily noticed if it's triggered. The problem is critical because using such stale transaction will lead to not persisting the extent buffer used for the COW operation, as allocating a tree block adds the range of the respective extent buffer to the ->dirty_pages iotree of the transaction, and a stale transaction, in the unlocked state or higher, will not flush dirty extent buffers anymore, therefore resulting in not persisting the tree block and resource leaks (not cleaning the dirty_pages iotree for example). So do the following changes: 1) Return -EUCLEAN if we find a stale transaction; 2) Turn the fs into error state, with error -EUCLEAN, so that no transaction can be committed, and generate a stack trace; 3) Combine both conditions into a single if statement, as both are related and have the same error message; 4) Mark the check as unlikely, since this is not expected to ever happen. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a4cb4b642987..7afd0a6495f3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -686,14 +686,22 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_err(fs_info, "COW'ing blocks on a fs root that's being dropped"); - if (trans->transaction != fs_info->running_transaction) - WARN(1, KERN_CRIT "trans %llu running %llu\n", - trans->transid, - fs_info->running_transaction->transid); - - if (trans->transid != fs_info->generation) - WARN(1, KERN_CRIT "trans %llu running %llu\n", - trans->transid, fs_info->generation); + /* + * COWing must happen through a running transaction, which always + * matches the current fs generation (it's a transaction with a state + * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs + * into error state to prevent the commit of any transaction. + */ + if (unlikely(trans->transaction != fs_info->running_transaction || + trans->transid != fs_info->generation)) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, +"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu", + buf->start, btrfs_root_id(root), trans->transid, + fs_info->running_transaction->transid, + fs_info->generation); + return -EUCLEAN; + } if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; -- cgit From a2caab29884397e583d09be6546259a83ebfbdb1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Sep 2023 12:09:22 +0100 Subject: btrfs: error when COWing block from a root that is being deleted At btrfs_cow_block() we check if the block being COWed belongs to a root that is being deleted and if so we log an error message. However this is an unexpected case and it indicates a bug somewhere, so we should return an error and abort the transaction. So change this in the following ways: 1) Abort the transaction with -EUCLEAN, so that if the issue ever happens it can easily be noticed; 2) Change the logged message level from error to critical, and change the message itself to print the block's logical address and the ID of the root; 3) Return -EUCLEAN to the caller; 4) As this is an unexpected scenario, that should never happen, mark the check as unlikely, allowing the compiler to potentially generate better code. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7afd0a6495f3..db1f3bc7f328 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -682,9 +682,13 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start; int ret; - if (test_bit(BTRFS_ROOT_DELETING, &root->state)) - btrfs_err(fs_info, - "COW'ing blocks on a fs root that's being dropped"); + if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, + "attempt to COW block %llu on root %llu that is being deleted", + buf->start, btrfs_root_id(root)); + return -EUCLEAN; + } /* * COWing must happen through a running transaction, which always -- cgit From e36f94914021e58ee88a8856c7fdf35adf9c7ee1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Sep 2023 12:09:23 +0100 Subject: btrfs: error out when reallocating block for defrag using a stale transaction At btrfs_realloc_node() we have these checks to verify we are not using a stale transaction (a past transaction with an unblocked state or higher), and the only thing we do is to trigger two WARN_ON(). This however is a critical problem, highly unexpected and if it happens it's most likely due to a bug, so we should error out and turn the fs into error state so that such issue is much more easily noticed if it's triggered. The problem is critical because in btrfs_realloc_node() we COW tree blocks, and using such stale transaction will lead to not persisting the extent buffers used for the COW operations, as allocating tree block adds the range of the respective extent buffers to the ->dirty_pages iotree of the transaction, and a stale transaction, in the unlocked state or higher, will not flush dirty extent buffers anymore, therefore resulting in not persisting the tree block and resource leaks (not cleaning the dirty_pages iotree for example). So do the following changes: 1) Return -EUCLEAN if we find a stale transaction; 2) Turn the fs into error state, with error -EUCLEAN, so that no transaction can be committed, and generate a stack trace; 3) Combine both conditions into a single if statement, as both are related and have the same error message; 4) Mark the check as unlikely, since this is not expected to ever happen. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index db1f3bc7f328..da519c1b6ad0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -817,8 +817,22 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; - WARN_ON(trans->transaction != fs_info->running_transaction); - WARN_ON(trans->transid != fs_info->generation); + /* + * COWing must happen through a running transaction, which always + * matches the current fs generation (it's a transaction with a state + * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs + * into error state to prevent the commit of any transaction. + */ + if (unlikely(trans->transaction != fs_info->running_transaction || + trans->transid != fs_info->generation)) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, +"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu", + parent->start, btrfs_root_id(root), trans->transid, + fs_info->running_transaction->transid, + fs_info->generation); + return -EUCLEAN; + } parent_nritems = btrfs_header_nritems(parent); blocksize = fs_info->nodesize; -- cgit From 07a1141ff170ff5d4f9c4fbb0453727ab48096e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Oct 2023 17:31:06 +0200 Subject: nbd: don't call blk_mark_disk_dead nbd_clear_sock_ioctl blk_mark_disk_dead is the proper interface to shut down a block device, but it also makes the disk unusable forever. nbd_clear_sock_ioctl on the other hand wants to shut down the file system, but allow the block device to be used again when when connected to another socket. Switch nbd to use disk_force_media_change and nbd_bdev_reset to go back to a behavior of the old __invalidate_device call, with the added benefit of incrementing the device generation as there is no guarantee the old content comes back when the device is reconnected. Reported-by: Samuel Holland Reported-by: Shinichiro Kawasaki Fixes: 0c1c9a27ce90 ("nbd: call blk_mark_disk_dead in nbd_clear_sock_ioctl") Signed-off-by: Christoph Hellwig Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20231003153106.1331363-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index df1cd0f718b8..800f131222fc 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1436,8 +1436,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd) static void nbd_clear_sock_ioctl(struct nbd_device *nbd) { - blk_mark_disk_dead(nbd->disk); nbd_clear_sock(nbd); + disk_force_media_change(nbd->disk); + nbd_bdev_reset(nbd); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); -- cgit From 7aed44babc7f97e82b38e9a68515e699692cc100 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 25 Sep 2023 12:30:57 +0200 Subject: vringh: don't use vringh_kiov_advance() in vringh_iov_xfer() In the while loop of vringh_iov_xfer(), `partlen` could be 0 if one of the `iov` has 0 lenght. In this case, we should skip the iov and go to the next one. But calling vringh_kiov_advance() with 0 lenght does not cause the advancement, since it returns immediately if asked to advance by 0 bytes. Let's restore the code that was there before commit b8c06ad4d67d ("vringh: implement vringh_kiov_advance()"), avoiding using vringh_kiov_advance(). Fixes: b8c06ad4d67d ("vringh: implement vringh_kiov_advance()") Cc: stable@vger.kernel.org Reported-by: Jason Wang Signed-off-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vringh.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 955d938eb663..7b8fd977f71c 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -123,8 +123,18 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh, done += partlen; len -= partlen; ptr += partlen; + iov->consumed += partlen; + iov->iov[iov->i].iov_len -= partlen; + iov->iov[iov->i].iov_base += partlen; - vringh_kiov_advance(iov, partlen); + if (!iov->iov[iov->i].iov_len) { + /* Fix up old iov element then increment. */ + iov->iov[iov->i].iov_len = iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } } return done; } -- cgit From c0409dd3d151f661e7e57b901a81a02565df163c Mon Sep 17 00:00:00 2001 From: Rex Zhang Date: Sat, 16 Sep 2023 14:06:19 +0800 Subject: dmaengine: idxd: use spin_lock_irqsave before wait_event_lock_irq In idxd_cmd_exec(), wait_event_lock_irq() explicitly calls spin_unlock_irq()/spin_lock_irq(). If the interrupt is on before entering wait_event_lock_irq(), it will become off status after wait_event_lock_irq() is called. Later, wait_for_completion() may go to sleep but irq is disabled. The scenario is warned in might_sleep(). Fix it by using spin_lock_irqsave() instead of the primitive spin_lock() to save the irq status before entering wait_event_lock_irq() and using spin_unlock_irqrestore() instead of the primitive spin_unlock() to restore the irq status before entering wait_for_completion(). Before the change: idxd_cmd_exec() { interrupt is on spin_lock() // interrupt is on wait_event_lock_irq() spin_unlock_irq() // interrupt is enabled ... spin_lock_irq() // interrupt is disabled spin_unlock() // interrupt is still disabled wait_for_completion() // report "BUG: sleeping function // called from invalid context... // in_atomic() irqs_disabled()" } After applying spin_lock_irqsave(): idxd_cmd_exec() { interrupt is on spin_lock_irqsave() // save the on state // interrupt is disabled wait_event_lock_irq() spin_unlock_irq() // interrupt is enabled ... spin_lock_irq() // interrupt is disabled spin_unlock_irqrestore() // interrupt is restored to on wait_for_completion() // No Call trace } Fixes: f9f4082dbc56 ("dmaengine: idxd: remove interrupt disable for cmd_lock") Signed-off-by: Rex Zhang Signed-off-by: Lijun Pan Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20230916060619.3744220-1-rex.zhang@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 22d6f4e455b7..8f754f922217 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -477,6 +477,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, union idxd_command_reg cmd; DECLARE_COMPLETION_ONSTACK(done); u32 stat; + unsigned long flags; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -490,7 +491,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, cmd.operand = operand; cmd.int_req = 1; - spin_lock(&idxd->cmd_lock); + spin_lock_irqsave(&idxd->cmd_lock, flags); wait_event_lock_irq(idxd->cmd_waitq, !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), idxd->cmd_lock); @@ -507,7 +508,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, * After command submitted, release lock and go to sleep until * the command completes via interrupt. */ - spin_unlock(&idxd->cmd_lock); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); wait_for_completion(&done); stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); spin_lock(&idxd->cmd_lock); -- cgit From 37d4f55567982e445f86dc0ff4ecfa72921abfe8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Sep 2023 17:04:43 +0300 Subject: net: ethernet: ti: am65-cpsw: Fix error code in am65_cpsw_nuss_init_tx_chns() This accidentally returns success, but it should return a negative error code. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index bea6fc0f324c..31e84c503e22 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1750,6 +1750,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) if (tx_chn->irq <= 0) { dev_err(dev, "Failed to get tx dma irq %d\n", tx_chn->irq); + ret = tx_chn->irq ?: -ENXIO; goto err; } -- cgit From a325f174d70828f62872847b703206566dcdb64c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Sep 2023 17:05:59 +0300 Subject: net: ti: icssg-prueth: Fix signedness bug in prueth_init_tx_chns() The "tx_chn->irq" variable is unsigned so the error checking does not work correctly. Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 410612f43cbd..e3dcb0845fb6 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -316,12 +316,14 @@ static int prueth_init_tx_chns(struct prueth_emac *emac) goto fail; } - tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); - if (tx_chn->irq <= 0) { - ret = -EINVAL; + ret = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); + if (ret <= 0) { + if (!ret) + ret = -EINVAL; netdev_err(ndev, "failed to get tx irq\n"); goto fail; } + tx_chn->irq = ret; snprintf(tx_chn->name, sizeof(tx_chn->name), "%s-tx%d", dev_name(dev), tx_chn->id); -- cgit From f9a1d3216a4942cfe00bc424b80b2b80e8ee05c1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Sep 2023 17:06:58 +0300 Subject: dmaengine: ti: k3-udma-glue: clean up k3_udma_glue_tx_get_irq() return The k3_udma_glue_tx_get_irq() function currently returns negative error codes on error, zero on error and positive values for success. This complicates life for the callers who need to propagate the error code. Also GCC will not warn about unsigned comparisons when you check: if (unsigned_irq <= 0) All the callers have been fixed now but let's just make this easy going forward. Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Acked-by: Vinod Koul Signed-off-by: David S. Miller --- drivers/dma/ti/k3-udma-glue.c | 3 +++ drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 ++-- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 4 +--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index 789193ed0386..c278d5facf7d 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -558,6 +558,9 @@ int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn) tx_chn->virq = k3_ringacc_get_ring_irq_num(tx_chn->ringtxcq); } + if (!tx_chn->virq) + return -ENXIO; + return tx_chn->virq; } EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_irq); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 31e84c503e22..24120605502f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1747,10 +1747,10 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) } tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); - if (tx_chn->irq <= 0) { + if (tx_chn->irq < 0) { dev_err(dev, "Failed to get tx dma irq %d\n", tx_chn->irq); - ret = tx_chn->irq ?: -ENXIO; + ret = tx_chn->irq; goto err; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index e3dcb0845fb6..4914d0ef58e9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -317,9 +317,7 @@ static int prueth_init_tx_chns(struct prueth_emac *emac) } ret = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); - if (ret <= 0) { - if (!ret) - ret = -EINVAL; + if (ret < 0) { netdev_err(ndev, "failed to get tx irq\n"); goto fail; } -- cgit From 528ab3e605cabf2f9c9bd5944d3bfe15f6e94f81 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 25 Sep 2023 16:28:18 +0200 Subject: platform/x86: think-lmi: Fix reference leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a duplicate attribute is found using kset_find_obj(), a reference to that attribute is returned which needs to be disposed accordingly using kobject_put(). Move the setting name validation into a separate function to allow for this change without having to duplicate the cleanup code for this setting. As a side note, a very similar bug was fixed in commit 7295a996fdab ("platform/x86: dell-sysman: Fix reference leak"), so it seems that the bug was copied from that driver. Compile-tested only. Fixes: 1bcad8e510b2 ("platform/x86: think-lmi: Fix issues with duplicate attributes") Reviewed-by: Mark Pearson Reviewed-by: Ilpo Järvinen Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20230925142819.74525-2-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 79346881cadb..aee869769843 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1248,6 +1248,24 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.authentication_kset); } +static int tlmi_validate_setting_name(struct kset *attribute_kset, char *name) +{ + struct kobject *duplicate; + + if (!strcmp(name, "Reserved")) + return -EINVAL; + + duplicate = kset_find_obj(attribute_kset, name); + if (duplicate) { + pr_debug("Duplicate attribute name found - %s\n", name); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); + return -EBUSY; + } + + return 0; +} + static int tlmi_sysfs_init(void) { int i, ret; @@ -1276,10 +1294,8 @@ static int tlmi_sysfs_init(void) continue; /* check for duplicate or reserved values */ - if (kset_find_obj(tlmi_priv.attribute_kset, tlmi_priv.setting[i]->display_name) || - !strcmp(tlmi_priv.setting[i]->display_name, "Reserved")) { - pr_debug("duplicate or reserved attribute name found - %s\n", - tlmi_priv.setting[i]->display_name); + if (tlmi_validate_setting_name(tlmi_priv.attribute_kset, + tlmi_priv.setting[i]->display_name) < 0) { kfree(tlmi_priv.setting[i]->possible_values); kfree(tlmi_priv.setting[i]); tlmi_priv.setting[i] = NULL; -- cgit From 981368e1440b76f68b1ac8f5fb14e739f80ecc4e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 25 Sep 2023 16:28:19 +0200 Subject: platform/x86: hp-bioscfg: Fix reference leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a duplicate attribute is found using kset_find_obj(), a reference to that attribute is returned which needs to be disposed accordingly using kobject_put(). Use kobject_put() to dispose the duplicate attribute in such a case. As a side note, a very similar bug was fixed in commit 7295a996fdab ("platform/x86: dell-sysman: Fix reference leak"), so it seems that the bug was copied from that driver. Compile-tested only. Fixes: a34fc329b189 ("platform/x86: hp-bioscfg: bioscfg") Suggested-by: Ilpo Järvinen Reviewed-by: Ilpo Järvinen Signed-off-by: Armin Wolf Reviewed-by: Jorge Lopez Link: https://lore.kernel.org/r/20230925142819.74525-3-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 8c4f9e12f018..5798b49ddaba 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -659,7 +659,7 @@ static int hp_init_bios_package_attribute(enum hp_wmi_data_type attr_type, const char *guid, int min_elements, int instance_id) { - struct kobject *attr_name_kobj; + struct kobject *attr_name_kobj, *duplicate; union acpi_object *elements; struct kset *temp_kset; @@ -704,8 +704,11 @@ static int hp_init_bios_package_attribute(enum hp_wmi_data_type attr_type, } /* All duplicate attributes found are ignored */ - if (kset_find_obj(temp_kset, str_value)) { + duplicate = kset_find_obj(temp_kset, str_value); + if (duplicate) { pr_debug("Duplicate attribute name found - %s\n", str_value); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); goto pack_attr_exit; } @@ -768,7 +771,7 @@ static int hp_init_bios_buffer_attribute(enum hp_wmi_data_type attr_type, const char *guid, int min_elements, int instance_id) { - struct kobject *attr_name_kobj; + struct kobject *attr_name_kobj, *duplicate; struct kset *temp_kset; char str[MAX_BUFF_SIZE]; @@ -794,8 +797,11 @@ static int hp_init_bios_buffer_attribute(enum hp_wmi_data_type attr_type, temp_kset = bioscfg_drv.main_dir_kset; /* All duplicate attributes found are ignored */ - if (kset_find_obj(temp_kset, str)) { + duplicate = kset_find_obj(temp_kset, str); + if (duplicate) { pr_debug("Duplicate attribute name found - %s\n", str); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); goto buff_attr_exit; } -- cgit From 2545deba314eec91dc5ca1a954fe97f91ef1cf07 Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Wed, 27 Sep 2023 11:48:24 -0700 Subject: platform/x86/intel/ifs: release cpus_read_lock() Couple of error paths in do_core_test() was returning directly without doing a necessary cpus_read_unlock(). Following lockdep warning was observed when exercising these scenarios with PROVE_RAW_LOCK_NESTING enabled: [ 139.304775] ================================================ [ 139.311185] WARNING: lock held when returning to user space! [ 139.317593] 6.6.0-rc2ifs01+ #11 Tainted: G S W I [ 139.324499] ------------------------------------------------ [ 139.330908] bash/11476 is leaving the kernel with locks still held! [ 139.338000] 1 lock held by bash/11476: [ 139.342262] #0: ffffffffaa26c930 (cpu_hotplug_lock){++++}-{0:0}, at: do_core_test+0x35/0x1c0 [intel_ifs] Fix the flow so that all scenarios release the lock prior to returning from the function. Fixes: 5210fb4e1880 ("platform/x86/intel/ifs: Sysfs interface for Array BIST") Cc: stable@vger.kernel.org Signed-off-by: Jithu Joseph Link: https://lore.kernel.org/r/20230927184824.2566086-1-jithu.joseph@intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/ifs/runtest.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 1061eb7ec399..43c864add778 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -331,14 +331,15 @@ int do_core_test(int cpu, struct device *dev) switch (test->test_num) { case IFS_TYPE_SAF: if (!ifsd->loaded) - return -EPERM; - ifs_test_core(cpu, dev); + ret = -EPERM; + else + ifs_test_core(cpu, dev); break; case IFS_TYPE_ARRAY_BIST: ifs_array_test_core(cpu, dev); break; default: - return -EINVAL; + ret = -EINVAL; } out: cpus_read_unlock(); -- cgit From cbf92564100cf26000ef0af5e0f4e85ed0a13f4b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Sep 2023 22:40:11 -0700 Subject: platform/mellanox: tmfifo: fix kernel-doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc notation for structs and struct members to prevent these warnings: mlxbf-tmfifo.c:73: warning: cannot understand function prototype: 'struct mlxbf_tmfifo_vring ' mlxbf-tmfifo.c:128: warning: cannot understand function prototype: 'struct mlxbf_tmfifo_vdev ' mlxbf-tmfifo.c:146: warning: cannot understand function prototype: 'struct mlxbf_tmfifo_irq_info ' mlxbf-tmfifo.c:158: warning: cannot understand function prototype: 'struct mlxbf_tmfifo_io ' mlxbf-tmfifo.c:182: warning: cannot understand function prototype: 'struct mlxbf_tmfifo ' mlxbf-tmfifo.c:208: warning: cannot understand function prototype: 'struct mlxbf_tmfifo_msg_hdr ' mlxbf-tmfifo.c:138: warning: Function parameter or member 'config' not described in 'mlxbf_tmfifo_vdev' mlxbf-tmfifo.c:212: warning: Function parameter or member 'unused' not described in 'mlxbf_tmfifo_msg_hdr' Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc") Fixes: bc05ea63b394 ("platform/mellanox: Add BlueField-3 support in the tmfifo driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: lore.kernel.org/r/202309252330.saRU491h-lkp@intel.com Cc: Liming Sun Cc: Hans de Goede Cc: Ilpo Järvinen Cc: Mark Gross Cc: Vadim Pasternak Cc: platform-driver-x86@vger.kernel.org Link: https://lore.kernel.org/r/20230926054013.11450-1-rdunlap@infradead.org Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-tmfifo.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index f3696a54a2bd..fd38d8c8371e 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -53,7 +53,7 @@ struct mlxbf_tmfifo; /** - * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring + * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring * @va: virtual address of the ring * @dma: dma address of the ring * @vq: pointer to the virtio virtqueue @@ -113,12 +113,13 @@ enum { }; /** - * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device + * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device * @vdev: virtio device, in which the vdev.id.device field has the * VIRTIO_ID_xxx id to distinguish the virtual device. * @status: status of the device * @features: supported features of the device * @vrings: array of tmfifo vrings of this device + * @config: non-anonymous union for cons and net * @config.cons: virtual console config - * select if vdev.id.device is VIRTIO_ID_CONSOLE * @config.net: virtual network config - @@ -138,7 +139,7 @@ struct mlxbf_tmfifo_vdev { }; /** - * mlxbf_tmfifo_irq_info - Structure of the interrupt information + * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information * @fifo: pointer to the tmfifo structure * @irq: interrupt number * @index: index into the interrupt array @@ -150,7 +151,7 @@ struct mlxbf_tmfifo_irq_info { }; /** - * mlxbf_tmfifo_io - Structure of the TmFifo IO resource (for both rx & tx) + * struct mlxbf_tmfifo_io - Structure of the TmFifo IO resource (for both rx & tx) * @ctl: control register offset (TMFIFO_RX_CTL / TMFIFO_TX_CTL) * @sts: status register offset (TMFIFO_RX_STS / TMFIFO_TX_STS) * @data: data register offset (TMFIFO_RX_DATA / TMFIFO_TX_DATA) @@ -162,7 +163,7 @@ struct mlxbf_tmfifo_io { }; /** - * mlxbf_tmfifo - Structure of the TmFifo + * struct mlxbf_tmfifo - Structure of the TmFifo * @vdev: array of the virtual devices running over the TmFifo * @lock: lock to protect the TmFifo access * @res0: mapped resource block 0 @@ -198,7 +199,7 @@ struct mlxbf_tmfifo { }; /** - * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header + * struct mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header * @type: message type * @len: payload length in network byte order. Messages sent into the FIFO * will be read by the other side as data stream in the same byte order. @@ -208,6 +209,7 @@ struct mlxbf_tmfifo { struct mlxbf_tmfifo_msg_hdr { u8 type; __be16 len; + /* private: */ u8 unused[5]; } __packed __aligned(sizeof(u64)); -- cgit From 34c271e778c1d8589ee9c833eee5ecb6fbb03149 Mon Sep 17 00:00:00 2001 From: Tomasz Swiatek Date: Fri, 22 Sep 2023 18:46:32 +0200 Subject: platform/x86: touchscreen_dmi: Add info for the BUSH Bush Windows tablet Add touchscreen info for the BUSH Bush Windows tablet. It was tested using gslx680_ts_acpi module and on patched kernel installed on device. Link: https://github.com/onitake/gsl-firmware/pull/215 Link: https://github.com/systemd/systemd/pull/29268 Signed-off-by: Tomasz Swiatek Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index f9301a9382e7..363a9848e2b8 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -42,6 +42,21 @@ static const struct ts_dmi_data archos_101_cesium_educ_data = { .properties = archos_101_cesium_educ_props, }; +static const struct property_entry bush_bush_windows_tablet_props[] = { + PROPERTY_ENTRY_U32("touchscreen-size-x", 1850), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1280), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-bush-bush-windows-tablet.fw"), + { } +}; + +static const struct ts_dmi_data bush_bush_windows_tablet_data = { + .acpi_name = "MSSL1680:00", + .properties = bush_bush_windows_tablet_props, +}; + static const struct property_entry chuwi_hi8_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1665), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -1070,6 +1085,13 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ARCHOS 101 Cesium Educ"), }, }, + { + /* Bush Windows tablet */ + .driver_data = (void *)&bush_bush_windows_tablet_data, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Bush Windows tablet"), + }, + }, { /* Chuwi Hi8 */ .driver_data = (void *)&chuwi_hi8_data, -- cgit From 51e7a66666e0ca9642c59464ef8359f0ac604d41 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Tue, 26 Sep 2023 16:42:51 -0500 Subject: ibmveth: Remove condition to recompute TCP header checksum. In some OVS environments the TCP pseudo header checksum may need to be recomputed. Currently this is only done when the interface instance is configured for "Trunk Mode". We found the issue also occurs in some Kubernetes environments, these environments do not use "Trunk Mode", therefor the condition is removed. Performance tests with this change show only a fractional decrease in throughput (< 0.2%). Fixes: 7525de2516fb ("ibmveth: Set CHECKSUM_PARTIAL if NULL TCP CSUM.") Signed-off-by: David Wilder Reviewed-by: Nick Child Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmveth.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 832a2ae01950..a8d79ee350f8 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1303,24 +1303,23 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb, * the user space for finding a flow. During this process, OVS computes * checksum on the first packet when CHECKSUM_PARTIAL flag is set. * - * So, re-compute TCP pseudo header checksum when configured for - * trunk mode. + * So, re-compute TCP pseudo header checksum. */ + if (iph_proto == IPPROTO_TCP) { struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); + if (tcph->check == 0x0000) { /* Recompute TCP pseudo header checksum */ - if (adapter->is_active_trunk) { - tcphdrlen = skb->len - iphlen; - if (skb_proto == ETH_P_IP) - tcph->check = - ~csum_tcpudp_magic(iph->saddr, - iph->daddr, tcphdrlen, iph_proto, 0); - else if (skb_proto == ETH_P_IPV6) - tcph->check = - ~csum_ipv6_magic(&iph6->saddr, - &iph6->daddr, tcphdrlen, iph_proto, 0); - } + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, + iph->daddr, tcphdrlen, iph_proto, 0); + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, + &iph6->daddr, tcphdrlen, iph_proto, 0); /* Setup SKB fields for checksum offload */ skb_partial_csum_set(skb, iphlen, offsetof(struct tcphdr, check)); -- cgit From af84f9e447a65b4b9f79e7e5d69e19039b431c56 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 29 Sep 2023 10:42:10 +0200 Subject: netfilter: nft_payload: rebuild vlan header on h_proto access nft can perform merging of adjacent payload requests. This means that: ether saddr 00:11 ... ether type 8021ad ... is a single payload expression, for 8 bytes, starting at the ethernet source offset. Check that offset+length is fully within the source/destination mac addersses. This bug prevents 'ether type' from matching the correct h_proto in case vlan tag got stripped. Fixes: de6843be3082 ("netfilter: nft_payload: rebuild vlan header when needed") Reported-by: David Ward Signed-off-by: Florian Westphal --- net/netfilter/nft_payload.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 8cb800989947..120f6d395b98 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt) return pkt->inneroff; } +static bool nft_payload_need_vlan_copy(const struct nft_payload *priv) +{ + unsigned int len = priv->offset + priv->len; + + /* data past ether src/dst requested, copy needed */ + if (len > offsetof(struct ethhdr, h_proto)) + return true; + + return false; +} + void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -172,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr, goto err; if (skb_vlan_tag_present(skb) && - priv->offset >= offsetof(struct ethhdr, h_proto)) { + nft_payload_need_vlan_copy(priv)) { if (!nft_payload_copy_vlan(dest, skb, priv->offset, priv->len)) goto err; -- cgit From 8e56b063c86569e51eed1c5681ce6361fa97fc7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Oct 2023 13:17:53 -0400 Subject: netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp In Scenario A and B below, as the delayed INIT_ACK always changes the peer vtag, SCTP ct with the incorrect vtag may cause packet loss. Scenario A: INIT_ACK is delayed until the peer receives its own INIT_ACK 192.168.1.2 > 192.168.1.1: [INIT] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT] [init tag: 1414468151] 192.168.1.2 > 192.168.1.1: [INIT ACK] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT ACK] [init tag: 1650211246] * 192.168.1.2 > 192.168.1.1: [COOKIE ECHO] 192.168.1.1 > 192.168.1.2: [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: [COOKIE ACK] Scenario B: INIT_ACK is delayed until the peer completes its own handshake 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] * This patch fixes it as below: In SCTP_CID_INIT processing: - clear ct->proto.sctp.init[!dir] if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]. (Scenario E) - set ct->proto.sctp.init[dir]. In SCTP_CID_INIT_ACK processing: - drop it if !ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario B, Scenario C) - drop it if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario A) In SCTP_CID_COOKIE_ACK processing: - clear ct->proto.sctp.init[dir] and ct->proto.sctp.init[!dir]. (Scenario D) Also, it's important to allow the ct state to move forward with cookie_echo and cookie_ack from the opposite dir for the collision scenarios. There are also other Scenarios where it should allow the packet through, addressed by the processing above: Scenario C: new CT is created by INIT_ACK. Scenario D: start INIT on the existing ESTABLISHED ct. Scenario E: start INIT after the old collision on the existing ESTABLISHED ct. 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] (both side are stopped, then start new connection again in hours) 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 242308742] Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Xin Long Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_sctp.h | 1 + net/netfilter/nf_conntrack_proto_sctp.c | 43 ++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b6bcc8f2f46b..c6bd533983c1 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ACK) { + ct->proto.sctp.init[dir] = 0; + ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); @@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT || - sch->type == SCTP_CID_INIT_ACK) { - struct sctp_inithdr _inithdr, *ih; + if (sch->type == SCTP_CID_INIT) { + struct sctp_inithdr _ih, *ih; - ih = skb_header_pointer(skb, offset + sizeof(_sch), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) goto out_unlock; - pr_debug("Setting vtag %x for dir %d\n", - ih->init_tag, !dir); + + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so @@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, old_state == SCTP_CONNTRACK_CLOSED && nf_ct_is_confirmed(ct)) ignore = true; + } else if (sch->type == SCTP_CID_INIT_ACK) { + struct sctp_inithdr _ih, *ih; + __be32 vtag; + + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) + goto out_unlock; + + vtag = ct->proto.sctp.vtag[!dir]; + if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) + goto out_unlock; + /* collision */ + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && + vtag != ih->init_tag) + goto out_unlock; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); + ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; -- cgit From cf791b22bef7d9352ff730a8727d3871942d6001 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Oct 2023 13:17:54 -0400 Subject: selftests: netfilter: test for sctp collision processing in nf_conntrack This patch adds a test case to reproduce the SCTP DATA chunk retransmission timeout issue caused by the improper SCTP collision processing in netfilter nf_conntrack_proto_sctp. In this test, client sends a INIT chunk, but the INIT_ACK replied from server is delayed until the server sends a INIT chunk to start a new connection from its side. After the connection is complete from server side, the delayed INIT_ACK arrives in nf_conntrack_proto_sctp. The delayed INIT_ACK should be dropped in nf_conntrack_proto_sctp instead of updating the vtag with the out-of-date init_tag, otherwise, the vtag in DATA chunks later sent by client don't match the vtag in the conntrack entry and the DATA chunks get dropped. Signed-off-by: Xin Long Signed-off-by: Florian Westphal --- tools/testing/selftests/netfilter/Makefile | 5 +- .../netfilter/conntrack_sctp_collision.sh | 89 +++++++++++++++++++ tools/testing/selftests/netfilter/sctp_collision.c | 99 ++++++++++++++++++++++ 3 files changed, 191 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_sctp_collision.sh create mode 100644 tools/testing/selftests/netfilter/sctp_collision.c diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 321db8850da0..ef90aca4cc96 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ + conntrack_sctp_collision.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close audit_logread +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 000000000000..a924e595cfd8 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + ip net add $CLIENT_NS + ip net add $SERVER_NS + ip net add $ROUTER_NS + ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS + ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS + + ip -n $SERVER_NS link set link0 up + ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0 + ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n $ROUTER_NS link set link1 up + ip -n $ROUTER_NS link set link2 up + ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1 + ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2 + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + + ip -n $CLIENT_NS link set link3 up + ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3 + ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb + tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms + + # simulate the ctstate check on OVS nf_conntrack + ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe sctp + ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null + ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null + ip net del "$CLIENT_NS" + ip net del "$SERVER_NS" + ip net del "$ROUTER_NS" +} + +do_test() { + ip net exec $SERVER_NS ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec $CLIENT_NS ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c new file mode 100644 index 000000000000..21bb1cfd8a85 --- /dev/null +++ b/tools/testing/selftests/netfilter/sctp_collision.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + struct sockaddr_in saddr = {}, daddr = {}; + int sd, ret, len = sizeof(daddr); + struct timeval tv = {25, 0}; + char buf[] = "hello"; + + if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s \n", + argv[0]); + return -1; + } + + sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + if (sd < 0) { + printf("Failed to create sd\n"); + return -1; + } + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = inet_addr(argv[2]); + saddr.sin_port = htons(atoi(argv[3])); + + ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + printf("Failed to bind to address\n"); + goto out; + } + + ret = listen(sd, 5); + if (ret < 0) { + printf("Failed to listen on port\n"); + goto out; + } + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = inet_addr(argv[4]); + daddr.sin_port = htons(atoi(argv[5])); + + /* make test shorter than 25s */ + ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + if (ret < 0) { + printf("Failed to setsockopt SO_RCVTIMEO\n"); + goto out; + } + + if (!strcmp(argv[1], "server")) { + sleep(1); /* wait a bit for client's INIT */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + printf("Server: sent! %d\n", ret); + } + + if (!strcmp(argv[1], "client")) { + usleep(300000); /* wait a bit for server's listening */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */ + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + printf("Client: rcvd! %d\n", ret); + } + ret = 0; +out: + close(sd); + return ret; +} -- cgit From 203bb9d39866d3c5a8135433ce3742fe4f9d5741 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 23 Sep 2023 03:53:49 +0200 Subject: selftests: netfilter: Extend nft_audit.sh Add tests for sets and elements and deletion of all kinds. Also reorder rule reset tests: By moving the bulk rule add command up, the two 'reset rules' tests become identical. While at it, fix for a failing bulk rule add test's error status getting lost due to its use in a pipe. Avoid this by using a temporary file. Headings in diff output for failing tests contain no useful data, strip them. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- tools/testing/selftests/netfilter/nft_audit.sh | 97 +++++++++++++++++++++----- 1 file changed, 81 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh index 83c271b1c735..0b3255e7b353 100755 --- a/tools/testing/selftests/netfilter/nft_audit.sh +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -12,10 +12,11 @@ nft --version >/dev/null 2>&1 || { } logfile=$(mktemp) +rulefile=$(mktemp) echo "logging into $logfile" ./audit_logread >"$logfile" & logread_pid=$! -trap 'kill $logread_pid; rm -f $logfile' EXIT +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT exec 3<"$logfile" do_test() { # (cmd, log) @@ -26,12 +27,14 @@ do_test() { # (cmd, log) res=$(diff -a -u <(echo "$2") - <&3) [ $? -eq 0 ] && { echo "OK"; return; } echo "FAIL" - echo "$res" - ((RC++)) + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) } nft flush ruleset +# adding tables, chains and rules + for table in t1 t2; do do_test "nft add table $table" \ "table=$table family=2 entries=1 op=nft_register_table" @@ -62,6 +65,28 @@ for table in t1 t2; do "table=$table family=2 entries=6 op=nft_register_rule" done +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# resetting rules + do_test 'nft reset rules t1 c2' \ 'table=t1 family=2 entries=3 op=nft_reset_rule' @@ -70,19 +95,6 @@ do_test 'nft reset rules table t1' \ table=t1 family=2 entries=3 op=nft_reset_rule table=t1 family=2 entries=3 op=nft_reset_rule' -do_test 'nft reset rules' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule' - -for ((i = 0; i < 500; i++)); do - echo "add rule t2 c3 counter accept comment \"rule $i\"" -done | do_test 'nft -f -' \ -'table=t2 family=2 entries=500 op=nft_register_rule' - do_test 'nft reset rules t2 c3' \ 'table=t2 family=2 entries=189 op=nft_reset_rule table=t2 family=2 entries=188 op=nft_reset_rule @@ -105,4 +117,57 @@ table=t2 family=2 entries=180 op=nft_reset_rule table=t2 family=2 entries=188 op=nft_reset_rule table=t2 family=2 entries=135 op=nft_reset_rule' +# resetting sets and elements + +elem=(22 ,80 ,443) +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + exit $RC -- cgit From 1426b9ba7c453755d182ebf7e7f2367ba249dcf4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 Oct 2023 09:29:35 -0300 Subject: ASoC: dt-bindings: fsl,micfil: Document #sound-dai-cells imx8mp.dtsi passes #sound-dai-cells = <0> in the micfil node. Document #sound-dai-cells to fix the following schema warning: audio-controller@30ca0000: '#sound-dai-cells' does not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/sound/fsl,micfil.yaml# Signed-off-by: Fabio Estevam Reviewed-by: Adam Ford Link: https://lore.kernel.org/r/20231004122935.2250889-1-festevam@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/fsl,micfil.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/fsl,micfil.yaml b/Documentation/devicetree/bindings/sound/fsl,micfil.yaml index 4b99a18c79a0..b7e605835639 100644 --- a/Documentation/devicetree/bindings/sound/fsl,micfil.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,micfil.yaml @@ -56,6 +56,9 @@ properties: - const: clkext3 minItems: 2 + "#sound-dai-cells": + const: 0 + required: - compatible - reg -- cgit From 128c20eda73bd3e78505c574fb17adb46195c98b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 26 Sep 2023 16:24:01 +0200 Subject: drm/i915: Don't set PIPE_CONTROL_FLUSH_L3 for aux inval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PIPE_CONTROL_FLUSH_L3 is not needed for aux invalidation so don't set that. Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation") Cc: Jonathan Cavitt Cc: Andi Shyti Cc: # v5.8+ Cc: Andrzej Hajda Cc: Tvrtko Ursulin Cc: Matt Roper Cc: Tejas Upadhyay Cc: Lucas De Marchi Cc: Prathap Kumar Valsan Cc: Tapani Pälli Cc: Mark Janes Cc: Rodrigo Vivi Signed-off-by: Nirmoy Das Acked-by: Matt Roper Reviewed-by: Andi Shyti Tested-by: Tapani Pälli Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230926142401.25687-1-nirmoy.das@intel.com (cherry picked from commit 03d681412b38558aefe4fb0f46e36efa94bb21ef) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index a4ff55aa5e55..7ad36198aab2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -271,8 +271,17 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if (mode & EMIT_FLUSH) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; - bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ -- cgit From 6007265ad70a87aa9b4eea79b5e5828da452cfd8 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 28 Sep 2023 20:20:18 +0200 Subject: drm/i915: Register engines early to avoid type confusion Commit 1ec23ed7126e ("drm/i915: Use uabi engines for the default engine map") switched from using for_each_engine() to for_each_uabi_engine() to iterate over the user engines. While this seems to be a sensible change, it's only safe to do when the engines are actually chained using the rb-tree structure which is not the case during early driver initialization where it can be either a lock-less list or regular double-linked list. In fact, the modesetting initialization code may end up calling default_engines() through the fb helper code while the engines list is still llist_node-based: i915_driver_probe() -> intel_display_driver_probe() -> intel_fbdev_init() -> drm_fb_helper_init() -> drm_client_init() -> drm_client_open() -> drm_file_alloc() -> i915_driver_open() -> i915_gem_open() -> i915_gem_context_open() -> i915_gem_create_context() -> default_engines() Using for_each_uabi_engine() in default_engines() is therefore wrong, as it would try to interpret the llist as rb-tree, making it find no engine at all, as the rb_left and rb_right members will still be NULL, as they haven't been initialized yet. To fix this type confusion register the engines earlier and at the same time reduce the amount of code that has to deal with the intermediate llist state. Reported-by: sanity checks in grsecurity Suggested-by: Tvrtko Ursulin Fixes: 1ec23ed7126e ("drm/i915: Use uabi engines for the default engine map") Signed-off-by: Mathias Krause Cc: Jonathan Cavitt Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230928182019.10256-2-minipli@grsecurity.net [tursulin: fixed commit tag typo] (cherry picked from commit 2b562f032fc2594fb3fac22b7a2eb3c1969a7ba3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f65bb33dd21..a8551ce322de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1199,6 +1199,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } + /* + * Register engines early to ensure the engine list is in its final + * rb-tree form, lowering the amount of code that has to deal with + * the intermediate llist state. + */ + intel_engines_driver_register(dev_priv); + return 0; /* @@ -1246,8 +1253,6 @@ err_unlock: void i915_gem_driver_register(struct drm_i915_private *i915) { i915_gem_driver_register__shrinker(i915); - - intel_engines_driver_register(i915); } void i915_gem_driver_unregister(struct drm_i915_private *i915) -- cgit From 1fbb6c1d88c421bf9e7fc456aeabc5dc026062e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Oct 2023 07:07:42 -0700 Subject: drm/i915: Invalidate the TLBs on each GT With multi-GT devices, the object may have been bound on each GT and so we need to invalidate the TLBs across all GT before releasing the pages back to the system. Fixes: d6c531ab4820 ("drm/i915: Invalidate the TLBs on each GT") Signed-off-by: Chris Wilson Signed-off-by: Jonathan Cavitt CC: Matt Roper CC: Andi Shyti Reviewed-by: Andi Shyti Reviewed-by: Nirmoy Das Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231002140742.933530-1-jonathan.cavitt@intel.com (cherry picked from commit 6b8ace7a14e7926b7b914ccd96a8ac657c0d518c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6b6d22c19411..0ba955611dfb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) for_each_gt(gt, i915, id) { if (!obj->mm.tlb[id]) - return; + continue; intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]); obj->mm.tlb[id] = 0; -- cgit From 6df241aacef5f9175b818a80c2ee018697efabc0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2023 12:47:59 +0200 Subject: dt-bindings: cache: andestech,ax45mp-cache: Fix unit address in example The unit address in the example does not match the reg property. Correct the unit address to match reality. Fixes: 3e7bf4685e42786d ("dt-bindings: cache: andestech,ax45mp-cache: Add DT binding documentation for L2 cache controller") Signed-off-by: Geert Uytterhoeven Reviewed-by: Krzysztof Kozlowski Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/7b93655219a6ad696dd3faa9f36fde6b094694a9.1696330005.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml index 9ab5f0c435d4..d2cbe49f4e15 100644 --- a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml +++ b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml @@ -69,7 +69,7 @@ examples: - | #include - cache-controller@2010000 { + cache-controller@13400000 { compatible = "andestech,ax45mp-cache", "cache"; reg = <0x13400000 0x100000>; interrupts = <508 IRQ_TYPE_LEVEL_HIGH>; -- cgit From 01f1ae2733e2bb4de92fefcea5fda847d92aede1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 6 Aug 2023 11:25:11 +0800 Subject: dmaengine: mediatek: Fix deadlock caused by synchronize_irq() The synchronize_irq(c->irq) will not return until the IRQ handler mtk_uart_apdma_irq_handler() is completed. If the synchronize_irq() holds a spin_lock and waits the IRQ handler to complete, but the IRQ handler also needs the same spin_lock. The deadlock will happen. The process is shown below: cpu0 cpu1 mtk_uart_apdma_device_pause() | mtk_uart_apdma_irq_handler() spin_lock_irqsave() | | spin_lock_irqsave() //hold the lock to wait | synchronize_irq() | This patch reorders the synchronize_irq(c->irq) outside the spin_lock in order to mitigate the bug. Fixes: 9135408c3ace ("dmaengine: mediatek: Add MediaTek UART APDMA support") Signed-off-by: Duoming Zhou Reviewed-by: Eugen Hristev Link: https://lore.kernel.org/r/20230806032511.45263-1-duoming@zju.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-uart-apdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index c51dc017b48a..06d12ac39144 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -450,9 +450,8 @@ static int mtk_uart_apdma_device_pause(struct dma_chan *chan) mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); - synchronize_irq(c->irq); - spin_unlock_irqrestore(&c->vc.lock, flags); + synchronize_irq(c->irq); return 0; } -- cgit From 0d880dc6f032e0b541520e9926f398a77d3d433c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 23 Sep 2023 03:53:50 +0200 Subject: netfilter: nf_tables: Deduplicate nft_register_obj audit logs When adding/updating an object, the transaction handler emits suitable audit log entries already, the one in nft_obj_notify() is redundant. To fix that (and retain the audit logging from objects' 'update' callback), Introduce an "audit log free" variant for internal use. Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table") Signed-off-by: Phil Sutter Reviewed-by: Richard Guy Briggs Acked-by: Paul Moore (Audit) Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 44 ++++++++++++++++---------- tools/testing/selftests/netfilter/nft_audit.sh | 20 ++++++++++++ 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4356189360fb..a72b6aeefb1b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7871,24 +7871,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, return nft_delobj(&ctx, obj); } -void nft_obj_notify(struct net *net, const struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, int event, - u16 flags, int family, int report, gfp_t gfp) +static void +__nft_obj_notify(struct net *net, const struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *skb; int err; - char *buf = kasprintf(gfp, "%s:%u", - table->name, nft_net->base_seq); - - audit_log_nfcfg(buf, - family, - obj->handle, - event == NFT_MSG_NEWOBJ ? - AUDIT_NFT_OP_OBJ_REGISTER : - AUDIT_NFT_OP_OBJ_UNREGISTER, - gfp); - kfree(buf); if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -7911,13 +7901,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } + +void nft_obj_notify(struct net *net, const struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp) +{ + struct nftables_pernet *nft_net = nft_pernet(net); + char *buf = kasprintf(gfp, "%s:%u", + table->name, nft_net->base_seq); + + audit_log_nfcfg(buf, + family, + obj->handle, + event == NFT_MSG_NEWOBJ ? + AUDIT_NFT_OP_OBJ_REGISTER : + AUDIT_NFT_OP_OBJ_UNREGISTER, + gfp); + kfree(buf); + + __nft_obj_notify(net, table, obj, portid, seq, event, + flags, family, report, gfp); +} EXPORT_SYMBOL_GPL(nft_obj_notify); static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { - nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->flags, ctx->family, ctx->report, GFP_KERNEL); + __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, + ctx->seq, event, ctx->flags, ctx->family, + ctx->report, GFP_KERNEL); } /* diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh index 0b3255e7b353..bb34329e02a7 100755 --- a/tools/testing/selftests/netfilter/nft_audit.sh +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -85,6 +85,26 @@ do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ do_test "nft add element t1 s3 $setelem" \ "table=t1 family=2 entries=3 op=nft_register_setelem" +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + # resetting rules do_test 'nft reset rules t1 c2' \ -- cgit From 087388278e0f301f4c61ddffb1911d3a180f84b8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 28 Sep 2023 15:12:44 +0200 Subject: netfilter: nf_tables: nft_set_rbtree: fix spurious insertion failure nft_rbtree_gc_elem() walks back and removes the end interval element that comes before the expired element. There is a small chance that we've cached this element as 'rbe_ge'. If this happens, we hold and test a pointer that has been queued for freeing. It also causes spurious insertion failures: $ cat test-testcases-sets-0044interval_overlap_0.1/testout.log Error: Could not process rule: File exists add element t s { 0 - 2 } ^^^^^^ Failed to insert 0 - 2 given: table ip t { set s { type inet_service flags interval,timeout timeout 2s gc-interval 2s } } The set (rbtree) is empty. The 'failure' doesn't happen on next attempt. Reason is that when we try to insert, the tree may hold an expired element that collides with the range we're adding. While we do evict/erase this element, we can trip over this check: if (rbe_ge && nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; rbe_ge was erased by the synchronous gc, we should not have done this check. Next attempt won't find it, so retry results in successful insertion. Restart in-kernel to avoid such spurious errors. Such restart are rare, unless userspace intentionally adds very large numbers of elements with very short timeouts while setting a huge gc interval. Even in this case, this cannot loop forever, on each retry an existing element has been removed. As the caller is holding the transaction mutex, its impossible for a second entity to add more expiring elements to the tree. After this it also becomes feasible to remove the async gc worker and perform all garbage collection from the commit path. Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_rbtree.c | 46 ++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 487572dcd614..2660ceab3759 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, rb_erase(&rbe->node, &priv->root); } -static int nft_rbtree_gc_elem(const struct nft_set *__set, - struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, - u8 genmask) +static const struct nft_rbtree_elem * +nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe, u8 genmask) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); if (!gc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* search for end interval coming before this element. * end intervals don't carry a timeout extension, they @@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, prev = rb_prev(prev); } + rbe_prev = NULL; if (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); nft_rbtree_gc_remove(net, set, priv, rbe_prev); @@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, */ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe_prev); } @@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, nft_rbtree_gc_remove(net, set, priv, rbe); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); nft_trans_gc_elem_add(gc, rbe); nft_trans_gc_queue_sync_done(gc); - return 0; + return rbe_prev; } static bool nft_rbtree_update_first(const struct nft_set *set, @@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - int d, err; + int d; /* Descend the tree to search for an existing element greater than the * key value to insert that is greater than the new element. This is the @@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (nft_set_elem_expired(&rbe->ext) && nft_set_elem_active(&rbe->ext, cur_genmask)) { - err = nft_rbtree_gc_elem(set, priv, rbe, genmask); - if (err < 0) - return err; + const struct nft_rbtree_elem *removed_end; + + removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + if (IS_ERR(removed_end)) + return PTR_ERR(removed_end); + + if (removed_end == rbe_le || removed_end == rbe_ge) + return -EAGAIN; continue; } @@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *rbe = elem->priv; int err; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); - err = __nft_rbtree_insert(net, set, rbe, ext); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + do { + if (fatal_signal_pending(current)) + return -EINTR; + + cond_resched(); + + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + err = __nft_rbtree_insert(net, set, rbe, ext); + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); + } while (err == -EAGAIN); return err; } -- cgit From cee66375b7f4c03cd944dcc9e0a15eb78edbb24d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2023 12:22:57 +0200 Subject: dt-bindings: media: renesas,vin: Fix field-even-active spelling make dt_binding_check: field-active-even: missing type definition The property is named "field-even-active", not "field-active-even". Fixes: 3ab7801dfab998a2 ("media: dt-bindings: media: rcar-vin: Describe optional ep properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/c999eef0a14c8678f56eb698d27b2243e09afed4.1696328563.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/media/renesas,vin.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/media/renesas,vin.yaml b/Documentation/devicetree/bindings/media/renesas,vin.yaml index 324703bfb1bd..5539d0f8e74d 100644 --- a/Documentation/devicetree/bindings/media/renesas,vin.yaml +++ b/Documentation/devicetree/bindings/media/renesas,vin.yaml @@ -95,7 +95,7 @@ properties: synchronization is selected. default: 1 - field-active-even: true + field-even-active: true bus-width: true @@ -144,7 +144,7 @@ properties: synchronization is selected. default: 1 - field-active-even: true + field-even-active: true bus-width: true -- cgit From 80f39e1c27ba9e5a1ea7e68e21c569c9d8e46062 Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Wed, 4 Oct 2023 05:47:01 -0700 Subject: Input: i8042 - add Fujitsu Lifebook E5411 to i8042 quirk table In the initial boot stage the integrated keyboard of Fujitsu Lifebook E5411 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:308A) not working at all. Since the integrated touchpad is managed by the i2c_designware input driver in the Linux kernel and you can't find a PS/2 mouse port on the computer I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20231004011749.101789-1-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 1724d6cb8649..9c39553d30fa 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -618,6 +618,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook E5411 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU CLIENT COMPUTING LIMITED"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E5411"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { -- cgit From 423622a90abb243944d1517b9f57db53729e45c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2023 07:18:31 -0700 Subject: Input: goodix - ensure int GPIO is in input for gpio_count == 1 && gpio_int_idx == 0 case Add a special case for gpio_count == 1 && gpio_int_idx == 0 to goodix_add_acpi_gpio_mappings(). It seems that on newer x86/ACPI devices the reset and irq GPIOs are no longer listed as GPIO resources instead there is only 1 GpioInt resource and _PS0 does the whole reset sequence for us. This means that we must call acpi_device_fix_up_power() on these devices to ensure that the chip is reset before we try to use it. This part was already fixed in commit 3de93e6ed2df ("Input: goodix - call acpi_device_fix_up_power() in some cases") by adding a call to acpi_device_fix_up_power() to the generic "Unexpected ACPI resources" catch all. But it turns out that this case on some hw needs some more special handling. Specifically the firmware may bootup with the IRQ pin in output mode. The reset sequence from ACPI _PS0 (executed by acpi_device_fix_up_power()) should put the pin in input mode, but the GPIO subsystem has cached the direction at bootup, causing request_irq() to fail due to gpiochip_lock_as_irq() failure: [ 9.119864] Goodix-TS i2c-GDIX1002:00: Unexpected ACPI resources: gpio_count 1, gpio_int_idx 0 [ 9.317443] Goodix-TS i2c-GDIX1002:00: ID 911, version: 1060 [ 9.321902] input: Goodix Capacitive TouchScreen as /devices/pci0000:00/0000:00:17.0/i2c_designware.4/i2c-5/i2c-GDIX1002:00/input/input8 [ 9.327840] gpio gpiochip0: (INT3453:00): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 9.327856] gpio gpiochip0: (INT3453:00): unable to lock HW IRQ 26 for IRQ [ 9.327861] genirq: Failed to request resources for GDIX1002:00 (irq 131) on irqchip intel-gpio [ 9.327912] Goodix-TS i2c-GDIX1002:00: request IRQ failed: -5 Fix this by adding a special case for gpio_count == 1 && gpio_int_idx == 0 which adds an ACPI GPIO lookup table for the int GPIO even though we cannot use it for reset purposes (as there is no reset GPIO). Adding the lookup will make the gpiod_int = gpiod_get(..., GPIOD_IN) call succeed, which will explicitly set the direction to input fixing the issue. Note this re-uses the acpi_goodix_int_first_gpios[] lookup table, since there is only 1 GPIO in the ACPI resources the reset entry in that lookup table will amount to a no-op. Reported-and-tested-by: Michael Smith <1973.mjsmith@gmail.com> Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231003215144.69527-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index da9954d6df44..af32fbe57b63 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -900,6 +900,25 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) dev_info(dev, "No ACPI GpioInt resource, assuming that the GPIO order is reset, int\n"); ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_last_gpios; + } else if (ts->gpio_count == 1 && ts->gpio_int_idx == 0) { + /* + * On newer devices there is only 1 GpioInt resource and _PS0 + * does the whole reset sequence for us. + */ + acpi_device_fix_up_power(ACPI_COMPANION(dev)); + + /* + * Before the _PS0 call the int GPIO may have been in output + * mode and the call should have put the int GPIO in input mode, + * but the GPIO subsys cached state may still think it is + * in output mode, causing gpiochip_lock_as_irq() failure. + * + * Add a mapping for the int GPIO to make the + * gpiod_int = gpiod_get(..., GPIOD_IN) call succeed, + * which will explicitly set the direction to input. + */ + ts->irq_pin_access_method = IRQ_PIN_ACCESS_NONE; + gpio_mapping = acpi_goodix_int_first_gpios; } else { dev_warn(dev, "Unexpected ACPI resources: gpio_count %d, gpio_int_idx %d\n", ts->gpio_count, ts->gpio_int_idx); -- cgit From 4785aa8028536c2be656d22c74ec1995b97056f3 Mon Sep 17 00:00:00 2001 From: Oza Pawandeep Date: Tue, 3 Oct 2023 10:33:33 -0700 Subject: cpuidle, ACPI: Evaluate LPI arch_flags for broadcast timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arm® Functional Fixed Hardware Specification defines LPI states, which provide an architectural context loss flags field that can be used to describe the context that might be lost when an LPI state is entered. - Core context Lost - General purpose registers. - Floating point and SIMD registers. - System registers, include the System register based - generic timer for the core. - Debug register in the core power domain. - PMU registers in the core power domain. - Trace register in the core power domain. - Trace context loss - GICR - GICD Qualcomm's custom CPUs preserves the architectural state, including keeping the power domain for local timers active. when core is power gated, the local timers are sufficient to wake the core up without needing broadcast timer. The patch fixes the evaluation of cpuidle arch_flags, and moves only to broadcast timer if core context lost is defined in ACPI LPI. Fixes: a36a7fecfe60 ("ACPI / processor_idle: Add support for Low Power Idle(LPI) states") Reviewed-by: Sudeep Holla Acked-by: Rafael J. Wysocki Signed-off-by: Oza Pawandeep Link: https://lore.kernel.org/r/20231003173333.2865323-1-quic_poza@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 19 +++++++++++++++++++ drivers/acpi/processor_idle.c | 3 +-- include/linux/acpi.h | 9 +++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 4d537d56eb84..6792a1f83f2a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include #include #include #include @@ -44,6 +45,24 @@ #define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ trbe_interrupt) + sizeof(u16)) +/* + * Arm® Functional Fixed Hardware Specification Version 1.2. + * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dc615ef6550a..3a34a8c425fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1217,8 +1217,7 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; state->target_residency = lpi->min_residency; - if (lpi->arch_flags) - state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->flags |= arch_get_idle_state_flags(lpi->arch_flags); if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_RCU_IDLE; state->enter = acpi_idle_lpi_enter; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..afd94c9b8b8a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1480,6 +1480,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); -- cgit From 0add5c597f3253a9c6108a0a81d57f44ab0d9d30 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 26 Sep 2023 14:27:30 -0400 Subject: ipv4: Set offload_failed flag in fibmatch results Due to a small omission, the offload_failed flag is missing from ipv4 fibmatch results. Make sure it is set correctly. The issue can be witnessed using the following commands: echo "1 1" > /sys/bus/netdevsim/new_device ip link add dummy1 up type dummy ip route add 192.0.2.0/24 dev dummy1 echo 1 > /sys/kernel/debug/netdevsim/netdevsim1/fib/fail_route_offload ip route add 198.51.100.0/24 dev dummy1 ip route # 192.168.15.0/24 has rt_trap # 198.51.100.0/24 has rt_offload_failed ip route get 192.168.15.1 fibmatch # Result has rt_trap ip route get 198.51.100.1 fibmatch # Result differs from the route shown by `ip route`, it is missing # rt_offload_failed ip link del dev dummy1 echo 1 > /sys/bus/netdevsim/del_device Fixes: 36c5100e859d ("IPv4: Add "offload failed" indication to routes") Signed-off-by: Benjamin Poirier Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230926182730.231208-1-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a57062283219..b214b5a2e045 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3417,6 +3417,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); + fri.offload_failed = + READ_ONCE(fa->offload_failed); break; } } -- cgit From b21f18ef964b2c71aa0b451df6d17b7bcad8280d Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 4 Oct 2023 10:31:15 +0530 Subject: PM: hibernate: Fix copying the zero bitmap to safe pages The following crash is observed 100% of the time during resume from the hibernation on a x86 QEMU system. [ 12.931887] ? __die_body+0x1a/0x60 [ 12.932324] ? page_fault_oops+0x156/0x420 [ 12.932824] ? search_exception_tables+0x37/0x50 [ 12.933389] ? fixup_exception+0x21/0x300 [ 12.933889] ? exc_page_fault+0x69/0x150 [ 12.934371] ? asm_exc_page_fault+0x26/0x30 [ 12.934869] ? get_buffer.constprop.0+0xac/0x100 [ 12.935428] snapshot_write_next+0x7c/0x9f0 [ 12.935929] ? submit_bio_noacct_nocheck+0x2c2/0x370 [ 12.936530] ? submit_bio_noacct+0x44/0x2c0 [ 12.937035] ? hib_submit_io+0xa5/0x110 [ 12.937501] load_image+0x83/0x1a0 [ 12.937919] swsusp_read+0x17f/0x1d0 [ 12.938355] ? create_basic_memory_bitmaps+0x1b7/0x240 [ 12.938967] load_image_and_restore+0x45/0xc0 [ 12.939494] software_resume+0x13c/0x180 [ 12.939994] resume_store+0xa3/0x1d0 The commit being fixed introduced a bug in copying the zero bitmap to safe pages. A temporary bitmap is allocated with PG_ANY flag in prepare_image() to make a copy of zero bitmap after the unsafe pages are marked. Freeing this temporary bitmap with PG_UNSAFE_KEEP later results in an inconsistent state of unsafe pages. Since free bit is left as is for this temporary bitmap after free, these pages are treated as unsafe pages when they are allocated again. This results in incorrect calculation of the number of pages pre-allocated for the image. nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages; The allocate_unsafe_pages is estimated to be higher than the actual which results in running short of pages in safe_pages_list. Hence the crash is observed in get_buffer() due to NULL pointer access of safe_pages_list. Fix this issue by creating the temporary zero bitmap from safe pages (free bit not set) so that the corresponding free bits can be cleared while freeing this bitmap. Fixes: 005e8dddd497 ("PM: hibernate: don't store zero pages in the image file") Suggested-by:: Brian Geffon Signed-off-by: Pavankumar Kondeti Reviewed-by: Brian Geffon Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 87e9f7e2bdc0..0f12e0a97e43 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2647,7 +2647,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, memory_bm_free(bm, PG_UNSAFE_KEEP); /* Make a copy of zero_bm so it can be created in safe pages */ - error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY); + error = memory_bm_create(&tmp, GFP_ATOMIC, PG_SAFE); if (error) goto Free; @@ -2660,7 +2660,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, goto Free; duplicate_memory_bitmap(zero_bm, &tmp); - memory_bm_free(&tmp, PG_UNSAFE_KEEP); + memory_bm_free(&tmp, PG_UNSAFE_CLEAR); /* At this point zero_bm is in safe pages and it can be used for restoring. */ if (nr_highmem > 0) { -- cgit From 0c0faa2946d6f8b9557689e253519e32caf8b49d Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 17 Sep 2023 08:18:50 -0700 Subject: HID: nvidia-shield: Select POWER_SUPPLY Kconfig option Battery information reported by the driver depends on the power supply subsystem. Select the required subsystem when the HID_NVIDIA_SHIELD Kconfig option is enabled. Fixes: 3ab196f88237 ("HID: nvidia-shield: Add battery support for Thunderstrike") Signed-off-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index c72459d24237..790aa908e2a7 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -800,6 +800,7 @@ config HID_NVIDIA_SHIELD depends on USB_HID depends on BT_HIDP depends on LEDS_CLASS + select POWER_SUPPLY help Support for NVIDIA SHIELD accessories. -- cgit From 643445531829d89dc5ddbe0c5ee4ff8f84ce8687 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 20 Sep 2023 14:07:04 +0800 Subject: workqueue: Fix UAF report by KASAN in pwq_release_workfn() Currently, for UNBOUND wq, if the apply_wqattrs_prepare() return error, the apply_wqattr_cleanup() will be called and use the pwq_release_worker kthread to release resources asynchronously. however, the kfree(wq) is invoked directly in failure path of alloc_workqueue(), if the kfree(wq) has been executed and when the pwq_release_workfn() accesses wq, this leads to the following scenario: BUG: KASAN: slab-use-after-free in pwq_release_workfn+0x339/0x380 kernel/workqueue.c:4124 Read of size 4 at addr ffff888027b831c0 by task pool_workqueue_/3 CPU: 0 PID: 3 Comm: pool_workqueue_ Not tainted 6.5.0-rc7-next-20230825-syzkaller #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 pwq_release_workfn+0x339/0x380 kernel/workqueue.c:4124 kthread_worker_fn+0x2fc/0xa80 kernel/kthread.c:823 kthread+0x33a/0x430 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Allocated by task 5054: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa2/0xb0 mm/kasan/common.c:383 kmalloc include/linux/slab.h:599 [inline] kzalloc include/linux/slab.h:720 [inline] alloc_workqueue+0x16f/0x1490 kernel/workqueue.c:4684 kvm_mmu_init_tdp_mmu+0x23/0x100 arch/x86/kvm/mmu/tdp_mmu.c:19 kvm_mmu_init_vm+0x248/0x2e0 arch/x86/kvm/mmu/mmu.c:6180 kvm_arch_init_vm+0x39/0x720 arch/x86/kvm/x86.c:12311 kvm_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1222 [inline] kvm_dev_ioctl_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:5089 [inline] kvm_dev_ioctl+0xa31/0x1c20 arch/x86/kvm/../../../virt/kvm/kvm_main.c:5131 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 5054: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] __kmem_cache_free+0xb8/0x2f0 mm/slub.c:3822 alloc_workqueue+0xe76/0x1490 kernel/workqueue.c:4746 kvm_mmu_init_tdp_mmu+0x23/0x100 arch/x86/kvm/mmu/tdp_mmu.c:19 kvm_mmu_init_vm+0x248/0x2e0 arch/x86/kvm/mmu/mmu.c:6180 kvm_arch_init_vm+0x39/0x720 arch/x86/kvm/x86.c:12311 kvm_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1222 [inline] kvm_dev_ioctl_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:5089 [inline] kvm_dev_ioctl+0xa31/0x1c20 arch/x86/kvm/../../../virt/kvm/kvm_main.c:5131 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd This commit therefore flush pwq_release_worker in the alloc_and_link_pwqs() before invoke kfree(wq). Reported-by: syzbot+60db9f652c92d5bacba4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=60db9f652c92d5bacba4 Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/workqueue.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b9f053a5a5f0..ebe24a5e1435 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4600,6 +4600,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) } cpus_read_unlock(); + /* for unbound pwq, flush the pwq_release_worker ensures that the + * pwq_release_workfn() completes before calling kfree(wq). + */ + if (ret) + kthread_flush_worker(pwq_release_worker); + return ret; enomem: -- cgit From b15e7490a1efd4c0f54434c3a85ced1b6b536b7a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 21 Sep 2023 10:16:41 -0700 Subject: KVM: selftests: Treat %llx like %lx when formatting guest printf Treat %ll* formats the same as %l* formats when processing printfs from the guest so that using e.g. %llx instead of %lx generates the expected output. Ideally, unexpected formats would generate compile-time warnings or errors, but it's not at all obvious how to actually accomplish that. Alternatively, guest_vsnprintf() could assert on an unexpected format, but since the vast majority of printfs are for failed guest asserts, getting *something* printed is better than nothing. E.g. before ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=4286 tid=4290 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007f3aae6076da: ?? ??:0 3 0x00007f3aae32161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x%lx), got 0x0 and after ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=5664 tid=5668 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007fbe180076da: ?? ??:0 3 0x00007fbe17d2161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x100000000), got 0xcc Fixes: e5119382499c ("KVM: selftests: Add guest_snprintf() to KVM selftests") Cc: Aaron Lewis Link: https://lore.kernel.org/r/20230921171641.3641776-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/guest_sprintf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index c4a69d8aeb68..74627514c4d4 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -200,6 +200,13 @@ repeat: ++fmt; } + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + /* default base */ base = 10; -- cgit From 332c4d90a09c2cdc59f88d6a6c58c1601403b891 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 14 Sep 2023 17:48:03 +0800 Subject: KVM: selftests: Remove obsolete and incorrect test case metadata Delete inaccurate descriptions and obsolete metadata for test cases. It adds zero value, and has a non-zero chance of becoming stale and misleading in the future. No functional changes intended. Suggested-by: Sean Christopherson Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20230914094803.94661-1-likexu@tencent.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/ucall_common.h | 2 -- tools/testing/selftests/kvm/lib/x86_64/apic.c | 2 -- tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh | 1 - tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c | 4 ---- tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 4 ---- 7 files changed, 17 deletions(-) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 112bc1da732a..ce33d306c2cb 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c index 7168e25c194e..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index e446d76d1c0c..6c1278562090 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 7f36c32fa760..18ac5c1952a3 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 0560149e66ed..7cbb409801ee 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index 5b669818e39a..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 05898ad9f4d9..9ec9ab60b63e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" -- cgit From 6f195d6b0da3b689922ba9e302af2f49592fa9fc Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Wed, 27 Sep 2023 13:57:49 -0400 Subject: net: stmmac: dwmac-stm32: fix resume on STM32 MCU The STM32MP1 keeps clk_rx enabled during suspend, and therefore the driver does not enable the clock in stm32_dwmac_init() if the device was suspended. The problem is that this same code runs on STM32 MCUs, which do disable clk_rx during suspend, causing the clock to never be re-enabled on resume. This patch adds a variant flag to indicate that clk_rx remains enabled during suspend, and uses this to decide whether to enable the clock in stm32_dwmac_init() if the device was suspended. This approach fixes this specific bug with limited opportunity for unintended side-effects, but I have a follow up patch that will refactor the clock configuration and hopefully make it less error prone. Fixes: 6528e02cc9ff ("net: ethernet: stmmac: add adaptation for stm32mp157c.") Signed-off-by: Ben Wolsieffer Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20230927175749.1419774-1-ben.wolsieffer@hefring.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 26ea8c687881..a0e276783e65 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -104,6 +104,7 @@ struct stm32_ops { int (*parse_data)(struct stm32_dwmac *dwmac, struct device *dev); u32 syscfg_eth_mask; + bool clk_rx_enable_in_suspend; }; static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) @@ -121,7 +122,8 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; - if (!dwmac->dev->power.is_suspended) { + if (!dwmac->ops->clk_rx_enable_in_suspend || + !dwmac->dev->power.is_suspended) { ret = clk_prepare_enable(dwmac->clk_rx); if (ret) { clk_disable_unprepare(dwmac->clk_tx); @@ -513,7 +515,8 @@ static struct stm32_ops stm32mp1_dwmac_data = { .suspend = stm32mp1_suspend, .resume = stm32mp1_resume, .parse_data = stm32mp1_parse_data, - .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK + .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK, + .clk_rx_enable_in_suspend = true }; static const struct of_device_id stm32_dwmac_match[] = { -- cgit From 08e50cf071847323414df0835109b6f3560d44f5 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Wed, 27 Sep 2023 18:14:14 +0000 Subject: tipc: fix a potential deadlock on &tx->lock It seems that tipc_crypto_key_revoke() could be be invoked by wokequeue tipc_crypto_work_rx() under process context and timer/rx callback under softirq context, thus the lock acquisition on &tx->lock seems better use spin_lock_bh() to prevent possible deadlock. This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. tipc_crypto_work_rx() --> tipc_crypto_key_distr() --> tipc_bcast_xmit() --> tipc_bcbase_xmit() --> tipc_bearer_bc_xmit() --> tipc_crypto_xmit() --> tipc_ehdr_build() --> tipc_crypto_key_revoke() --> spin_lock(&tx->lock) --> tipc_disc_timeout() --> tipc_bearer_xmit_skb() --> tipc_crypto_xmit() --> tipc_ehdr_build() --> tipc_crypto_key_revoke() --> spin_lock(&tx->lock) Signed-off-by: Chengfeng Ye Reviewed-by: Jacob Keller Acked-by: Jon Maloy Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Link: https://lore.kernel.org/r/20230927181414.59928-1-dg573847474@gmail.com Signed-off-by: Jakub Kicinski --- net/tipc/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 302fd749c424..43c3f1c971b8 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1441,14 +1441,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key) struct tipc_crypto *tx = tipc_net(net)->crypto_tx; struct tipc_key key; - spin_lock(&tx->lock); + spin_lock_bh(&tx->lock); key = tx->key; WARN_ON(!key.active || tx_key != key.active); /* Free the active key */ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending); tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); - spin_unlock(&tx->lock); + spin_unlock_bh(&tx->lock); pr_warn("%s: key is revoked\n", tx->name); return -EKEYREVOKED; -- cgit From cedc019b9f260facfadd20c6c490e403abf292e3 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Tue, 3 Oct 2023 20:13:03 -0500 Subject: smb: use kernel_connect() and kernel_bind() Recent changes to kernel_connect() and kernel_bind() ensure that callers are insulated from changes to the address parameter made by BPF SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and ops->bind() with kernel_connect() and kernel_bind() to ensure that SMB mounts do not see their mount address overwritten in such cases. Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Cc: # 6.0+ Signed-off-by: Jordan Rife Acked-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/connect.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 3902e90dca6b..ce11165446cf 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2895,9 +2895,9 @@ bind_socket(struct TCP_Server_Info *server) if (server->srcaddr.ss_family != AF_UNSPEC) { /* Bind to the specified local IP address */ struct socket *socket = server->ssocket; - rc = socket->ops->bind(socket, - (struct sockaddr *) &server->srcaddr, - sizeof(server->srcaddr)); + rc = kernel_bind(socket, + (struct sockaddr *) &server->srcaddr, + sizeof(server->srcaddr)); if (rc < 0) { struct sockaddr_in *saddr4; struct sockaddr_in6 *saddr6; @@ -3046,8 +3046,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); - rc = socket->ops->connect(socket, saddr, slen, - server->noblockcnt ? O_NONBLOCK : 0); + rc = kernel_connect(socket, saddr, slen, + server->noblockcnt ? O_NONBLOCK : 0); /* * When mounting SMB root file systems, we do not want to block in * connect. Otherwise bail out and then let cifs_reconnect() perform -- cgit From 3b8bb3171571f92eda863e5f78b063604c61f72a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 4 Oct 2023 17:28:38 -0300 Subject: smb: client: do not start laundromat thread on nohandlecache Honor 'nohandlecache' mount option by not starting laundromat thread even when SMB server supports directory leases. Do not waste system resources by having laundromat thread running with no directory caching at all. Fixes: 2da338ff752a ("smb3: do not start laundromat thread when dir leases disabled") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/connect.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index ce11165446cf..7b923e36501b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2474,8 +2474,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) static struct cifs_tcon * cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) { - int rc, xid; struct cifs_tcon *tcon; + bool nohandlecache; + int rc, xid; tcon = cifs_find_tcon(ses, ctx); if (tcon) { @@ -2493,14 +2494,17 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) goto out_fail; } - if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) - tcon = tcon_info_alloc(true); + if (ses->server->dialect >= SMB20_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)) + nohandlecache = ctx->nohandlecache; else - tcon = tcon_info_alloc(false); + nohandlecache = true; + tcon = tcon_info_alloc(!nohandlecache); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; } + tcon->nohandlecache = nohandlecache; if (ctx->snapshot_time) { if (ses->server->vals->protocol_id == 0) { @@ -2662,10 +2666,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->nocase = ctx->nocase; tcon->broken_sparse_sup = ctx->no_sparse; tcon->max_cached_dirs = ctx->max_cached_dirs; - if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) - tcon->nohandlecache = ctx->nohandlecache; - else - tcon->nohandlecache = true; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); -- cgit From 513dbc10cfc1da6754e004ea651d6bc480c23eb9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2023 17:38:45 -0700 Subject: page_pool: fix documentation typos Correct grammar for better readability. Signed-off-by: Randy Dunlap Cc: Jesper Dangaard Brouer Reviewed-by: Simon Horman Acked-by: Ilias Apalodimas Link: https://lore.kernel.org/r/20231001003846.29541-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 94231533a369..8e7751464ff5 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -16,13 +16,13 @@ * page_pool_alloc_pages() call. Drivers should use * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). * - * API keeps track of in-flight pages, in order to let API user know + * The API keeps track of in-flight pages, in order to let API users know * when it is safe to free a page_pool object. Thus, API users * must call page_pool_put_page() to free the page, or attach - * the page to a page_pool-aware objects like skbs marked with + * the page to a page_pool-aware object like skbs marked with * skb_mark_for_recycle(). * - * API user must call page_pool_put_page() once on a page, as it + * API users must call page_pool_put_page() once on a page, as it * will either recycle the page, or in case of refcnt > 1, it will * release the DMA mapping and in-flight state accounting. */ -- cgit From 767881c470b3140c33795031c4e4d65572731c1c Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 1 Oct 2023 20:38:53 +0800 Subject: pmdomain: imx: scu-pd: correct DMA2 channel Per "dt-bindings/firmware/imx/rsrc.h", `IMX_SC_R_DMA_2_CH0 + 5` not equals to IMX_SC_R_DMA_2_CH5, so there should be two entries in imx8qxp_scu_pd_ranges, otherwise the imx_scu_add_pm_domain may filter out wrong power domains. Fixes: 927b7d15dcf2 ("genpd: imx: scu-pd: enlarge PD range") Reported-by: Dong Aisheng Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20231001123853.200773-1-peng.fan@oss.nxp.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/scu-pd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pmdomain/imx/scu-pd.c b/drivers/pmdomain/imx/scu-pd.c index 2f693b67ddb4..891c1d925a9d 100644 --- a/drivers/pmdomain/imx/scu-pd.c +++ b/drivers/pmdomain/imx/scu-pd.c @@ -150,7 +150,8 @@ static const struct imx_sc_pd_range imx8qxp_scu_pd_ranges[] = { { "mclk-out-1", IMX_SC_R_MCLK_OUT_1, 1, false, 0 }, { "dma0-ch", IMX_SC_R_DMA_0_CH0, 32, true, 0 }, { "dma1-ch", IMX_SC_R_DMA_1_CH0, 16, true, 0 }, - { "dma2-ch", IMX_SC_R_DMA_2_CH0, 32, true, 0 }, + { "dma2-ch-0", IMX_SC_R_DMA_2_CH0, 5, true, 0 }, + { "dma2-ch-1", IMX_SC_R_DMA_2_CH5, 27, true, 0 }, { "dma3-ch", IMX_SC_R_DMA_3_CH0, 32, true, 0 }, { "asrc0", IMX_SC_R_ASRC_0, 1, false, 0 }, { "asrc1", IMX_SC_R_ASRC_1, 1, false, 0 }, -- cgit From 059217c18be6757b95bfd77ba53fb50b48b8a816 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 1 Oct 2023 11:12:38 -0400 Subject: tcp: fix quick-ack counting to count actual ACKs of new data This commit fixes quick-ack counting so that it only considers that a quick-ack has been provided if we are sending an ACK that newly acknowledges data. The code was erroneously using the number of data segments in outgoing skbs when deciding how many quick-ack credits to remove. This logic does not make sense, and could cause poor performance in request-response workloads, like RPC traffic, where requests or responses can be multi-segment skbs. When a TCP connection decides to send N quick-acks, that is to accelerate the cwnd growth of the congestion control module controlling the remote endpoint of the TCP connection. That quick-ack decision is purely about the incoming data and outgoing ACKs. It has nothing to do with the outgoing data or the size of outgoing data. And in particular, an ACK only serves the intended purpose of allowing the remote congestion control to grow the congestion window quickly if the ACK is ACKing or SACKing new data. The fix is simple: only count packets as serving the goal of the quickack mechanism if they are ACKing/SACKing new data. We can tell whether this is the case by checking inet_csk_ack_scheduled(), since we schedule an ACK exactly when we are ACKing/SACKing new data. Fixes: fc6415bcb0f5 ("[TCP]: Fix quick-ack decrementing with TSO.") Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231001151239.1866845-1-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_output.c | 7 +++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 91688d0dadcd..7b1a720691ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -348,12 +348,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule); -static inline void tcp_dec_quickack_mode(struct sock *sk, - const unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { + /* How many ACKs S/ACKing new data have we sent? */ + const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; + if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccfc8bbf7455..aa0fc8c766e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -177,8 +177,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, - u32 rcv_nxt) +static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) { struct tcp_sock *tp = tcp_sk(sk); @@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, if (unlikely(rcv_nxt != tp->rcv_nxt)) return; /* Special ACK sent by DCTCP to reflect ECN */ - tcp_dec_quickack_mode(sk, pkts); + tcp_dec_quickack_mode(sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1387,7 +1386,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); + tcp_event_ack_sent(sk, rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); -- cgit From 4720852ed9afb1c5ab84e96135cb5b73d5afde6f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 1 Oct 2023 11:12:39 -0400 Subject: tcp: fix delayed ACKs for MSS boundary condition This commit fixes poor delayed ACK behavior that can cause poor TCP latency in a particular boundary condition: when an application makes a TCP socket write that is an exact multiple of the MSS size. The problem is that there is painful boundary discontinuity in the current delayed ACK behavior. With the current delayed ACK behavior, we have: (1) If an app reads data when > 1*MSS is unacknowledged, then tcp_cleanup_rbuf() ACKs immediately because of: tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || (2) If an app reads all received data, and the packets were < 1*MSS, and either (a) the app is not ping-pong or (b) we received two packets < 1*MSS, then tcp_cleanup_rbuf() ACKs immediately beecause of: ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !inet_csk_in_pingpong_mode(sk))) && (3) *However*: if an app reads exactly 1*MSS of data, tcp_cleanup_rbuf() does not send an immediate ACK. This is true even if the app is not ping-pong and the 1*MSS of data had the PSH bit set, suggesting the sending application completed an application write. Thus if the app is not ping-pong, we have this painful case where >1*MSS gets an immediate ACK, and <1*MSS gets an immediate ACK, but a write whose last skb is an exact multiple of 1*MSS can get a 40ms delayed ACK. This means that any app that transfers data in one direction and takes care to align write size or packet size with MSS can suffer this problem. With receive zero copy making 4KB MSS values more common, it is becoming more common to have application writes naturally align with MSS, and more applications are likely to encounter this delayed ACK problem. The fix in this commit is to refine the delayed ACK heuristics with a simple check: immediately ACK a received 1*MSS skb with PSH bit set if the app reads all data. Why? If an skb has a len of exactly 1*MSS and has the PSH bit set then it is likely the end of an application write. So more data may not be arriving soon, and yet the data sender may be waiting for an ACK if cwnd-bound or using TX zero copy. Thus we set ICSK_ACK_PUSHED in this case so that tcp_cleanup_rbuf() will send an ACK immediately if the app reads all of the data and is not ping-pong. Note that this logic is also executed for the case where len > MSS, but in that case this logic does not matter (and does not hurt) because tcp_cleanup_rbuf() will always ACK immediately if the app reads data and there is more than an MSS of unACKed data. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Cc: Xin Guo Link: https://lore.kernel.org/r/20231001151239.1866845-2-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06fe1cf645d5..8afb0950a697 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -253,6 +253,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (unlikely(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE)) tcp_gro_dev_warn(sk, skb, len); + /* If the skb has a len of exactly 1*MSS and has the PSH bit + * set then it is likely the end of an application write. So + * more data may not be arriving soon, and yet the data sender + * may be waiting for an ACK if cwnd-bound or using TX zero + * copy. So we set ICSK_ACK_PUSHED here so that + * tcp_cleanup_rbuf() will send an ACK immediately if the app + * reads all of the data and is not ping-pong. If len > MSS + * then this logic does not matter (and does not hurt) because + * tcp_cleanup_rbuf() will always ACK immediately if the app + * reads data and there is more than an MSS of unACKed data. + */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. -- cgit From 2222a78075f0c19ca18db53fd6623afb4aff602d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2023 10:58:45 -0400 Subject: sctp: update transport state when processing a dupcook packet During the 4-way handshake, the transport's state is set to ACTIVE in sctp_process_init() when processing INIT_ACK chunk on client or COOKIE_ECHO chunk on server. In the collision scenario below: 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] when processing COOKIE_ECHO on 192.168.1.2, as it's in COOKIE_WAIT state, sctp_sf_do_dupcook_b() is called by sctp_sf_do_5_2_4_dupcook() where it creates a new association and sets its transport to ACTIVE then updates to the old association in sctp_assoc_update(). However, in sctp_assoc_update(), it will skip the transport update if it finds a transport with the same ipaddr already existing in the old asoc, and this causes the old asoc's transport state not to move to ACTIVE after the handshake. This means if DATA retransmission happens at this moment, it won't be able to enter PF state because of the check 'transport->state == SCTP_ACTIVE' in sctp_do_8_2_transport_strike(). This patch fixes it by updating the transport in sctp_assoc_update() with sctp_assoc_add_peer() where it updates the transport state if there is already a transport with the same ipaddr exists in the old asoc. Signed-off-by: Xin Long Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/fd17356abe49713ded425250cc1ae51e9f5846c6.1696172325.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/associola.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 796529167e8d..c45c192b7878 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1159,8 +1159,7 @@ int sctp_assoc_update(struct sctp_association *asoc, /* Add any peer addresses from the new association. */ list_for_each_entry(trans, &new->peer.transport_addr_list, transports) - if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) && - !sctp_assoc_add_peer(asoc, &trans->ipaddr, + if (!sctp_assoc_add_peer(asoc, &trans->ipaddr, GFP_ATOMIC, trans->state)) return -ENOMEM; -- cgit From 1f4e803cd9c9166eb8b6c8b0b8e4124f7499fc07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2023 11:04:20 -0400 Subject: sctp: update hb timer immediately after users change hb_interval Currently, when hb_interval is changed by users, it won't take effect until the next expiry of hb timer. As the default value is 30s, users have to wait up to 30s to wait its hb_interval update to work. This becomes pretty bad in containers where a much smaller value is usually set on hb_interval. This patch improves it by resetting the hb timer immediately once the value of hb_interval is updated by users. Note that we don't address the already existing 'problem' when sending a heartbeat 'on demand' if one hb has just been sent(from the timer) mentioned in: https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg590224.html Signed-off-by: Xin Long Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/75465785f8ee5df2fb3acdca9b8fafdc18984098.1696172660.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ab943e8fb1db..7f89e43154c0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2450,6 +2450,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if (trans) { trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + sctp_transport_reset_hb_timer(trans); } else if (asoc) { asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); -- cgit From d0f95894fda7d4f895b29c1097f92d7fee278cb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Oct 2023 18:34:55 +0000 Subject: netlink: annotate data-races around sk->sk_err syzbot caught another data-race in netlink when setting sk->sk_err. Annotate all of them for good measure. BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg write to 0xffff8881613bb220 of 4 bytes by task 28147 on cpu 0: netlink_recvmsg+0x448/0x780 net/netlink/af_netlink.c:1994 sock_recvmsg_nosec net/socket.c:1027 [inline] sock_recvmsg net/socket.c:1049 [inline] __sys_recvfrom+0x1f4/0x2e0 net/socket.c:2229 __do_sys_recvfrom net/socket.c:2247 [inline] __se_sys_recvfrom net/socket.c:2243 [inline] __x64_sys_recvfrom+0x78/0x90 net/socket.c:2243 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffff8881613bb220 of 4 bytes by task 28146 on cpu 1: netlink_recvmsg+0x448/0x780 net/netlink/af_netlink.c:1994 sock_recvmsg_nosec net/socket.c:1027 [inline] sock_recvmsg net/socket.c:1049 [inline] __sys_recvfrom+0x1f4/0x2e0 net/socket.c:2229 __do_sys_recvfrom net/socket.c:2247 [inline] __se_sys_recvfrom net/socket.c:2243 [inline] __x64_sys_recvfrom+0x78/0x90 net/socket.c:2243 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000 -> 0x00000016 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 28146 Comm: syz-executor.0 Not tainted 6.6.0-rc3-syzkaller-00055-g9ed22ae6be81 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231003183455.3410550-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 642b9d382fb4..eb086b06d60d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk) if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) { if (!test_and_set_bit(NETLINK_S_CONGESTED, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; + WRITE_ONCE(sk->sk_err, ENOBUFS); sk_error_report(sk); } } @@ -1605,7 +1605,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) goto out; } - sk->sk_err = p->code; + WRITE_ONCE(sk->sk_err, p->code); sk_error_report(sk); out: return ret; @@ -1991,7 +1991,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = -ret; + WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); } } @@ -2511,7 +2511,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, err_bad_put: nlmsg_free(skb); err_skb: - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack); -- cgit From 53ff5cf89142b978b1a5ca8dc4d4425e6a09745f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 4 Oct 2023 18:25:01 +0900 Subject: ksmbd: fix race condition between session lookup and expire Thread A + Thread B ksmbd_session_lookup | smb2_sess_setup sess = xa_load | | | xa_erase(&conn->sessions, sess->id); | | ksmbd_session_destroy(sess) --> kfree(sess) | // UAF! | sess->last_active = jiffies | + This patch add rwsem to fix race condition between ksmbd_session_lookup and ksmbd_expire_session. Reported-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 2 ++ fs/smb/server/connection.h | 1 + fs/smb/server/mgmt/user_session.c | 10 +++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index db7fa704a3f6..4b38c3a285f6 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -84,6 +84,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) spin_lock_init(&conn->llist_lock); INIT_LIST_HEAD(&conn->lock_list); + init_rwsem(&conn->session_lock); + down_write(&conn_list_lock); list_add(&conn->conns_list, &conn_list); up_write(&conn_list_lock); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index ab2583f030ce..3c005246a32e 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -50,6 +50,7 @@ struct ksmbd_conn { struct nls_table *local_nls; struct unicode_map *um; struct list_head conns_list; + struct rw_semaphore session_lock; /* smb session 1 per user */ struct xarray sessions; unsigned long last_active; diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 8a5dcab05614..b8be14a96cf6 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -174,7 +174,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) unsigned long id; struct ksmbd_session *sess; - down_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { if (sess->state != SMB2_SESSION_VALID || time_after(jiffies, @@ -185,7 +185,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) continue; } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } int ksmbd_session_register(struct ksmbd_conn *conn, @@ -227,7 +227,9 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } } + up_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { unsigned long chann_id; struct channel *chann; @@ -244,7 +246,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) ksmbd_session_destroy(sess); } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, @@ -252,9 +254,11 @@ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, { struct ksmbd_session *sess; + down_read(&conn->session_lock); sess = xa_load(&conn->sessions, id); if (sess) sess->last_active = jiffies; + up_read(&conn->session_lock); return sess; } -- cgit From 5a7ee91d1154f35418367a6eaae74046fd06ed89 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 4 Oct 2023 18:28:39 +0900 Subject: ksmbd: fix race condition with fp fp can used in each command. If smb2_close command is coming at the same time, UAF issue can happen by race condition. Time + Thread A | Thread B1 B2 .... B5 smb2_open | smb2_close | __open_id | insert fp to file_table | | | atomic_dec_and_test(&fp->refcount) | if fp->refcount == 0, free fp by kfree. // UAF! | use fp | + This patch add f_state not to use freed fp is used and not to free fp in use. Reported-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 +++- fs/smb/server/vfs_cache.c | 23 ++++++++++++++++++++--- fs/smb/server/vfs_cache.h | 9 +++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 544022dd6d20..420e7691c8cf 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3370,8 +3370,10 @@ err_out: } ksmbd_revert_fsids(work); err_out1: - if (!rc) + if (!rc) { + ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); + } if (rc) { if (rc == -EINVAL) rsp->hdr.Status = STATUS_INVALID_PARAMETER; diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index f41f8d6108ce..c4b80ab7df74 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -333,6 +333,9 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) static struct ksmbd_file *ksmbd_fp_get(struct ksmbd_file *fp) { + if (fp->f_state != FP_INITED) + return NULL; + if (!atomic_inc_not_zero(&fp->refcount)) return NULL; return fp; @@ -382,15 +385,20 @@ int ksmbd_close_fd(struct ksmbd_work *work, u64 id) return 0; ft = &work->sess->file_table; - read_lock(&ft->lock); + write_lock(&ft->lock); fp = idr_find(ft->idr, id); if (fp) { set_close_state_blocked_works(fp); - if (!atomic_dec_and_test(&fp->refcount)) + if (fp->f_state != FP_INITED) fp = NULL; + else { + fp->f_state = FP_CLOSED; + if (!atomic_dec_and_test(&fp->refcount)) + fp = NULL; + } } - read_unlock(&ft->lock); + write_unlock(&ft->lock); if (!fp) return -EINVAL; @@ -570,6 +578,7 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) fp->tcon = work->tcon; fp->volatile_id = KSMBD_NO_FID; fp->persistent_id = KSMBD_NO_FID; + fp->f_state = FP_NEW; fp->f_ci = ksmbd_inode_get(fp); if (!fp->f_ci) { @@ -591,6 +600,14 @@ err_out: return ERR_PTR(ret); } +void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state) +{ + write_lock(&ft->lock); + fp->f_state = state; + write_unlock(&ft->lock); +} + static int __close_file_table_ids(struct ksmbd_file_table *ft, struct ksmbd_tree_connect *tcon, diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index fcb13413fa8d..03d0bf941216 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -60,6 +60,12 @@ struct ksmbd_inode { __le32 m_fattr; }; +enum { + FP_NEW = 0, + FP_INITED, + FP_CLOSED +}; + struct ksmbd_file { struct file *filp; u64 persistent_id; @@ -98,6 +104,7 @@ struct ksmbd_file { /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; + unsigned int f_state; }; static inline void set_ctx_actor(struct dir_context *ctx, @@ -142,6 +149,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); int ksmbd_init_global_file_table(void); void ksmbd_free_global_file_table(void); void ksmbd_set_fd_limit(unsigned long limit); +void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state); /* * INODE hash -- cgit From c69813471a1ec081a0b9bf0c6bd7e8afd818afce Mon Sep 17 00:00:00 2001 From: luosili Date: Wed, 4 Oct 2023 18:29:36 +0900 Subject: ksmbd: fix uaf in smb20_oplock_break_ack drop reference after use opinfo. Signed-off-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 420e7691c8cf..b9d6e8e451ba 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8038,10 +8038,10 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); + opinfo_put(opinfo); + ksmbd_fd_put(work, fp); rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; -- cgit From 7ca9da7d873ee8024e9548d3366101c2b6843eab Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 4 Oct 2023 18:30:14 +0900 Subject: ksmbd: fix race condition from parallel smb2 logoff requests If parallel smb2 logoff requests come in before closing door, running request count becomes more than 1 even though connection status is set to KSMBD_SESS_NEED_RECONNECT. It can't get condition true, and sleep forever. This patch fix race condition problem by returning error if connection status was already set to KSMBD_SESS_NEED_RECONNECT. Reported-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index b9d6e8e451ba..e774c9855f7f 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2164,17 +2164,17 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_debug(SMB, "request\n"); - sess_id = le64_to_cpu(req->hdr.SessionId); - - rsp->StructureSize = cpu_to_le16(4); - err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); - if (err) { - rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + ksmbd_conn_lock(conn); + if (!ksmbd_conn_good(conn)) { + ksmbd_conn_unlock(conn); + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; smb2_set_err_rsp(work); - return err; + return -ENOENT; } - + sess_id = le64_to_cpu(req->hdr.SessionId); ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_RECONNECT); + ksmbd_conn_unlock(conn); + ksmbd_close_session_fds(work); ksmbd_conn_wait_idle(conn, sess_id); @@ -2196,6 +2196,14 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_free_user(sess->user); sess->user = NULL; ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE); + + rsp->StructureSize = cpu_to_le16(4); + err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); + if (err) { + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + smb2_set_err_rsp(work); + return err; + } return 0; } -- cgit From 75ac9a3dd65f7eab4d12b0a0f744234b5300a491 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 4 Oct 2023 18:31:03 +0900 Subject: ksmbd: fix race condition from parallel smb2 lock requests There is a race condition issue between parallel smb2 lock request. Time + Thread A | Thread A smb2_lock | smb2_lock | insert smb_lock to lock_list | spin_unlock(&work->conn->llist_lock) | | | spin_lock(&conn->llist_lock); | kfree(cmp_lock); | // UAF! | list_add(&smb_lock->llist, &rollback_list) + This patch swaps the line for adding the smb lock to the rollback list and adding the lock list of connection to fix the race issue. Reported-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e774c9855f7f..fd6f05786ac2 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7038,10 +7038,6 @@ skip: ksmbd_debug(SMB, "would have to wait for getting lock\n"); - spin_lock(&work->conn->llist_lock); - list_add_tail(&smb_lock->clist, - &work->conn->lock_list); - spin_unlock(&work->conn->llist_lock); list_add(&smb_lock->llist, &rollback_list); argv = kmalloc(sizeof(void *), GFP_KERNEL); @@ -7072,9 +7068,6 @@ skip: if (work->state != KSMBD_WORK_ACTIVE) { list_del(&smb_lock->llist); - spin_lock(&work->conn->llist_lock); - list_del(&smb_lock->clist); - spin_unlock(&work->conn->llist_lock); locks_free_lock(flock); if (work->state == KSMBD_WORK_CANCELLED) { @@ -7094,19 +7087,16 @@ skip: } list_del(&smb_lock->llist); - spin_lock(&work->conn->llist_lock); - list_del(&smb_lock->clist); - spin_unlock(&work->conn->llist_lock); release_async_work(work); goto retry; } else if (!rc) { + list_add(&smb_lock->llist, &rollback_list); spin_lock(&work->conn->llist_lock); list_add_tail(&smb_lock->clist, &work->conn->lock_list); list_add_tail(&smb_lock->flist, &fp->lock_list); spin_unlock(&work->conn->llist_lock); - list_add(&smb_lock->llist, &rollback_list); ksmbd_debug(SMB, "successful in taking lock\n"); } else { goto out; -- cgit From 4953856f280b2b606089a72a93a1e9212a3adaca Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 11 Aug 2023 12:25:26 +0800 Subject: drm/amd/pm: add unique_id for gc 11.0.3 add unique_id for gc 11.0.3 Signed-off-by: Kenneth Feng Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 41147da54458..8bb2da13826f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2040,6 +2040,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): *states = ATTR_STATE_SUPPORTED; break; default: -- cgit From 5d061675b7538e25d060d13310880c01160207c4 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Fri, 22 Sep 2023 17:21:21 -0400 Subject: drm/amdgpu: Fix a memory leak Fix a memory leak in amdgpu_fru_get_product_info(). Cc: Alex Deucher Reported-by: Yang Wang Fixes: 0dbf2c562625 ("drm/amdgpu: Interpret IPMI data for product information (v2)") Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 9c66d98af6d8..7cd0dfaeee20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -170,6 +170,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) csum += pia[size - 1]; if (csum) { DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + kfree(pia); return -EIO; } -- cgit From 2a1fe39a5be785e962e387146aed34fa9a829f3f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 26 Sep 2023 21:07:43 -0500 Subject: drm/amd: Fix logic error in sienna_cichlid_update_pcie_parameters() While aligning SMU11 with SMU13 implementation an assumption was made that `dpm_context->dpm_tables.pcie_table` was populated in dpm table initialization like in SMU13 but it isn't. So restore some of the original logic and instead just check for amdgpu_device_pcie_dynamic_switching_supported() to decide whether to hardcode values; erring on the side of performance. Cc: stable@vger.kernel.org # 6.1+ Reported-and-tested-by: Umio Yasuno Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1447#note_2101382 Fixes: e701156ccc6c ("drm/amd: Align SMU11 SMU_MSG_OverridePcieParameters implementation with SMU13") Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 41 ++++++++++++---------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 4bb289f9b4b8..da2860da6018 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2082,36 +2082,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return ret; } +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; - u32 smu_pcie_arg; + uint8_t *table_member1, *table_member2; + uint32_t min_gen_speed, max_gen_speed; + uint32_t min_lane_width, max_lane_width; + uint32_t smu_pcie_arg; int ret, i; - /* PCIE gen speed and lane width override */ - if (!amdgpu_device_pcie_dynamic_switching_supported()) { - if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap) - pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1]; + GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); + GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); - if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap) - pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1]; + min_gen_speed = MAX(0, table_member1[0]); + max_gen_speed = MIN(pcie_gen_cap, table_member1[1]); + min_gen_speed = min_gen_speed > max_gen_speed ? + max_gen_speed : min_gen_speed; + min_lane_width = MAX(1, table_member2[0]); + max_lane_width = MIN(pcie_width_cap, table_member2[1]); + min_lane_width = min_lane_width > max_lane_width ? + max_lane_width : min_lane_width; - /* Force all levels to use the same settings */ - for (i = 0; i < NUM_LINK_LEVELS; i++) { - pcie_table->pcie_gen[i] = pcie_gen_cap; - pcie_table->pcie_lane[i] = pcie_width_cap; - } + if (!amdgpu_device_pcie_dynamic_switching_supported()) { + pcie_table->pcie_gen[0] = max_gen_speed; + pcie_table->pcie_lane[0] = max_lane_width; } else { - for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - } + pcie_table->pcie_gen[0] = min_gen_speed; + pcie_table->pcie_lane[0] = min_lane_width; } + pcie_table->pcie_gen[1] = max_gen_speed; + pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16 | -- cgit From 134b8c5d8674e7cde380f82e9aedfd46dcdd16f7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 26 Sep 2023 17:59:53 -0500 Subject: drm/amd: Fix detection of _PR3 on the PCIe root port On some systems with Navi3x dGPU will attempt to use BACO for runtime PM but fails to resume properly. This is because on these systems the root port goes into D3cold which is incompatible with BACO. This happens because in this case dGPU is connected to a bridge between root port which causes BOCO detection logic to fail. Fix the intent of the logic by looking at root port, not the immediate upstream bridge for _PR3. Cc: stable@vger.kernel.org Suggested-by: Jun Ma Tested-by: David Perry Fixes: b10c1c5b3a4e ("drm/amdgpu: add check for ACPI power resources") Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 30c4f5cca02c..2b8356699f23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2093,7 +2093,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { - parent = pci_upstream_bridge(adev->pdev); + parent = pcie_find_root_port(adev->pdev); adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } -- cgit From b206011bf05069797df1f4c5ce639398728978e2 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Mon, 18 Sep 2023 18:43:13 -0400 Subject: drm/amd/display: apply edge-case DISPCLK WDIVIDER changes to master OTG pipes only [Why] The edge-case DISPCLK WDIVIDER changes call stream_enc functions. But with MPC pipes, downstream pipes have null stream_enc and will cause crash. [How] Only call stream_enc functions for pipes that are OTG master. Reviewed-by: Alvin Lee Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Aurabindo Pillai Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 4 ++-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index c435f7632e8e..5ee87965a078 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -157,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -188,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 984b52923534..e9345f6554db 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -355,7 +355,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -401,7 +401,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) -- cgit From 33b235a6e6ebe0f05f3586a71e8d281d00f71e2e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 5 Oct 2023 11:22:03 +0900 Subject: ksmbd: fix race condition between tree conn lookup and disconnect if thread A in smb2_write is using work-tcon, other thread B use smb2_tree_disconnect free the tcon, then thread A will use free'd tcon. Time + Thread A | Thread A smb2_write | smb2_tree_disconnect | | | kfree(tree_conn) | // UAF! | work->tcon->share_conf | + This patch add state, reference count and lock for tree conn to fix race condition issue. Reported-by: luosili Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/tree_connect.c | 42 +++++++++++++++++++++++++++++--- fs/smb/server/mgmt/tree_connect.h | 11 +++++++-- fs/smb/server/mgmt/user_session.c | 1 + fs/smb/server/mgmt/user_session.h | 1 + fs/smb/server/server.c | 2 ++ fs/smb/server/smb2pdu.c | 50 +++++++++++++++++++++++++++++---------- 6 files changed, 90 insertions(+), 17 deletions(-) diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index 408cddf2f094..d2c81a8a11dd 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -73,7 +73,10 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, tree_conn->user = sess->user; tree_conn->share_conf = sc; + tree_conn->t_state = TREE_NEW; status.tree_conn = tree_conn; + atomic_set(&tree_conn->refcount, 1); + init_waitqueue_head(&tree_conn->refcount_q); ret = xa_err(xa_store(&sess->tree_conns, tree_conn->id, tree_conn, GFP_KERNEL)); @@ -93,14 +96,33 @@ out_error: return status; } +void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon) +{ + /* + * Checking waitqueue to releasing tree connect on + * tree disconnect. waitqueue_active is safe because it + * uses atomic operation for condition. + */ + if (!atomic_dec_return(&tcon->refcount) && + waitqueue_active(&tcon->refcount_q)) + wake_up(&tcon->refcount_q); +} + int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, struct ksmbd_tree_connect *tree_conn) { int ret; + write_lock(&sess->tree_conns_lock); + xa_erase(&sess->tree_conns, tree_conn->id); + write_unlock(&sess->tree_conns_lock); + + if (!atomic_dec_and_test(&tree_conn->refcount)) + wait_event(tree_conn->refcount_q, + atomic_read(&tree_conn->refcount) == 0); + ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id); ksmbd_release_tree_conn_id(sess, tree_conn->id); - xa_erase(&sess->tree_conns, tree_conn->id); ksmbd_share_config_put(tree_conn->share_conf); kfree(tree_conn); return ret; @@ -111,11 +133,15 @@ struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess, { struct ksmbd_tree_connect *tcon; + read_lock(&sess->tree_conns_lock); tcon = xa_load(&sess->tree_conns, id); if (tcon) { - if (test_bit(TREE_CONN_EXPIRE, &tcon->status)) + if (tcon->t_state != TREE_CONNECTED) + tcon = NULL; + else if (!atomic_inc_not_zero(&tcon->refcount)) tcon = NULL; } + read_unlock(&sess->tree_conns_lock); return tcon; } @@ -129,8 +155,18 @@ int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess) if (!sess) return -EINVAL; - xa_for_each(&sess->tree_conns, id, tc) + xa_for_each(&sess->tree_conns, id, tc) { + write_lock(&sess->tree_conns_lock); + if (tc->t_state == TREE_DISCONNECTED) { + write_unlock(&sess->tree_conns_lock); + ret = -ENOENT; + continue; + } + tc->t_state = TREE_DISCONNECTED; + write_unlock(&sess->tree_conns_lock); + ret |= ksmbd_tree_conn_disconnect(sess, tc); + } xa_destroy(&sess->tree_conns); return ret; } diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 562d647ad9fa..6377a70b811c 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -14,7 +14,11 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; -#define TREE_CONN_EXPIRE 1 +enum { + TREE_NEW = 0, + TREE_CONNECTED, + TREE_DISCONNECTED +}; struct ksmbd_tree_connect { int id; @@ -27,7 +31,9 @@ struct ksmbd_tree_connect { int maximal_access; bool posix_extensions; - unsigned long status; + atomic_t refcount; + wait_queue_head_t refcount_q; + unsigned int t_state; }; struct ksmbd_tree_conn_status { @@ -46,6 +52,7 @@ struct ksmbd_session; struct ksmbd_tree_conn_status ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, const char *share_name); +void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, struct ksmbd_tree_connect *tree_conn); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index b8be14a96cf6..15f68ee05089 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -355,6 +355,7 @@ static struct ksmbd_session *__session_create(int protocol) xa_init(&sess->ksmbd_chann_list); xa_init(&sess->rpc_handle_list); sess->sequence_number = 1; + rwlock_init(&sess->tree_conns_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f99d475b28db..63cb08fffde8 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -60,6 +60,7 @@ struct ksmbd_session { struct ksmbd_file_table file_table; unsigned long last_active; + rwlock_t tree_conns_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 32347fec33c4..3079e607c5fe 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -241,6 +241,8 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, } while (is_chained == true); send: + if (work->tcon) + ksmbd_tree_connect_put(work->tcon); smb3_preauth_hash_rsp(work); if (work->sess && work->sess->enc && work->encrypted && conn->ops->encrypt_resp) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index fd6f05786ac2..898860adf929 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1993,6 +1993,9 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; + write_lock(&sess->tree_conns_lock); + status.tree_conn->t_state = TREE_CONNECTED; + write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: rsp->Capabilities = 0; @@ -2122,27 +2125,50 @@ int smb2_tree_disconnect(struct ksmbd_work *work) ksmbd_debug(SMB, "request\n"); + if (!tcon) { + ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); + + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + err = -ENOENT; + goto err_out; + } + + ksmbd_close_tree_conn_fds(work); + + write_lock(&sess->tree_conns_lock); + if (tcon->t_state == TREE_DISCONNECTED) { + write_unlock(&sess->tree_conns_lock); + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + err = -ENOENT; + goto err_out; + } + + WARN_ON_ONCE(atomic_dec_and_test(&tcon->refcount)); + tcon->t_state = TREE_DISCONNECTED; + write_unlock(&sess->tree_conns_lock); + + err = ksmbd_tree_conn_disconnect(sess, tcon); + if (err) { + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + goto err_out; + } + + work->tcon = NULL; + rsp->StructureSize = cpu_to_le16(4); err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_tree_disconnect_rsp)); if (err) { rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; - smb2_set_err_rsp(work); - return err; + goto err_out; } - if (!tcon || test_and_set_bit(TREE_CONN_EXPIRE, &tcon->status)) { - ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); + return 0; - rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; - smb2_set_err_rsp(work); - return -ENOENT; - } +err_out: + smb2_set_err_rsp(work); + return err; - ksmbd_close_tree_conn_fds(work); - ksmbd_tree_conn_disconnect(sess, tcon); - work->tcon = NULL; - return 0; } /** -- cgit From f9315f17bf778cb8079a29639419fcc8a41a3c84 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 3 Oct 2023 09:39:26 +0200 Subject: gpio: aspeed: fix the GPIO number passed to pinctrl_gpio_set_config() pinctrl_gpio_set_config() expects the GPIO number from the global GPIO numberspace, not the controller-relative offset, which needs to be added to the chip base. Fixes: 5ae4cb94b313 ("gpio: aspeed: Add debounce support") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Andrew Jeffery --- drivers/gpio/gpio-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index da33bbbdacb9..58f107194fda 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -973,7 +973,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(offset, config); + return pinctrl_gpio_set_config(chip->base + offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ -- cgit From 0f28ada1fbf0054557cddcdb93ad17f767105208 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Garcia Date: Wed, 6 Sep 2023 11:49:26 +0000 Subject: mcb: remove is_added flag from mcb_device struct When calling mcb_bus_add_devices(), both mcb devices and the mcb bus will attempt to attach a device to a driver because they share the same bus_type. This causes an issue when trying to cast the container of the device to mcb_device struct using to_mcb_device(), leading to a wrong cast when the mcb_bus is added. A crash occurs when freing the ida resources as the bus numbering of mcb_bus gets confused with the is_added flag on the mcb_device struct. The only reason for this cast was to keep an is_added flag on the mcb_device struct that does not seem necessary. The function device_attach() handles already bound devices and the mcb subsystem does nothing special with this is_added flag so remove it completely. Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable Signed-off-by: Jorge Sanjuan Garcia Co-developed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jose Javier Rodriguez Barbarin Link: https://lore.kernel.org/r/20230906114901.63174-2-JoseJavier.Rodriguez@duagon.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 10 +++------- drivers/mcb/mcb-parse.c | 2 -- include/linux/mcb.h | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 978fdfc19a06..0cac5bead84f 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -387,17 +387,13 @@ EXPORT_SYMBOL_NS_GPL(mcb_free_dev, MCB); static int __mcb_bus_add_devices(struct device *dev, void *data) { - struct mcb_device *mdev = to_mcb_device(dev); int retval; - if (mdev->is_added) - return 0; - retval = device_attach(dev); - if (retval < 0) + if (retval < 0) { dev_err(dev, "Error adding device (%d)\n", retval); - - mdev->is_added = true; + return retval; + } return 0; } diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 2aef990f379f..656b6b71c768 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -99,8 +99,6 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, mdev->mem.end = mdev->mem.start + size - 1; mdev->mem.flags = IORESOURCE_MEM; - mdev->is_added = false; - ret = mcb_device_register(bus, mdev); if (ret < 0) goto err; diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; -- cgit From b83ce9cb4a465b8f9a3fa45561b721a9551f60e3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Sep 2023 10:27:23 +0200 Subject: dma-buf: add dma_fence_timestamp helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fence signals there is a very small race window where the timestamp isn't updated yet. sync_file solves this by busy waiting for the timestamp to appear, but on other ocassions didn't handled this correctly. Provide a dma_fence_timestamp() helper function for this and use it in all appropriate cases. Another alternative would be to grab the spinlock when that happens. v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case the accurate timestamp is needed and/or the timestamp is not based on ktime (e.g. hw timestamp) v3 chk: drop the parameter again for unified handling Signed-off-by: Yunxiang Li Signed-off-by: Christian König Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2") Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-unwrap.c | 13 ++++--------- drivers/dma-buf/sync_file.c | 9 +++------ drivers/gpu/drm/scheduler/sched_main.c | 2 +- include/linux/dma-fence.h | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index c625bb2b5d56..628af51c81af 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { if (!dma_fence_is_signaled(tmp)) { ++count; - } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, - &tmp->flags)) { - if (ktime_after(tmp->timestamp, timestamp)) - timestamp = tmp->timestamp; } else { - /* - * Use the current time if the fence is - * currently signaling. - */ - timestamp = ktime_get(); + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } } diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index af57799c86ce..2e9a316c596a 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence, sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); - while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && - !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) - cpu_relax(); info->timestamp_ns = - test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? - ktime_to_ns(fence->timestamp) : - ktime_set(0, 0); + dma_fence_is_signaled(fence) ? + ktime_to_ns(dma_fence_timestamp(fence)) : + ktime_set(0, 0); return info->status; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 506371c42745..5a3a622fc672 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -929,7 +929,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) if (next) { next->s_fence->scheduled.timestamp = - job->s_fence->finished.timestamp; + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..ebe78bd3d121 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit From ec4405ed92036f5bb487b5c2f9a28f9e36a3e3d5 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Thu, 10 Aug 2023 23:18:25 +0300 Subject: thunderbolt: Call tb_switch_put() once DisplayPort bandwidth request is finished When handling DisplayPort bandwidth request tb_switch_find_by_route() is called and it returns a router structure with reference count increased. In order to avoid resource leak call tb_switch_put() when finished. Fixes: 6ce3563520be ("thunderbolt: Add support for DisplayPort bandwidth allocation mode") Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index dd0a1ef8cf12..27bd6ca6f99e 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1907,14 +1907,14 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) in = &sw->ports[ev->port]; if (!tb_port_is_dpin(in)) { tb_port_warn(in, "bandwidth request to non-DP IN adapter\n"); - goto unlock; + goto put_sw; } tb_port_dbg(in, "handling bandwidth allocation request\n"); if (!usb4_dp_port_bandwidth_mode_enabled(in)) { tb_port_warn(in, "bandwidth allocation mode not enabled\n"); - goto unlock; + goto put_sw; } ret = usb4_dp_port_requested_bandwidth(in); @@ -1923,7 +1923,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_port_dbg(in, "no bandwidth request active\n"); else tb_port_warn(in, "failed to read requested bandwidth\n"); - goto unlock; + goto put_sw; } requested_bw = ret; @@ -1932,7 +1932,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tunnel = tb_find_tunnel(tb, TB_TUNNEL_DP, in, NULL); if (!tunnel) { tb_port_warn(in, "failed to find tunnel\n"); - goto unlock; + goto put_sw; } out = tunnel->dst_port; @@ -1959,6 +1959,8 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_recalc_estimated_bandwidth(tb); } +put_sw: + tb_switch_put(sw); unlock: mutex_unlock(&tb->lock); -- cgit From 39fef15b5f2db46619393f9fd20fdd26a2ff49ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 17:50:03 -0700 Subject: Documentation: embargoed-hardware-issues.rst: Clarify prenotifaction There has been a repeated misunderstanding about what the hardware embargo list is for. Clarify the language in the process so that it is clear that only fixes are coordinated. There is explicitly no prenotification process. The list members are also expected to keep total radio silence during embargoes. Cc: Thomas Gleixner Cc: workflows@vger.kernel.org Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20231004004959.work.258-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index ac7c52f130c9..31000f075707 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -25,15 +25,15 @@ Contact The Linux kernel hardware security team is separate from the regular Linux kernel security team. -The team only handles the coordination of embargoed hardware security -issues. Reports of pure software security bugs in the Linux kernel are not +The team only handles developing fixes for embargoed hardware security +issues. Reports of pure software security bugs in the Linux kernel are not handled by this team and the reporter will be guided to contact the regular Linux kernel security team (:ref:`Documentation/admin-guide/ `) instead. The team can be contacted by email at . This -is a private list of security officers who will help you to coordinate an -issue according to our documented process. +is a private list of security officers who will help you to coordinate a +fix according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -132,11 +132,11 @@ other hardware could be affected. The hardware security team will provide an incident-specific encrypted mailing-list which will be used for initial discussion with the reporter, -further disclosure and coordination. +further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of developers (domain experts) who should be informed initially about the -issue after confirming with the developers that they will adhere to this +issue after confirming with the developers that they will adhere to this Memorandum of Understanding and the documented process. These developers form the initial response team and will be responsible for handling the issue after initial contact. The hardware security team is supporting the @@ -209,13 +209,18 @@ five work days this is taken as silent acknowledgement. After acknowledgement or resolution of an objection the expert is disclosed by the incident team and brought into the development process. +List participants may not communicate about the issue outside of the +private mailing list. List participants may not use any shared resources +(e.g. employer build farms, CI systems, etc) when working on patches. + Coordinated release """"""""""""""""""" The involved parties will negotiate the date and time where the embargo ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. +relevant kernel trees and published. There is no pre-notification process: +fixes are published in public and available to everyone at the same time. While we understand that hardware security issues need coordinated embargo time, the embargo time should be constrained to the minimum time which is -- cgit From 53604854c6f00ce1db4393499125aff94e3d9bbd Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Fri, 29 Sep 2023 08:13:26 -0700 Subject: firmware_loader: Update contact emails for ABI docs Update the firmware_loader documentation and corresponding section in the MAINTAINERs file with a new email address. Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20230929151326.311959-1-russell.h.weight@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-firmware | 14 +++++++------- MAINTAINERS | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware b/Documentation/ABI/testing/sysfs-class-firmware index 978d3d500400..fba87a55f3ca 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware +++ b/Documentation/ABI/testing/sysfs-class-firmware @@ -1,7 +1,7 @@ What: /sys/class/firmware/.../data Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: The data sysfs file is used for firmware-fallback and for firmware uploads. Cat a firmware image to this sysfs file after you echo 1 to the loading sysfs file. When the firmware @@ -13,7 +13,7 @@ Description: The data sysfs file is used for firmware-fallback and for What: /sys/class/firmware/.../cancel Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Write-only. For firmware uploads, write a "1" to this file to request that the transfer of firmware data to the lower-level device be canceled. This request will be rejected (EBUSY) if @@ -23,7 +23,7 @@ Description: Write-only. For firmware uploads, write a "1" to this file to What: /sys/class/firmware/.../error Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. Returns a string describing a failed firmware upload. This string will be in the form of :, where will be one of the status strings described @@ -37,7 +37,7 @@ Description: Read-only. Returns a string describing a failed firmware What: /sys/class/firmware/.../loading Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: The loading sysfs file is used for both firmware-fallback and for firmware uploads. Echo 1 onto the loading file to indicate you are writing a firmware file to the data sysfs node. Echo @@ -49,7 +49,7 @@ Description: The loading sysfs file is used for both firmware-fallback and What: /sys/class/firmware/.../remaining_size Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. For firmware upload, this file contains the size of the firmware data that remains to be transferred to the lower-level device driver. The size value is initialized to @@ -62,7 +62,7 @@ Description: Read-only. For firmware upload, this file contains the size What: /sys/class/firmware/.../status Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. Returns a string describing the current status of a firmware upload. The string will be one of the following: idle, "receiving", "preparing", "transferring", "programming". @@ -70,7 +70,7 @@ Description: Read-only. Returns a string describing the current status of What: /sys/class/firmware/.../timeout Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: This file supports the timeout mechanism for firmware fallback. This file has no affect on firmware uploads. For more information on timeouts please see the documentation diff --git a/MAINTAINERS b/MAINTAINERS index 35977b269d5e..3fcbd6b702a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8093,7 +8093,7 @@ F: include/linux/arm_ffa.h FIRMWARE LOADER (request_firmware) M: Luis Chamberlain -M: Russ Weight +M: Russ Weight L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/firmware_class/ -- cgit From 1146bec0ca34375f963f79f5c4e6b49ed5386aaa Mon Sep 17 00:00:00 2001 From: Irui Wang Date: Tue, 26 Sep 2023 18:19:08 +0800 Subject: media: mediatek: vcodec: Fix encoder access NULL pointer Need to set the private data with encoder device, or will access NULL pointer in encoder handler. Fixes: 1972e32431ed ("media: mediatek: vcodec: Fix possible invalid memory access for encoder") Signed-off-by: Irui Wang Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hans Verkuil --- drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c index d299cc2962a5..ae6290d28f8e 100644 --- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c +++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c @@ -138,7 +138,8 @@ int vpu_enc_init(struct venc_vpu_inst *vpu) vpu->ctx->vpu_inst = vpu; status = mtk_vcodec_fw_ipi_register(vpu->ctx->dev->fw_handler, vpu->id, - vpu_enc_ipi_handler, "venc", NULL); + vpu_enc_ipi_handler, "venc", + vpu->ctx->dev); if (status) { mtk_venc_err(vpu->ctx, "vpu_ipi_register fail %d", status); -- cgit From b2b000069a4c307b09548dc2243f31f3ca0eac9c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 29 Sep 2023 13:42:25 -0700 Subject: net: mana: Fix TX CQE error handling For an unknown TX CQE error type (probably from a newer hardware), still free the SKB, update the queue tail, etc., otherwise the accounting will be wrong. Also, TX errors can be triggered by injecting corrupted packets, so replace the WARN_ONCE to ratelimited error logging. Cc: stable@vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Reviewed-by: Simon Horman Reviewed-by: Shradha Gupta Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 4a16ebff3d1d..5cdcf7561b38 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1317,19 +1317,23 @@ static void mana_poll_tx_cq(struct mana_cq *cq) case CQE_TX_VPORT_IDX_OUT_OF_RANGE: case CQE_TX_VPORT_DISABLED: case CQE_TX_VLAN_TAGGING_VIOLATION: - WARN_ONCE(1, "TX: CQE error %d: ignored.\n", - cqe_oob->cqe_hdr.cqe_type); + if (net_ratelimit()) + netdev_err(ndev, "TX: CQE error %d\n", + cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_err++; break; default: - /* If the CQE type is unexpected, log an error, assert, - * and go through the error path. + /* If the CQE type is unknown, log an error, + * and still free the SKB, update tail, etc. */ - WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", - cqe_oob->cqe_hdr.cqe_type); + if (net_ratelimit()) + netdev_err(ndev, "TX: unknown CQE type %d\n", + cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_unknown_type++; - return; + break; } if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) -- cgit From 7a54de92657455210d0ca71d4176b553952c871a Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 29 Sep 2023 13:42:26 -0700 Subject: net: mana: Fix the tso_bytes calculation sizeof(struct hop_jumbo_hdr) is not part of tso_bytes, so remove the subtraction from header size. Cc: stable@vger.kernel.org Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting") Signed-off-by: Haiyang Zhang Reviewed-by: Simon Horman Reviewed-by: Shradha Gupta Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 5cdcf7561b38..86e724c3eb89 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -264,8 +264,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ihs = skb_transport_offset(skb) + sizeof(struct udphdr); } else { ihs = skb_tcp_all_headers(skb); - if (ipv6_has_hopopt_jumbo(skb)) - ihs -= sizeof(struct hop_jumbo_hdr); } u64_stats_update_begin(&tx_stats->syncp); -- cgit From a43e8e9ffa0d1de058964edf1a0622cbb7e27cfe Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 29 Sep 2023 13:42:27 -0700 Subject: net: mana: Fix oversized sge0 for GSO packets Handle the case when GSO SKB linear length is too large. MANA NIC requires GSO packets to put only the header part to SGE0, otherwise the TX queue may stop at the HW level. So, use 2 SGEs for the skb linear part which contains more than the packet header. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Reviewed-by: Simon Horman Reviewed-by: Shradha Gupta Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 191 ++++++++++++++++++-------- include/net/mana/mana.h | 5 +- 2 files changed, 138 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 86e724c3eb89..48ea4aeeea5d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -91,63 +91,137 @@ static unsigned int mana_checksum_info(struct sk_buff *skb) return 0; } +static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash, + int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey) +{ + ash->dma_handle[sg_i] = da; + ash->size[sg_i] = sge_len; + + tp->wqe_req.sgl[sg_i].address = da; + tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey; + tp->wqe_req.sgl[sg_i].size = sge_len; +} + static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, - struct mana_tx_package *tp) + struct mana_tx_package *tp, int gso_hs) { struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + int hsg = 1; /* num of SGEs of linear part */ struct gdma_dev *gd = apc->ac->gdma_dev; + int skb_hlen = skb_headlen(skb); + int sge0_len, sge1_len = 0; struct gdma_context *gc; struct device *dev; skb_frag_t *frag; dma_addr_t da; + int sg_i; int i; gc = gd->gdma_context; dev = gc->dev; - da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); + if (gso_hs && gso_hs < skb_hlen) { + sge0_len = gso_hs; + sge1_len = skb_hlen - gso_hs; + } else { + sge0_len = skb_hlen; + } + + da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE); if (dma_mapping_error(dev, da)) return -ENOMEM; - ash->dma_handle[0] = da; - ash->size[0] = skb_headlen(skb); + mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey); - tp->wqe_req.sgl[0].address = ash->dma_handle[0]; - tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; - tp->wqe_req.sgl[0].size = ash->size[0]; + if (sge1_len) { + sg_i = 1; + da = dma_map_single(dev, skb->data + sge0_len, sge1_len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, da)) + goto frag_err; + + mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey); + hsg = 2; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + sg_i = hsg + i; + frag = &skb_shinfo(skb)->frags[i]; da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); - if (dma_mapping_error(dev, da)) goto frag_err; - ash->dma_handle[i + 1] = da; - ash->size[i + 1] = skb_frag_size(frag); - - tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1]; - tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey; - tp->wqe_req.sgl[i + 1].size = ash->size[i + 1]; + mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag), + gd->gpa_mkey); } return 0; frag_err: - for (i = i - 1; i >= 0; i--) - dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], + for (i = sg_i - 1; i >= hsg; i--) + dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); - dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + for (i = hsg - 1; i >= 0; i--) + dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); return -ENOMEM; } +/* Handle the case when GSO SKB linear length is too large. + * MANA NIC requires GSO packets to put only the packet header to SGE0. + * So, we need 2 SGEs for the skb linear part which contains more than the + * header. + * Return a positive value for the number of SGEs, or a negative value + * for an error. + */ +static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb, + int gso_hs) +{ + int num_sge = 1 + skb_shinfo(skb)->nr_frags; + int skb_hlen = skb_headlen(skb); + + if (gso_hs < skb_hlen) { + num_sge++; + } else if (gso_hs > skb_hlen) { + if (net_ratelimit()) + netdev_err(ndev, + "TX nonlinear head: hs:%d, skb_hlen:%d\n", + gso_hs, skb_hlen); + + return -EINVAL; + } + + return num_sge; +} + +/* Get the GSO packet's header size */ +static int mana_get_gso_hs(struct sk_buff *skb) +{ + int gso_hs; + + if (skb->encapsulation) { + gso_hs = skb_inner_tcp_all_headers(skb); + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + gso_hs = skb_transport_offset(skb) + + sizeof(struct udphdr); + } else { + gso_hs = skb_tcp_all_headers(skb); + } + } + + return gso_hs; +} + netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) { enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; struct mana_port_context *apc = netdev_priv(ndev); + int gso_hs = 0; /* zero for non-GSO pkts */ u16 txq_idx = skb_get_queue_mapping(skb); struct gdma_dev *gd = apc->ac->gdma_dev; bool ipv4 = false, ipv6 = false; @@ -159,7 +233,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct mana_txq *txq; struct mana_cq *cq; int err, len; - u16 ihs; if (unlikely(!apc->port_is_up)) goto tx_drop; @@ -209,19 +282,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) pkg.wqe_req.client_data_unit = 0; pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; - WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); - - if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { - pkg.wqe_req.sgl = pkg.sgl_array; - } else { - pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, - sizeof(struct gdma_sge), - GFP_ATOMIC); - if (!pkg.sgl_ptr) - goto tx_drop_count; - - pkg.wqe_req.sgl = pkg.sgl_ptr; - } if (skb->protocol == htons(ETH_P_IP)) ipv4 = true; @@ -229,6 +289,26 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ipv6 = true; if (skb_is_gso(skb)) { + int num_sge; + + gso_hs = mana_get_gso_hs(skb); + + num_sge = mana_fix_skb_head(ndev, skb, gso_hs); + if (num_sge > 0) + pkg.wqe_req.num_sge = num_sge; + else + goto tx_drop_count; + + u64_stats_update_begin(&tx_stats->syncp); + if (skb->encapsulation) { + tx_stats->tso_inner_packets++; + tx_stats->tso_inner_bytes += skb->len - gso_hs; + } else { + tx_stats->tso_packets++; + tx_stats->tso_bytes += skb->len - gso_hs; + } + u64_stats_update_end(&tx_stats->syncp); + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; @@ -252,26 +332,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } - - if (skb->encapsulation) { - ihs = skb_inner_tcp_all_headers(skb); - u64_stats_update_begin(&tx_stats->syncp); - tx_stats->tso_inner_packets++; - tx_stats->tso_inner_bytes += skb->len - ihs; - u64_stats_update_end(&tx_stats->syncp); - } else { - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { - ihs = skb_transport_offset(skb) + sizeof(struct udphdr); - } else { - ihs = skb_tcp_all_headers(skb); - } - - u64_stats_update_begin(&tx_stats->syncp); - tx_stats->tso_packets++; - tx_stats->tso_bytes += skb->len - ihs; - u64_stats_update_end(&tx_stats->syncp); - } - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { csum_type = mana_checksum_info(skb); @@ -294,11 +354,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) } else { /* Can't do offload of this type of checksum */ if (skb_checksum_help(skb)) - goto free_sgl_ptr; + goto tx_drop_count; } } - if (mana_map_skb(skb, apc, &pkg)) { + WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); + + if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { + pkg.wqe_req.sgl = pkg.sgl_array; + } else { + pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, + sizeof(struct gdma_sge), + GFP_ATOMIC); + if (!pkg.sgl_ptr) + goto tx_drop_count; + + pkg.wqe_req.sgl = pkg.sgl_ptr; + } + + if (mana_map_skb(skb, apc, &pkg, gso_hs)) { u64_stats_update_begin(&tx_stats->syncp); tx_stats->mana_map_err++; u64_stats_update_end(&tx_stats->syncp); @@ -1256,11 +1330,16 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; struct device *dev = gc->dev; - int i; + int hsg, i; + + /* Number of SGEs of linear part */ + hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1; - dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + for (i = 0; i < hsg; i++) + dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); - for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) + for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); } diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9f70b4332238..4d43adf18606 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -103,9 +103,10 @@ struct mana_txq { /* skb data and frags dma mappings */ struct mana_skb_head { - dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + /* GSO pkts may have 2 SGEs for the linear part*/ + dma_addr_t dma_handle[MAX_SKB_FRAGS + 2]; - u32 size[MAX_SKB_FRAGS + 1]; + u32 size[MAX_SKB_FRAGS + 2]; }; #define MANA_HEADROOM sizeof(struct mana_skb_head) -- cgit From 1aa3aaf8953c84bad398adf6c3cabc9d6685bf7d Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 22 Sep 2023 17:51:37 +0000 Subject: binder: fix memory leaks of spam and pending work A transaction complete work is allocated and queued for each transaction. Under certain conditions the work->type might be marked as BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT to notify userspace about potential spamming threads or as BINDER_WORK_TRANSACTION_PENDING when the target is currently frozen. However, these work types are not being handled in binder_release_work() so they will leak during a cleanup. This was reported by syzkaller with the following kmemleak dump: BUG: memory leak unreferenced object 0xffff88810e2d6de0 (size 32): comm "syz-executor338", pid 5046, jiffies 4294968230 (age 13.590s) hex dump (first 32 bytes): e0 6d 2d 0e 81 88 ff ff e0 6d 2d 0e 81 88 ff ff .m-......m-..... 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_trace+0x25/0x90 mm/slab_common.c:1114 [] kmalloc include/linux/slab.h:599 [inline] [] kzalloc include/linux/slab.h:720 [inline] [] binder_transaction+0x573/0x4050 drivers/android/binder.c:3152 [] binder_thread_write+0x6b5/0x1860 drivers/android/binder.c:4010 [] binder_ioctl_write_read drivers/android/binder.c:5066 [inline] [] binder_ioctl+0x1b2c/0x3cf0 drivers/android/binder.c:5352 [] vfs_ioctl fs/ioctl.c:51 [inline] [] __do_sys_ioctl fs/ioctl.c:871 [inline] [] __se_sys_ioctl fs/ioctl.c:857 [inline] [] __x64_sys_ioctl+0xf2/0x140 fs/ioctl.c:857 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix the leaks by kfreeing these work types in binder_release_work() and handle them as a BINDER_WORK_TRANSACTION_COMPLETE cleanup. Cc: stable@vger.kernel.org Fixes: 0567461a7a6e ("binder: return pending info for frozen async txns") Fixes: a7dc1e6f99df ("binder: tell userspace to dump current backtrace when detected oneway spamming") Reported-by: syzbot+7f10c1653e35933c0f1e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7f10c1653e35933c0f1e Suggested-by: Alice Ryhl Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20230922175138.230331-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 367afac5f1bf..92128aae2d06 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4812,6 +4812,8 @@ static void binder_release_work(struct binder_proc *proc, "undelivered TRANSACTION_ERROR: %u\n", e->cmd); } break; + case BINDER_WORK_TRANSACTION_PENDING: + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: case BINDER_WORK_TRANSACTION_COMPLETE: { binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered TRANSACTION_COMPLETE\n"); -- cgit From 1437e4547edf41689d7135faaca4222ef0081bc1 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 17 Sep 2023 16:18:43 +0000 Subject: HID: multitouch: Add required quirk for Synaptics 0xcd7e device Register the Synaptics device as a special multitouch device with certain quirks that may improve usability of the touchpad device. Reported-by: Rain Closes: https://lore.kernel.org/linux-input/2bbb8e1d-1793-4df1-810f-cb0137341ff4@app.fastmail.com/ Signed-off-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 521b2ffb4244..8db4ae05febc 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2144,6 +2144,10 @@ static const struct hid_device_id mt_devices[] = { USB_DEVICE_ID_MTP_STM)}, /* Synaptics devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, -- cgit From b07b6b27a50e3a740c9aa6260ee4bb3ab29515ab Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 18 Sep 2023 04:54:30 -0700 Subject: HID: nvidia-shield: Fix a missing led_classdev_unregister() in the probe error handling path The commit in Fixes updated the error handling path of thunderstrike_create() and the remove function but not the error handling path of shield_probe(), should an error occur after a successful thunderstrike_create() call. Add the missing call. Make sure it is safe to call in the probe error handling path by preventing the led_classdev from attempting to set the LED brightness to the off state on unregister. Fixes: f88af60e74a5 ("HID: nvidia-shield: Support LED functionality for Thunderstrike") Signed-off-by: Christophe JAILLET Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-nvidia-shield.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 9a3576dbf421..c65a88dc3751 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -801,7 +801,7 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); led->max_brightness = 1; - led->flags = LED_CORE_SUSPENDRESUME; + led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; led->brightness_set = &thunderstrike_led_set_brightness; @@ -1076,6 +1076,7 @@ err_stop: err_haptics: if (ts->haptics_dev) input_unregister_device(ts->haptics_dev); + led_classdev_unregister(&ts->led_dev); return ret; } -- cgit From aa80f391e97a3fa5ca6bd822047950aa0584f6bf Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 18 Sep 2023 04:54:31 -0700 Subject: HID: nvidia-shield: Fix some missing function calls() in the probe error handling path The commit in Fixes updated the error handling path of thunderstrike_create() and the remove function but not the error handling path of shield_probe(), should an error occur after a successful thunderstrike_create() call. Add the missing calls. Fixes: 3ab196f88237 ("HID: nvidia-shield: Add battery support for Thunderstrike") Signed-off-by: Christophe JAILLET Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-nvidia-shield.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index c65a88dc3751..c463e54decbc 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -1058,7 +1058,7 @@ static int shield_probe(struct hid_device *hdev, const struct hid_device_id *id) ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT); if (ret) { hid_err(hdev, "Failed to start HID device\n"); - goto err_haptics; + goto err_ts_create; } ret = hid_hw_open(hdev); @@ -1073,10 +1073,12 @@ static int shield_probe(struct hid_device *hdev, const struct hid_device_id *id) err_stop: hid_hw_stop(hdev); -err_haptics: +err_ts_create: + power_supply_unregister(ts->base.battery_dev.psy); if (ts->haptics_dev) input_unregister_device(ts->haptics_dev); led_classdev_unregister(&ts->led_dev); + ida_free(&thunderstrike_ida, ts->id); return ret; } -- cgit From 95ea4d9fd385fe335b989f22d409df079a042b7a Mon Sep 17 00:00:00 2001 From: Martino Fontana Date: Sun, 24 Sep 2023 16:06:01 +0200 Subject: HID: nintendo: reinitialize USB Pro Controller after resuming from suspend When suspending the computer, a Switch Pro Controller connected via USB will lose its internal status. However, because the USB connection was technically never lost, when resuming the computer, the driver will attempt to communicate with the controller as if nothing happened (and fail). Because of this, the user was forced to manually disconnect the controller (or to press the sync button on the controller to power it off), so that it can be re-initialized. With this patch, the controller will be automatically re-initialized after resuming from suspend. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216233 Signed-off-by: Martino Fontana Reviewed-by: Daniel J. Ogorchock Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 175 ++++++++++++++++++++++++++------------------- 1 file changed, 103 insertions(+), 72 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 250f5d2f888a..10468f727e5b 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -2088,7 +2088,9 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) struct joycon_input_report *report; req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; + mutex_lock(&ctlr->output_mutex); ret = joycon_send_subcmd(ctlr, &req, 0, HZ); + mutex_unlock(&ctlr->output_mutex); if (ret) { hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); return ret; @@ -2117,6 +2119,85 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) return 0; } +static int joycon_init(struct hid_device *hdev) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + int ret = 0; + + mutex_lock(&ctlr->output_mutex); + /* if handshake command fails, assume ble pro controller */ + if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && + !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { + hid_dbg(hdev, "detected USB controller\n"); + /* set baudrate for improved latency */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); + if (ret) { + hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); + goto out_unlock; + } + /* handshake */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); + if (ret) { + hid_err(hdev, "Failed handshake; ret=%d\n", ret); + goto out_unlock; + } + /* + * Set no timeout (to keep controller in USB mode). + * This doesn't send a response, so ignore the timeout. + */ + joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); + } else if (jc_type_is_chrggrip(ctlr)) { + hid_err(hdev, "Failed charging grip handshake\n"); + ret = -ETIMEDOUT; + goto out_unlock; + } + + /* get controller calibration data, and parse it */ + ret = joycon_request_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Analog stick positions may be inaccurate\n"); + } + + /* get IMU calibration data, and parse it */ + ret = joycon_request_imu_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Unable to read IMU calibration data\n"); + } + + /* Set the reporting mode to 0x30, which is the full report mode */ + ret = joycon_set_report_mode(ctlr); + if (ret) { + hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); + goto out_unlock; + } + + /* Enable rumble */ + ret = joycon_enable_rumble(ctlr); + if (ret) { + hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); + goto out_unlock; + } + + /* Enable the IMU */ + ret = joycon_enable_imu(ctlr); + if (ret) { + hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); + goto out_unlock; + } + +out_unlock: + mutex_unlock(&ctlr->output_mutex); + return ret; +} + /* Common handler for parsing inputs */ static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, int size) @@ -2248,85 +2329,19 @@ static int nintendo_hid_probe(struct hid_device *hdev, hid_device_io_start(hdev); - /* Initialize the controller */ - mutex_lock(&ctlr->output_mutex); - /* if handshake command fails, assume ble pro controller */ - if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && - !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { - hid_dbg(hdev, "detected USB controller\n"); - /* set baudrate for improved latency */ - ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); - if (ret) { - hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); - goto err_mutex; - } - /* handshake */ - ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); - if (ret) { - hid_err(hdev, "Failed handshake; ret=%d\n", ret); - goto err_mutex; - } - /* - * Set no timeout (to keep controller in USB mode). - * This doesn't send a response, so ignore the timeout. - */ - joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); - } else if (jc_type_is_chrggrip(ctlr)) { - hid_err(hdev, "Failed charging grip handshake\n"); - ret = -ETIMEDOUT; - goto err_mutex; - } - - /* get controller calibration data, and parse it */ - ret = joycon_request_calibration(ctlr); + ret = joycon_init(hdev); if (ret) { - /* - * We can function with default calibration, but it may be - * inaccurate. Provide a warning, and continue on. - */ - hid_warn(hdev, "Analog stick positions may be inaccurate\n"); - } - - /* get IMU calibration data, and parse it */ - ret = joycon_request_imu_calibration(ctlr); - if (ret) { - /* - * We can function with default calibration, but it may be - * inaccurate. Provide a warning, and continue on. - */ - hid_warn(hdev, "Unable to read IMU calibration data\n"); - } - - /* Set the reporting mode to 0x30, which is the full report mode */ - ret = joycon_set_report_mode(ctlr); - if (ret) { - hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); - goto err_mutex; - } - - /* Enable rumble */ - ret = joycon_enable_rumble(ctlr); - if (ret) { - hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); - goto err_mutex; - } - - /* Enable the IMU */ - ret = joycon_enable_imu(ctlr); - if (ret) { - hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); - goto err_mutex; + hid_err(hdev, "Failed to initialize controller; ret=%d\n", ret); + goto err_close; } ret = joycon_read_info(ctlr); if (ret) { hid_err(hdev, "Failed to retrieve controller info; ret=%d\n", ret); - goto err_mutex; + goto err_close; } - mutex_unlock(&ctlr->output_mutex); - /* Initialize the leds */ ret = joycon_leds_create(ctlr); if (ret) { @@ -2352,8 +2367,6 @@ static int nintendo_hid_probe(struct hid_device *hdev, hid_dbg(hdev, "probe - success\n"); return 0; -err_mutex: - mutex_unlock(&ctlr->output_mutex); err_close: hid_hw_close(hdev); err_stop: @@ -2383,6 +2396,20 @@ static void nintendo_hid_remove(struct hid_device *hdev) hid_hw_stop(hdev); } +#ifdef CONFIG_PM + +static int nintendo_hid_resume(struct hid_device *hdev) +{ + int ret = joycon_init(hdev); + + if (ret) + hid_err(hdev, "Failed to restore controller after resume"); + + return ret; +} + +#endif + static const struct hid_device_id nintendo_hid_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_PROCON) }, @@ -2404,6 +2431,10 @@ static struct hid_driver nintendo_hid_driver = { .probe = nintendo_hid_probe, .remove = nintendo_hid_remove, .raw_event = nintendo_hid_event, + +#ifdef CONFIG_PM + .resume = nintendo_hid_resume, +#endif }; module_hid_driver(nintendo_hid_driver); -- cgit From b328dd02e19cb9d3b35de4322f5363516a20ac8c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 4 Oct 2023 21:10:41 +0200 Subject: HID: sony: remove duplicate NULL check before calling usb_free_urb() usb_free_urb() does the NULL check itself, so there is no need to duplicate it prior to calling. Reported-by: kernel test robot Fixes: e1cd4004cde7c9 ("HID: sony: Fix a potential memory leak in sony_probe()") Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index a02046a78b2d..ebc0aa4e4345 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2155,8 +2155,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) return ret; err: - if (sc->ghl_urb) - usb_free_urb(sc->ghl_urb); + usb_free_urb(sc->ghl_urb); hid_hw_stop(hdev); return ret; -- cgit From 8f02139ad9a7e6e5c05712f8c1501eebed8eacfd Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 3 Oct 2023 08:53:32 -0700 Subject: HID: intel-ish-hid: ipc: Disable and reenable ACPI GPE bit The EHL (Elkhart Lake) based platforms provide a OOB (Out of band) service, which allows to wakup device when the system is in S5 (Soft-Off state). This OOB service can be enabled/disabled from BIOS settings. When enabled, the ISH device gets PME wake capability. To enable PME wakeup, driver also needs to enable ACPI GPE bit. On resume, BIOS will clear the wakeup bit. So driver need to re-enable it in resume function to keep the next wakeup capability. But this BIOS clearing of wakeup bit doesn't decrement internal OS GPE reference count, so this reenabling on every resume will cause reference count to overflow. So first disable and reenable ACPI GPE bit using acpi_disable_gpe(). Fixes: 2e23a70edabe ("HID: intel-ish-hid: ipc: finish power flow for EHL OOB") Reported-by: Kai-Heng Feng Closes: https://lore.kernel.org/lkml/CAAd53p4=oLYiH2YbVSmrPNj1zpMcfp=Wxbasb5vhMXOWCArLCg@mail.gmail.com/T/ Tested-by: Kai-Heng Feng Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/pci-ish.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 55cb25038e63..710fda5f19e1 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -133,6 +133,14 @@ static int enable_gpe(struct device *dev) } wakeup = &adev->wakeup; + /* + * Call acpi_disable_gpe(), so that reference count + * gpe_event_info->runtime_count doesn't overflow. + * When gpe_event_info->runtime_count = 0, the call + * to acpi_disable_gpe() simply return. + */ + acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); + acpi_sts = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); if (ACPI_FAILURE(acpi_sts)) { dev_err(dev, "enable ose_gpe failed\n"); -- cgit From b009aa38a380becd98cc4e01c9b7626a11cb4905 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Tue, 3 Oct 2023 21:07:00 +0200 Subject: HID: Add quirk to ignore the touchscreen battery on HP ENVY 15-eu0556ng Like various other devices using similar hardware, this model reports a perpetually empty battery (0-1%). Join the others and apply HID_BATTERY_QUIRK_IGNORE. Signed-off-by: Fabian Vogt Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7e499992a793..e4d2dfd5d253 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -425,6 +425,7 @@ #define I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100 0x29F5 #define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1 0x2BED #define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2 0x2BEE +#define I2C_DEVICE_ID_HP_ENVY_X360_15_EU0556NG 0x2D02 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 0235cc1690a1..c8b20d44b147 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -409,6 +409,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15_EU0556NG), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- cgit From fcdfc462881d8acf9db77f483b2c821e286ca97b Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 2 Oct 2023 16:08:05 +0200 Subject: net: ethernet: mediatek: disable irq before schedule napi While searching for possible refactor of napi_schedule_prep and __napi_schedule it was notice that the mtk eth driver disable the interrupt for rx and tx AFTER napi is scheduled. While this is a very hard to repro case it might happen to have situation where the interrupt is disabled and never enabled again as the napi completes and the interrupt is enabled before. This is caused by the fact that a napi driven by interrupt expect a logic with: 1. interrupt received. napi prepared -> interrupt disabled -> napi scheduled 2. napi triggered. ring cleared -> interrupt enabled -> wait for new interrupt To prevent this case, disable the interrupt BEFORE the napi is scheduled. Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20231002140805.568-1-ansuelsmth@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3cffd1bd3067..20afe79f380a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3171,8 +3171,8 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) eth->rx_events++; if (likely(napi_schedule_prep(ð->rx_napi))) { - __napi_schedule(ð->rx_napi); mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask); + __napi_schedule(ð->rx_napi); } return IRQ_HANDLED; @@ -3184,8 +3184,8 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) eth->tx_events++; if (likely(napi_schedule_prep(ð->tx_napi))) { - __napi_schedule(ð->tx_napi); mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); + __napi_schedule(ð->tx_napi); } return IRQ_HANDLED; -- cgit From c1ec4b450ab729e30d043e927fdfcc9f764f61b7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 4 Oct 2023 23:08:56 +0800 Subject: soc: renesas: Make ARCH_R9A07G043 (riscv version) depend on NONPORTABLE Drew found "CONFIG_DMA_GLOBAL_POOL=y causes ADMA buffer alloc to fail" the log looks like: mmc0: Unable to allocate ADMA buffers - falling back to standard DMA The logic is: generic riscv defconfig selects ARCH_RENESAS then ARCH_R9A07G043 which selects DMA_GLOBAL_POOL, which assumes all non-dma-coherent riscv platforms have a dma global pool, this assumption seems not correct. And I believe DMA_GLOBAL_POOL should not be selected by ARCH_SOCFAMILIY, instead, only ARCH under some specific conditions can select it globaly, for example NOMMU ARM and so on, because it's designed for special cases such as "nommu cases where non-cacheable memory lives in a fixed place in the physical address map" as pointed out by Robin. Fix the issue by making ARCH_R9A07G043 (riscv version) depend on NONPORTABLE, thus generic defconfig won't select ARCH_R9A07G043 by default. And even for random config case, there will be less debug effort once we see NONPORTABLE is enabled. Reported-by: Drew Fustini Closes: https://lore.kernel.org/linux-riscv/ZRuamJuShOnvP1pr@x1/ Fixes: 484861e09f3e ("soc: renesas: Kconfig: Select the required configs for RZ/Five SoC") Signed-off-by: Jisheng Zhang Reviewed-by: Robin Murphy Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Tested-by: Drew Fustini Link: https://lore.kernel.org/r/20231004150856.2540-1-jszhang@kernel.org Signed-off-by: Geert Uytterhoeven --- drivers/soc/renesas/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig index 5a75ab64d1ed..12040ce116a5 100644 --- a/drivers/soc/renesas/Kconfig +++ b/drivers/soc/renesas/Kconfig @@ -333,6 +333,7 @@ if RISCV config ARCH_R9A07G043 bool "RISC-V Platform support for RZ/Five" + depends on NONPORTABLE select ARCH_RZG2L select AX45MP_L2_CACHE if RISCV_DMA_NONCOHERENT select DMA_GLOBAL_POOL -- cgit From 566aeed6871ac2189b5bfe03e1a5b3b7be5eca38 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Oct 2023 12:35:44 -0700 Subject: net: lan743x: also select PHYLIB Since FIXED_PHY depends on PHYLIB, PHYLIB needs to be set to avoid a kconfig warning: WARNING: unmet direct dependencies detected for FIXED_PHY Depends on [n]: NETDEVICES [=y] && PHYLIB [=n] Selected by [y]: - LAN743X [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROCHIP [=y] && PCI [=y] && PTP_1588_CLOCK_OPTIONAL [=y] Fixes: 73c4d1b307ae ("net: lan743x: select FIXED_PHY") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: lore.kernel.org/r/202309261802.JPbRHwti-lkp@intel.com Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Link: https://lore.kernel.org/r/20231002193544.14529-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 329e374b9539..43ba71e82260 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -46,6 +46,7 @@ config LAN743X tristate "LAN743x support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select PHYLIB select FIXED_PHY select CRC16 select CRC32 -- cgit From 3eef8555891026628aa1cc6dbc01db86df88aa26 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Wed, 4 Oct 2023 16:33:56 +0200 Subject: net: stmmac: remove unneeded stmmac_poll_controller Using netconsole netpoll_poll_dev could be called from interrupt context, thus using disable_irq() would cause the following kernel warning with CONFIG_DEBUG_ATOMIC_SLEEP enabled: BUG: sleeping function called from invalid context at kernel/irq/manage.c:137 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 10, name: ksoftirqd/0 CPU: 0 PID: 10 Comm: ksoftirqd/0 Tainted: G W 5.15.42-00075-g816b502b2298-dirty #117 Hardware name: aml (r1) (DT) Call trace: dump_backtrace+0x0/0x270 show_stack+0x14/0x20 dump_stack_lvl+0x8c/0xac dump_stack+0x18/0x30 ___might_sleep+0x150/0x194 __might_sleep+0x64/0xbc synchronize_irq+0x8c/0x150 disable_irq+0x2c/0x40 stmmac_poll_controller+0x140/0x1a0 netpoll_poll_dev+0x6c/0x220 netpoll_send_skb+0x308/0x390 netpoll_send_udp+0x418/0x760 write_msg+0x118/0x140 [netconsole] console_unlock+0x404/0x500 vprintk_emit+0x118/0x250 dev_vprintk_emit+0x19c/0x1cc dev_printk_emit+0x90/0xa8 __dev_printk+0x78/0x9c _dev_warn+0xa4/0xbc ath10k_warn+0xe8/0xf0 [ath10k_core] ath10k_htt_txrx_compl_task+0x790/0x7fc [ath10k_core] ath10k_pci_napi_poll+0x98/0x1f4 [ath10k_pci] __napi_poll+0x58/0x1f4 net_rx_action+0x504/0x590 _stext+0x1b8/0x418 run_ksoftirqd+0x74/0xa4 smpboot_thread_fn+0x210/0x3c0 kthread+0x1fc/0x210 ret_from_fork+0x10/0x20 Since [0] .ndo_poll_controller is only needed if driver doesn't or partially use NAPI. Because stmmac does so, stmmac_poll_controller can be removed fixing the above warning. [0] commit ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Cc: # 5.15.x Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers") Signed-off-by: Remi Pommarel Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1c156a6d8c9170bd6a17825f2277115525b4d50f.1696429960.git.repk@triplefau.lt Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ----------------------- 1 file changed, 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 83c567a89a46..ed1a5a31a491 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6002,33 +6002,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Polling receive - used by NETCONSOLE and other diagnostic tools - * to allow network I/O with interrupts disabled. - */ -static void stmmac_poll_controller(struct net_device *dev) -{ - struct stmmac_priv *priv = netdev_priv(dev); - int i; - - /* If adapter is down, do nothing */ - if (test_bit(STMMAC_DOWN, &priv->state)) - return; - - if (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN) { - for (i = 0; i < priv->plat->rx_queues_to_use; i++) - stmmac_msi_intr_rx(0, &priv->dma_conf.rx_queue[i]); - - for (i = 0; i < priv->plat->tx_queues_to_use; i++) - stmmac_msi_intr_tx(0, &priv->dma_conf.tx_queue[i]); - } else { - disable_irq(dev->irq); - stmmac_interrupt(dev->irq, dev); - enable_irq(dev->irq); - } -} -#endif - /** * stmmac_ioctl - Entry point for the Ioctl * @dev: Device pointer. @@ -6989,9 +6962,6 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_get_stats64 = stmmac_get_stats64, .ndo_setup_tc = stmmac_setup_tc, .ndo_select_queue = stmmac_select_queue, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = stmmac_poll_controller, -#endif .ndo_set_mac_address = stmmac_set_mac_address, .ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid, -- cgit From a5efdbcece83af94180e8d7c0a6e22947318499d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 4 Oct 2023 13:38:11 -0700 Subject: mptcp: fix delegated action races The delegated action infrastructure is prone to the following race: different CPUs can try to schedule different delegated actions on the same subflow at the same time. Each of them will check different bits via mptcp_subflow_delegate(), and will try to schedule the action on the related per-cpu napi instance. Depending on the timing, both can observe an empty delegated list node, causing the same entry to be added simultaneously on two different lists. The root cause is that the delegated actions infra does not provide a single synchronization point. Address the issue reserving an additional bit to mark the subflow as scheduled for delegation. Acquiring such bit guarantee the caller to own the delegated list node, and being able to safely schedule the subflow. Clear such bit only when the subflow scheduling is completed, ensuring proper barrier in place. Additionally swap the meaning of the delegated_action bitmask, to allow the usage of the existing helper to set multiple bit at once. Fixes: bcd97734318d ("mptcp: use delegate action to schedule 3rd ack retrans") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231004-send-net-20231004-v1-1-28de4ac663ae@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 28 ++++++++++++++-------------- net/mptcp/protocol.h | 35 ++++++++++++----------------------- net/mptcp/subflow.c | 10 ++++++++-- 3 files changed, 34 insertions(+), 39 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e252539b1e19..c3b83cb390d9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3425,24 +3425,21 @@ static void schedule_3rdack_retransmission(struct sock *ssk) sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout); } -void mptcp_subflow_process_delegated(struct sock *ssk) +void mptcp_subflow_process_delegated(struct sock *ssk, long status) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; - if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) { + if (status & BIT(MPTCP_DELEGATE_SEND)) { mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk, true); else __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); - mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND); } - if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) { + if (status & BIT(MPTCP_DELEGATE_ACK)) schedule_3rdack_retransmission(ssk); - mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK); - } } static int mptcp_hash(struct sock *sk) @@ -3968,14 +3965,17 @@ static int mptcp_napi_poll(struct napi_struct *napi, int budget) struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bh_lock_sock_nested(ssk); - if (!sock_owned_by_user(ssk) && - mptcp_subflow_has_delegated_action(subflow)) - mptcp_subflow_process_delegated(ssk); - /* ... elsewhere tcp_release_cb_override already processed - * the action or will do at next release_sock(). - * In both case must dequeue the subflow here - on the same - * CPU that scheduled it. - */ + if (!sock_owned_by_user(ssk)) { + mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0)); + } else { + /* tcp_release_cb_override already processed + * the action or will do at next release_sock(). + * In both case must dequeue the subflow here - on the same + * CPU that scheduled it. + */ + smp_wmb(); + clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status); + } bh_unlock_sock(ssk); sock_put(ssk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ed61d6850cce..3612545fa62e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -444,9 +444,11 @@ struct mptcp_delegated_action { DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); -#define MPTCP_DELEGATE_SEND 0 -#define MPTCP_DELEGATE_ACK 1 +#define MPTCP_DELEGATE_SCHEDULED 0 +#define MPTCP_DELEGATE_SEND 1 +#define MPTCP_DELEGATE_ACK 2 +#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED)) /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ @@ -564,23 +566,24 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } -void mptcp_subflow_process_delegated(struct sock *ssk); +void mptcp_subflow_process_delegated(struct sock *ssk, long actions); static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action) { + long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action); struct mptcp_delegated_action *delegated; bool schedule; /* the caller held the subflow bh socket lock */ lockdep_assert_in_softirq(); - /* The implied barrier pairs with mptcp_subflow_delegated_done(), and - * ensures the below list check sees list updates done prior to status - * bit changes + /* The implied barrier pairs with tcp_release_cb_override() + * mptcp_napi_poll(), and ensures the below list check sees list + * updates done prior to delegated status bits changes */ - if (!test_and_set_bit(action, &subflow->delegated_status)) { - /* still on delegated list from previous scheduling */ - if (!list_empty(&subflow->delegated_node)) + old = set_mask_bits(&subflow->delegated_status, 0, set_bits); + if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) { + if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node))) return; delegated = this_cpu_ptr(&mptcp_delegated_actions); @@ -605,20 +608,6 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) return ret; } -static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) -{ - return !!READ_ONCE(subflow->delegated_status); -} - -static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action) -{ - /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before - * touching the status bit - */ - smp_wmb(); - clear_bit(action, &subflow->delegated_status); -} - int mptcp_is_enabled(const struct net *net); unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 918c1a235790..9c1f8d1d63d2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1956,9 +1956,15 @@ static void subflow_ulp_clone(const struct request_sock *req, static void tcp_release_cb_override(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + long status; - if (mptcp_subflow_has_delegated_action(subflow)) - mptcp_subflow_process_delegated(ssk); + /* process and clear all the pending actions, but leave the subflow into + * the napi queue. To respect locking, only the same CPU that originated + * the action can touch the list. mptcp_napi_poll will take care of it. + */ + status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0); + if (status) + mptcp_subflow_process_delegated(ssk, status); tcp_release_cb(ssk); } -- cgit From e5ed101a602873d65d2d64edaba93e8c73ec1b0f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 4 Oct 2023 13:38:12 -0700 Subject: mptcp: userspace pm allow creating id 0 subflow This patch drops id 0 limitation in mptcp_nl_cmd_sf_create() to allow creating additional subflows with the local addr ID 0. There is no reason not to allow additional subflows from this local address: we should be able to create new subflows from the initial endpoint. This limitation was breaking fullmesh support from userspace. Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/391 Cc: stable@vger.kernel.org Suggested-by: Matthieu Baerts Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231004-send-net-20231004-v1-2-28de4ac663ae@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b5a8aa4c1ebd..d042d32beb4d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -307,12 +307,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) goto create_err; } - if (addr_l.id == 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id"); - err = -EINVAL; - goto create_err; - } - err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); -- cgit From 8eed6ee362b0099a3390f44b4b2f3be053bdbcee Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 4 Oct 2023 13:38:13 -0700 Subject: MAINTAINERS: update Matthieu's email address Use my kernel.org account instead. The other one will bounce by the end of the year. Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231004-send-net-20231004-v1-3-28de4ac663ae@kernel.org Signed-off-by: Jakub Kicinski --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index a0a6efe87186..c80903efec75 100644 --- a/.mailmap +++ b/.mailmap @@ -377,6 +377,7 @@ Matthew Wilcox Matthew Wilcox Matthew Wilcox Matthias Fuchs +Matthieu Baerts Matthieu CASTET Matti Vaittinen Matt Ranostay diff --git a/MAINTAINERS b/MAINTAINERS index 9275708c9b96..0bb5451e9b86 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14942,7 +14942,7 @@ K: macsec K: \bmdo_ NETWORKING [MPTCP] -M: Matthieu Baerts +M: Matthieu Baerts M: Mat Martineau L: netdev@vger.kernel.org L: mptcp@lists.linux.dev -- cgit From 3820c4fdc247b6f0a4162733bdb8ddf8f2e8a1e4 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 31 Jul 2023 12:37:58 +0200 Subject: nvme-rdma: do not try to stop unallocated queues Trying to stop a queue which hasn't been allocated will result in a warning due to calling mutex_lock() against an uninitialized mutex. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 Call trace: RIP: 0010:__mutex_lock+0x1173/0x14a0 nvme_rdma_stop_queue+0x1b/0xa0 [nvme_rdma] nvme_rdma_teardown_io_queues.part.0+0xb0/0x1d0 [nvme_rdma] nvme_rdma_delete_ctrl+0x50/0x100 [nvme_rdma] nvme_do_delete_ctrl+0x149/0x158 [nvme_core] Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 337a624a537c..a7fea4cbacd7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + return; + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) __nvme_rdma_stop_queue(queue); -- cgit From c38d23a54445f9a8aa6831fafc9af0496ba02f9e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Oct 2023 21:17:49 +0300 Subject: RDMA/core: Require admin capabilities to set system parameters Like any other set command, require admin permissions to do it. Cc: stable@vger.kernel.org Fixes: 2b34c5580226 ("RDMA/core: Add command to set ib_core device net namspace sharing mode") Link: https://lore.kernel.org/r/75d329fdd7381b52cbdf87910bef16c9965abb1f.1696443438.git.leon@kernel.org Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index d5d3e4f0de77..6d1dbc978759 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2529,6 +2529,7 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_SET] = { .doit = nldev_stat_set_doit, -- cgit From c6fd91276d64039d068f5ceaf7ad335475bb0389 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 8 Aug 2023 08:32:23 +0200 Subject: dt-bindings: iio: rohm,bu27010: add missing vdd-supply to example Bindings require vdd-supply but the example DTS was missing one. This fixes dt_binding_check error: Documentation/devicetree/bindings/iio/light/rohm,bu27010.example.dtb: light-sensor@38: 'vdd-supply' is a required property Fixes: ce2a8c160066 ("dt-bindings: iio: ROHM BU27010 RGBC + flickering sensor") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/20230808063223.80431-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml b/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml index 8376d64a641a..bed42d5d0d94 100644 --- a/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml +++ b/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml @@ -45,5 +45,6 @@ examples: light-sensor@38 { compatible = "rohm,bu27010"; reg = <0x38>; + vdd-supply = <&vdd>; }; }; -- cgit From ea191d0fd361fb569b6c3d19e5410510aa6b6bac Mon Sep 17 00:00:00 2001 From: "GONG, Ruiqi" Date: Thu, 10 Aug 2023 11:59:10 +0800 Subject: iio: irsd200: fix -Warray-bounds bug in irsd200_trigger_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling with gcc 13 with -Warray-bounds enabled: In file included from drivers/iio/proximity/irsd200.c:15: In function ‘iio_push_to_buffers_with_timestamp’, inlined from ‘irsd200_trigger_handler’ at drivers/iio/proximity/irsd200.c:770:2: ./include/linux/iio/buffer.h:42:46: error: array subscript ‘int64_t {aka long long int}[0]’ is partly outside array bounds of ‘s16[1]’ {aka ‘short int[1]’} [-Werror=array-bounds=] 42 | ((int64_t *)data)[ts_offset] = timestamp; | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~ drivers/iio/proximity/irsd200.c: In function ‘irsd200_trigger_handler’: drivers/iio/proximity/irsd200.c:763:13: note: object ‘buf’ of size 2 763 | s16 buf = 0; | ^~~ The problem seems to be that irsd200_trigger_handler() is taking a s16 variable as an int64_t buffer. As Jonathan suggested [1], fix it by extending the buffer to a two-element array of s64. Link: https://github.com/KSPP/linux/issues/331 Link: https://lore.kernel.org/lkml/20230809181329.46c00a5d@jic23-huawei/ [1] Fixes: 3db3562bc66e ("iio: Add driver for Murata IRS-D200") Signed-off-by: GONG, Ruiqi Acked-by: Gustavo A. R. Silva Reviewed-by: Waqar Hameed Tested-by: Waqar Hameed Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230810035910.1334706-1-gongruiqi@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/irsd200.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/irsd200.c b/drivers/iio/proximity/irsd200.c index 5bd791b46d98..bdff91f6b1a3 100644 --- a/drivers/iio/proximity/irsd200.c +++ b/drivers/iio/proximity/irsd200.c @@ -759,14 +759,14 @@ static irqreturn_t irsd200_trigger_handler(int irq, void *pollf) { struct iio_dev *indio_dev = ((struct iio_poll_func *)pollf)->indio_dev; struct irsd200_data *data = iio_priv(indio_dev); - s16 buf = 0; + s64 buf[2] = {}; int ret; - ret = irsd200_read_data(data, &buf); + ret = irsd200_read_data(data, (s16 *)buf); if (ret) goto end; - iio_push_to_buffers_with_timestamp(indio_dev, &buf, + iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns(indio_dev)); end: -- cgit From 7771c8c80d62ad065637ef74ed2962983f6c5f6d Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 29 Aug 2023 11:06:22 +0800 Subject: iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data() cros_ec_sensors_push_data() reads `indio_dev->active_scan_mask` and calls iio_push_to_buffers_with_timestamp() without making sure the `indio_dev` stays in buffer mode. There is a race if `indio_dev` exits buffer mode right before cros_ec_sensors_push_data() accesses them. An use-after-free on `indio_dev->active_scan_mask` was observed. The call trace: [...] _find_next_bit cros_ec_sensors_push_data cros_ec_sensorhub_event blocking_notifier_call_chain cros_ec_irq_thread It was caused by a race condition: one thread just freed `active_scan_mask` at [1]; while another thread tried to access the memory at [2]. Fix it by calling iio_device_claim_buffer_mode() to ensure the `indio_dev` can't exit buffer mode during cros_ec_sensors_push_data(). [1]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/industrialio-buffer.c#L1189 [2]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c#L198 Cc: stable@vger.kernel.org Fixes: aa984f1ba4a4 ("iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20230829030622.1571852-1-tzungbi@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index b72d39fc2434..6bfe5d6847e7 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -190,8 +190,11 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, /* * Ignore samples if the buffer is not set: it is needed if the ODR is * set but the buffer is not enabled yet. + * + * Note: iio_device_claim_buffer_mode() returns -EBUSY if the buffer + * is not enabled. */ - if (!iio_buffer_enabled(indio_dev)) + if (iio_device_claim_buffer_mode(indio_dev) < 0) return 0; out = (s16 *)st->samples; @@ -210,6 +213,7 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, iio_push_to_buffers_with_timestamp(indio_dev, st->samples, timestamp + delta); + iio_device_release_buffer_mode(indio_dev); return 0; } EXPORT_SYMBOL_GPL(cros_ec_sensors_push_data); -- cgit From 850101b3598277794f92a9e363a60a66e0d42890 Mon Sep 17 00:00:00 2001 From: Philipp Rossak Date: Tue, 5 Sep 2023 00:02:04 +0200 Subject: iio: adc: imx8qxp: Fix address for command buffer registers The ADC Command Buffer Register high and low are currently pointing to the wrong address and makes it impossible to perform correct ADC measurements over all channels. According to the datasheet of the imx8qxp the ADC_CMDL register starts at address 0x100 and the ADC_CMDH register starts at address 0x104. This bug seems to be in the kernel since the introduction of this driver. This can be observed by checking all raw voltages of the adc and they are all nearly identical: cat /sys/bus/iio/devices/iio\:device0/in_voltage*_raw 3498 3494 3491 3491 3489 3490 3490 3490 Fixes: 1e23dcaa1a9fa ("iio: imx8qxp-adc: Add driver support for NXP IMX8QXP ADC") Signed-off-by: Philipp Rossak Acked-by: Haibo Chen Link: https://lore.kernel.org/r/20230904220204.23841-1-embed3d@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/imx8qxp-adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/imx8qxp-adc.c b/drivers/iio/adc/imx8qxp-adc.c index f5a0fc9e64c5..fff6e5a2d956 100644 --- a/drivers/iio/adc/imx8qxp-adc.c +++ b/drivers/iio/adc/imx8qxp-adc.c @@ -38,8 +38,8 @@ #define IMX8QXP_ADR_ADC_FCTRL 0x30 #define IMX8QXP_ADR_ADC_SWTRIG 0x34 #define IMX8QXP_ADR_ADC_TCTRL(tid) (0xc0 + (tid) * 4) -#define IMX8QXP_ADR_ADC_CMDH(cid) (0x100 + (cid) * 8) -#define IMX8QXP_ADR_ADC_CMDL(cid) (0x104 + (cid) * 8) +#define IMX8QXP_ADR_ADC_CMDL(cid) (0x100 + (cid) * 8) +#define IMX8QXP_ADR_ADC_CMDH(cid) (0x104 + (cid) * 8) #define IMX8QXP_ADR_ADC_RESFIFO 0x300 #define IMX8QXP_ADR_ADC_TST 0xffc -- cgit From c9b9cfe7d342683f624a89c3b617be18aff879e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 3 Sep 2023 12:30:52 +0100 Subject: iio: imu: bno055: Fix missing Kconfig dependencies This driver uses IIO triggered buffers so it needs to select them in Kconfig. on riscv-32bit: /opt/crosstool/gcc-13.2.0-nolibc/riscv32-linux/bin/riscv32-linux-ld: drivers/iio/imu/bno055/bno055.o: in function `.L367': bno055.c:(.text+0x2c96): undefined reference to `devm_iio_triggered_buffer_setup_ext' Reported-by: Randy Dunlap Closes: https://lore.kernel.org/linux-next/40566b4b-3950-81fe-ff14-871d8c447627@infradead.org/ Fixes: 4aefe1c2bd0c ("iio: imu: add Bosch Sensortec BNO055 core driver") Cc: Andrea Merello Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20230903113052.846298-1-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bno055/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index fa79b1ac4f85..83e53acfbe88 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -2,6 +2,8 @@ config BOSCH_BNO055 tristate + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" -- cgit From 901a293fd96fb9bab843ba4cc7be3094a5aa7c94 Mon Sep 17 00:00:00 2001 From: Lakshmi Yadlapati Date: Tue, 29 Aug 2023 13:02:22 -0500 Subject: iio: pressure: dps310: Adjust Timeout Settings The DPS310 sensor chip has been encountering intermittent errors while reading the sensor device across various system designs. This issue causes the chip to become "stuck," preventing the indication of "ready" status for pressure and temperature measurements in the MEAS_CFG register. To address this issue, this commit fixes the timeout settings to improve sensor stability: - After sending a reset command to the chip, the timeout has been extended from 2.5 ms to 15 ms, aligning with the DPS310 specification. - The read timeout value of the MEAS_CFG register has been adjusted from 20ms to 30ms to match the specification. Signed-off-by: Lakshmi Yadlapati Fixes: 7b4ab4abcea4 ("iio: pressure: dps310: Reset chip after timeout") Link: https://lore.kernel.org/r/20230829180222.3431926-2-lakshmiy@us.ibm.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/dps310.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index b10dbf5cf494..1ff091b2f764 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -57,8 +57,8 @@ #define DPS310_RESET_MAGIC 0x09 #define DPS310_COEF_BASE 0x10 -/* Make sure sleep time is <= 20ms for usleep_range */ -#define DPS310_POLL_SLEEP_US(t) min(20000, (t) / 8) +/* Make sure sleep time is <= 30ms for usleep_range */ +#define DPS310_POLL_SLEEP_US(t) min(30000, (t) / 8) /* Silently handle error in rate value here */ #define DPS310_POLL_TIMEOUT_US(rc) ((rc) <= 0 ? 1000000 : 1000000 / (rc)) @@ -402,8 +402,8 @@ static int dps310_reset_wait(struct dps310_data *data) if (rc) return rc; - /* Wait for device chip access: 2.5ms in specification */ - usleep_range(2500, 12000); + /* Wait for device chip access: 15ms in specification */ + usleep_range(15000, 55000); return 0; } -- cgit From b120dd3a15582fb7a959cecb05e4d9814fcba386 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 12 Sep 2023 11:54:21 +0300 Subject: iio: addac: Kconfig: update ad74413r selections Building ad74413r without selecting IIO_BUFFER and IIO_TRIGGERED_BUFFER generates error with respect to the iio trigger functions that are used within the driver. Update the Kconfig accordingly. Fixes: fea251b6a5db ("iio: addac: add AD74413R driver") Signed-off-by: Antoniu Miclaus Link: https://lore.kernel.org/r/20230912085421.51102-1-antoniu.miclaus@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/addac/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig index 877f9124803c..397544f23b85 100644 --- a/drivers/iio/addac/Kconfig +++ b/drivers/iio/addac/Kconfig @@ -24,6 +24,8 @@ config AD74413R depends on GPIOLIB && SPI select REGMAP_SPI select CRC8 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Analog Devices AD74412R/AD74413R quad-channel software configurable input/output solution. -- cgit From 7e87ab38eed09c9dec56da361d74158159ae84a3 Mon Sep 17 00:00:00 2001 From: Mårten Lindahl Date: Mon, 11 Sep 2023 14:43:01 +0200 Subject: iio: light: vcnl4000: Don't power on/off chip in config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After enabling/disabling interrupts on the vcnl4040 chip the als and/or ps sensor is powered on or off depending on the interrupt enable bits. This is made as a last step in write_event_config. But there is no reason to do this as the runtime PM handles the power state of the sensors. Interfering with this may impact sensor readings. Consider the following: 1. Userspace makes sensor data reading which triggers RPM resume (sensor powered on) and a RPM suspend timeout. The timeout is 2000ms before RPM suspend powers the sensor off if no new reading is made within the timeout period. 2. Userspace disables interrupts => powers sensor off 3. Userspace reads sensor data = 0 because sensor is off and the suspend timeout has not passed. For each new reading made within the timeout period the timeout is renewed with 2000ms and RPM will not make a new resume (device was not suspended). So the sensor will not be powered on. 4. No further userspace reading for 2000ms ends RPM suspend timeout and triggers suspend (powers off already powered off sensor). Powering sensor off in (2) makes all consecutive readings made within 2000ms to the previous reading (3) return invalid data. Skip setting power state when writing new event config. Fixes: 546676121cb9 ("iio: light: vcnl4000: Add interrupt support for vcnl4040") Fixes: bc292aaf9cb4 ("iio: light: vcnl4000: add illuminance irq vcnl4040/4200") Signed-off-by: Mårten Lindahl Link: https://lore.kernel.org/r/20230907-vcnl4000-pm-fix-v2-1-298e01f54db4@axis.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index 3a52b09c2823..fdf763a04b0b 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -1513,7 +1513,6 @@ static int vcnl4040_write_event_config(struct iio_dev *indio_dev, out: mutex_unlock(&data->vcnl4000_lock); - data->chip_spec->set_power_state(data, data->ps_int || data->als_int); return ret; } -- cgit From 7e7dcab620cd6d34939f615cac63fc0ef7e81c72 Mon Sep 17 00:00:00 2001 From: Alisa-Dariana Roman Date: Sun, 24 Sep 2023 18:21:48 +0300 Subject: iio: adc: ad7192: Correct reference voltage The avdd and the reference voltage are two different sources but the reference voltage was assigned according to the avdd supply. Add vref regulator structure and set the reference voltage according to the vref supply from the devicetree. In case vref supply is missing, reference voltage is set according to the avdd supply for compatibility with old devicetrees. Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") Signed-off-by: Alisa-Dariana Roman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230924152149.41884-1-alisadariana@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 69d1103b9508..b64fd365f83f 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -177,6 +177,7 @@ struct ad7192_chip_info { struct ad7192_state { const struct ad7192_chip_info *chip_info; struct regulator *avdd; + struct regulator *vref; struct clk *mclk; u16 int_vref_mv; u32 fclk; @@ -1008,10 +1009,30 @@ static int ad7192_probe(struct spi_device *spi) if (ret) return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n"); - ret = regulator_get_voltage(st->avdd); - if (ret < 0) { - dev_err(&spi->dev, "Device tree error, reference voltage undefined\n"); - return ret; + st->vref = devm_regulator_get_optional(&spi->dev, "vref"); + if (IS_ERR(st->vref)) { + if (PTR_ERR(st->vref) != -ENODEV) + return PTR_ERR(st->vref); + + ret = regulator_get_voltage(st->avdd); + if (ret < 0) + return dev_err_probe(&spi->dev, ret, + "Device tree error, AVdd voltage undefined\n"); + } else { + ret = regulator_enable(st->vref); + if (ret) { + dev_err(&spi->dev, "Failed to enable specified Vref supply\n"); + return ret; + } + + ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->vref); + if (ret) + return ret; + + ret = regulator_get_voltage(st->vref); + if (ret < 0) + return dev_err_probe(&spi->dev, ret, + "Device tree error, Vref voltage undefined\n"); } st->int_vref_mv = ret / 1000; -- cgit From 87b9a0e3ff31d857ed1d13637898c472964aab7d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Sep 2023 11:43:50 -0500 Subject: dt-bindings: iio: adc: adi,ad7292: Fix additionalProperties on channel nodes "additionalProperties: true" is only for incomplete schemas such as bus child nodes in a bus's schema. That doesn't apply to the "channel" nodes in the adi,ad7292 binding, so fix additionalProperties to be false. Signed-off-by: Rob Herring Acked-by: Conor Dooley Acked-by: Marcelo Schmitt Link: https://lore.kernel.org/r/20230926164357.100325-1-robh@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml index 7cc4ddc4e9b7..2aa1f4b063eb 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml @@ -61,7 +61,7 @@ patternProperties: required: - reg - additionalProperties: true + additionalProperties: false allOf: - $ref: /schemas/spi/spi-peripheral-props.yaml# -- cgit From fd39d9668f2ce9f4b05ad55e8c8d80c098073e0b Mon Sep 17 00:00:00 2001 From: Alexander Zangerl Date: Wed, 20 Sep 2023 10:01:10 +1000 Subject: iio: pressure: ms5611: ms5611_prom_is_valid false negative bug The ms5611 driver falsely rejects lots of MS5607-02BA03-50 chips with "PROM integrity check failed" because it doesn't accept a prom crc value of zero as legitimate. According to the datasheet for this chip (and the manufacturer's application note about the PROM CRC), none of the possible values for the CRC are excluded - but the current code in ms5611_prom_is_valid() ends with return crc_orig != 0x0000 && crc == crc_orig Discussed with the driver author (Tomasz Duszynski) and he indicated that at that time (2015) he was dealing with some faulty chip samples which returned blank data under some circumstances and/or followed example code which indicated CRC zero being bad. As far as I can tell this exception should not be applied anymore; We've got a few hundred custom boards here with this chip where large numbers of the prom have a legitimate CRC value 0, and do work fine, but which the current driver code wrongly rejects. Signed-off-by: Alexander Zangerl Fixes: c0644160a8b5 ("iio: pressure: add support for MS5611 pressure and temperature sensor") Link: https://lore.kernel.org/r/2535-1695168070.831792@Ze3y.dhYT.s3fx Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5611_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 627497e61a63..2fc706f9d8ae 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -76,7 +76,7 @@ static bool ms5611_prom_is_valid(u16 *prom, size_t len) crc = (crc >> 12) & 0x000F; - return crc_orig != 0x0000 && crc == crc_orig; + return crc == crc_orig; } static int ms5611_read_prom(struct iio_dev *indio_dev) -- cgit From 10f20628c9b8e924b8046e63b36b2cea4d2c85e4 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 11 Sep 2023 15:16:26 -0700 Subject: drm/msm/dpu: fail dpu_plane_atomic_check() based on mdp clk limits Currently, dpu_plane_atomic_check() does not check whether the plane can process the image without exceeding the per chipset limits for MDP clock. This leads to underflow issues because the SSPP is not able to complete the processing for the data rate of the display. Fail the dpu_plane_atomic_check() if the SSPP cannot process the image without exceeding the MDP clock limits. changes in v2: - use crtc_state's adjusted_mode instead of mode Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/556819/ Link: https://lore.kernel.org/r/20230911221627.9569-1-quic_abhinavk@quicinc.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 98c1b22e9bca..0be195f9149c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -733,9 +733,11 @@ static int dpu_plane_check_inline_rotation(struct dpu_plane *pdpu, static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, - const struct dpu_format *fmt) + const struct dpu_format *fmt, + const struct drm_display_mode *mode) { uint32_t min_src_size; + struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 2 : 1; @@ -774,6 +776,12 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, return -EINVAL; } + /* max clk check */ + if (_dpu_plane_calc_clk(mode, pipe_cfg) > kms->perf.max_core_clk_rate) { + DPU_DEBUG_PLANE(pdpu, "plane exceeds max mdp core clk limits\n"); + return -E2BIG; + } + return 0; } @@ -899,12 +907,13 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; if (r_pipe->sspp) { - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt); + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, + &crtc_state->adjusted_mode); if (ret) return ret; } -- cgit From aa7dcba3bae6869122828b144a3cfd231718089d Mon Sep 17 00:00:00 2001 From: Renan Guilherme Lebre Ramos Date: Wed, 4 Oct 2023 19:59:00 -0400 Subject: platform/x86: touchscreen_dmi: Add info for the Positivo C4128B Add information for the Positivo C4128B, a notebook/tablet convertible. Link: https://github.com/onitake/gsl-firmware/pull/217 Signed-off-by: Renan Guilherme Lebre Ramos Link: https://lore.kernel.org/r/20231004235900.426240-1-japareaggae@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 363a9848e2b8..0c6733772698 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -771,6 +771,21 @@ static const struct ts_dmi_data pipo_w11_data = { .properties = pipo_w11_props, }; +static const struct property_entry positivo_c4128b_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 4), + PROPERTY_ENTRY_U32("touchscreen-min-y", 13), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1915), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1269), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-positivo-c4128b.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data positivo_c4128b_data = { + .acpi_name = "MSSL1680:00", + .properties = positivo_c4128b_props, +}; + static const struct property_entry pov_mobii_wintab_p800w_v20_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 32), PROPERTY_ENTRY_U32("touchscreen-min-y", 16), @@ -1502,6 +1517,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "MOMO.G.WI71C.MABMRBA02"), }, }, + { + /* Positivo C4128B */ + .driver_data = (void *)&positivo_c4128b_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), + DMI_MATCH(DMI_PRODUCT_NAME, "C4128B-1"), + }, + }, { /* Point of View mobii wintab p800w (v2.0) */ .driver_data = (void *)&pov_mobii_wintab_p800w_v20_data, -- cgit From 9e0bc36ab07c550d791bf17feeb479f1dfc42d89 Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Wed, 19 Jul 2023 21:05:27 +0800 Subject: cpufreq: schedutil: Update next_freq when cpufreq_limits change When cpufreq's policy is 'single', there is a scenario that will cause sg_policy's next_freq to be unable to update. When the CPU's util is always max, the cpufreq will be max, and then if we change the policy's scaling_max_freq to be a lower freq, indeed, the sg_policy's next_freq need change to be the lower freq, however, because the cpu_is_busy, the next_freq would keep the max_freq. For example: The cpu7 is a single CPU: unisoc:/sys/devices/system/cpu/cpufreq/policy7 # while true;do done& [1] 4737 unisoc:/sys/devices/system/cpu/cpufreq/policy7 # taskset -p 80 4737 pid 4737's current affinity mask: ff pid 4737's new affinity mask: 80 unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq 2301000 unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_cur_freq 2301000 unisoc:/sys/devices/system/cpu/cpufreq/policy7 # echo 2171000 > scaling_max_freq unisoc:/sys/devices/system/cpu/cpufreq/policy7 # cat scaling_max_freq 2171000 At this time, the sg_policy's next_freq would stay at 2301000, which is wrong. To fix this, add a check for the ->need_freq_update flag. [ mingo: Clarified the changelog. ] Co-developed-by: Guohua Yan Signed-off-by: Xuewen Yan Signed-off-by: Guohua Yan Signed-off-by: Ingo Molnar Acked-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230719130527.8074-1-xuewen.yan@unisoc.com --- kernel/sched/cpufreq_schedutil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 4492608b7d7f..458d359f5991 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -350,7 +350,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, * Except when the rq is capped by uclamp_max. */ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && - sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { + sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq && + !sg_policy->need_freq_update) { next_f = sg_policy->next_freq; /* Restore cached freq as next_freq has changed */ -- cgit From 0f8baa3c9802fbfe313c901e1598397b61b91ada Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 5 Oct 2023 13:55:31 -0400 Subject: io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I received a bug report with the following signature: [ 1759.937637] BUG: unable to handle page fault for address: ffffffffffffffe8 [ 1759.944564] #PF: supervisor read access in kernel mode [ 1759.949732] #PF: error_code(0x0000) - not-present page [ 1759.954901] PGD 7ab615067 P4D 7ab615067 PUD 7ab617067 PMD 0 [ 1759.960596] Oops: 0000 1 PREEMPT SMP PTI [ 1759.964804] CPU: 15 PID: 109 Comm: cpuhp/15 Kdump: loaded Tainted: G X ------- — 5.14.0-362.3.1.el9_3.x86_64 #1 [ 1759.976609] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 06/20/2018 [ 1759.985181] RIP: 0010:io_wq_for_each_worker.isra.0+0x24/0xa0 [ 1759.990877] Code: 90 90 90 90 90 90 0f 1f 44 00 00 41 56 41 55 41 54 55 48 8d 6f 78 53 48 8b 47 78 48 39 c5 74 4f 49 89 f5 49 89 d4 48 8d 58 e8 <8b> 13 85 d2 74 32 8d 4a 01 89 d0 f0 0f b1 0b 75 5c 09 ca 78 3d 48 [ 1760.009758] RSP: 0000:ffffb6f403603e20 EFLAGS: 00010286 [ 1760.015013] RAX: 0000000000000000 RBX: ffffffffffffffe8 RCX: 0000000000000000 [ 1760.022188] RDX: ffffb6f403603e50 RSI: ffffffffb11e95b0 RDI: ffff9f73b09e9400 [ 1760.029362] RBP: ffff9f73b09e9478 R08: 000000000000000f R09: 0000000000000000 [ 1760.036536] R10: ffffffffffffff00 R11: ffffb6f403603d80 R12: ffffb6f403603e50 [ 1760.043712] R13: ffffffffb11e95b0 R14: ffffffffb28531e8 R15: ffff9f7a6fbdf548 [ 1760.050887] FS: 0000000000000000(0000) GS:ffff9f7a6fbc0000(0000) knlGS:0000000000000000 [ 1760.059025] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1760.064801] CR2: ffffffffffffffe8 CR3: 00000007ab610002 CR4: 00000000007706e0 [ 1760.071976] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1760.079150] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1760.086325] PKRU: 55555554 [ 1760.089044] Call Trace: [ 1760.091501] [ 1760.093612] ? show_trace_log_lvl+0x1c4/0x2df [ 1760.097995] ? show_trace_log_lvl+0x1c4/0x2df [ 1760.102377] ? __io_wq_cpu_online+0x54/0xb0 [ 1760.106584] ? __die_body.cold+0x8/0xd [ 1760.110356] ? page_fault_oops+0x134/0x170 [ 1760.114479] ? kernelmode_fixup_or_oops+0x84/0x110 [ 1760.119298] ? exc_page_fault+0xa8/0x150 [ 1760.123247] ? asm_exc_page_fault+0x22/0x30 [ 1760.127458] ? __pfx_io_wq_worker_affinity+0x10/0x10 [ 1760.132453] ? __pfx_io_wq_worker_affinity+0x10/0x10 [ 1760.137446] ? io_wq_for_each_worker.isra.0+0x24/0xa0 [ 1760.142527] __io_wq_cpu_online+0x54/0xb0 [ 1760.146558] cpuhp_invoke_callback+0x109/0x460 [ 1760.151029] ? __pfx_io_wq_cpu_offline+0x10/0x10 [ 1760.155673] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 1760.160320] cpuhp_thread_fun+0x8d/0x140 [ 1760.164266] smpboot_thread_fn+0xd3/0x1a0 [ 1760.168297] kthread+0xdd/0x100 [ 1760.171457] ? __pfx_kthread+0x10/0x10 [ 1760.175225] ret_from_fork+0x29/0x50 [ 1760.178826] [ 1760.181022] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc vfat fat dm_multipath intel_rapl_msr intel_rapl_common isst_if_common ipmi_ssif nfit libnvdimm mgag200 i2c_algo_bit ioatdma drm_shmem_helper drm_kms_helper acpi_ipmi syscopyarea x86_pkg_temp_thermal sysfillrect ipmi_si intel_powerclamp sysimgblt ipmi_devintf coretemp acpi_power_meter ipmi_msghandler rapl pcspkr dca intel_pch_thermal intel_cstate ses lpc_ich intel_uncore enclosure hpilo mei_me mei acpi_tad fuse drm xfs sd_mod sg bnx2x nvme nvme_core crct10dif_pclmul crc32_pclmul nvme_common ghash_clmulni_intel smartpqi tg3 t10_pi mdio uas libcrc32c crc32c_intel scsi_transport_sas usb_storage hpwdt wmi dm_mirror dm_region_hash dm_log dm_mod [ 1760.248623] CR2: ffffffffffffffe8 A cpu hotplug callback was issued before wq->all_list was initialized. This results in a null pointer dereference. The fix is to fully setup the io_wq before calling cpuhp_state_add_instance_nocalls(). Signed-off-by: Jeff Moyer Link: https://lore.kernel.org/r/x49y1ghnecs.fsf@segfault.boston.devel.redhat.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 1ecc8c748768..522196dfb0ff 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1151,9 +1151,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) wq = kzalloc(sizeof(struct io_wq), GFP_KERNEL); if (!wq) return ERR_PTR(-ENOMEM); - ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) - goto err_wq; refcount_inc(&data->hash->refs); wq->hash = data->hash; @@ -1186,13 +1183,14 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) wq->task = get_task_struct(data->task); atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); + if (ret) + goto err; + return wq; err: io_wq_put_hash(data->hash); - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - free_cpumask_var(wq->cpu_mask); -err_wq: kfree(wq); return ERR_PTR(ret); } -- cgit From 6313e096dbfaf1377ba8f5f8ccd720cc36c576c6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Oct 2023 17:29:54 -0700 Subject: KVM: selftests: Zero-initialize entire test_result in memslot perf test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-initialize the entire test_result structure used by memslot_perf_test instead of zeroing only the fields used to guard the pr_info() calls. gcc 13.2.0 is a bit overzealous and incorrectly thinks that rbestslottime's slot_runtime may be used uninitialized. In file included from memslot_perf_test.c:25: memslot_perf_test.c: In function ‘main’: include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_nsec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_nsec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_sec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_sec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ That can't actually happen, at least not without the "result" structure in test_loop() also being used uninitialized, which gcc doesn't complain about, as writes to rbestslottime are all-or-nothing, i.e. slottimens can't be non-zero without slot_runtime being written. if (!data->mem_size && (!rbestslottime->slottimens || result.slottimens < rbestslottime->slottimens)) *rbestslottime = result; Zero-initialize the structures to make gcc happy even though this is likely a compiler bug. The cost to do so is negligible, both in terms of code and runtime overhead. The only downside is that the compiler won't warn about legitimate usage of "uninitialized" data, e.g. the test could end up consuming zeros instead of useful data. However, given that the test is quite mature and unlikely to see substantial changes, the odds of introducing such bugs are relatively low, whereas being able to compile KVM selftests with -Werror detects issues on a regular basis. Reviewed-by: Maciej S. Szmigiero Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231005002954.2887098-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/memslot_perf_test.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 20eb2e730800..8698d1ab60d0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1088,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1097,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1108,6 @@ int main(int argc, char *argv[]) pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) -- cgit From 152d0bcdf1efcb54a4fa20f694e9c7bbb6d06cbf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 6 Oct 2023 09:41:55 +0800 Subject: dm crypt: Fix reqsize in crypt_iv_eboiv_gen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A skcipher_request object is made up of struct skcipher_request followed by a variable-sized trailer. The allocation of the skcipher_request and IV in crypt_iv_eboiv_gen is missing the memory for struct skcipher_request. Fix it by adding it to reqsize. Fixes: e3023094dffb ("dm crypt: Avoid using MAX_CIPHER_BLOCKSIZE") Cc: #6.5+ Reported-by: Tatu Heikkilä Reviewed-by: Mike Snitzer Signed-off-by: Herbert Xu --- drivers/md/dm-crypt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f2662c21a6df..5315fd261c23 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -753,7 +753,8 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, int err; u8 *buf; - reqsize = ALIGN(crypto_skcipher_reqsize(tfm), __alignof__(__le64)); + reqsize = sizeof(*req) + crypto_skcipher_reqsize(tfm); + reqsize = ALIGN(reqsize, __alignof__(__le64)); req = kmalloc(reqsize + cc->iv_size, GFP_NOIO); if (!req) -- cgit From 9af867c05b5d9f54f190743a1c761d70038c83b3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Oct 2023 17:58:57 +0200 Subject: HID: i2c-hid: fix handling of unpopulated devices A recent commit reordered probe so that the interrupt line is now requested before making sure that the device exists. This breaks machines like the Lenovo ThinkPad X13s which rely on the HID driver to probe second-source devices and only register the variant that is actually populated. Specifically, the interrupt line may now already be (temporarily) claimed when doing asynchronous probing of the touchpad: genirq: Flags mismatch irq 191. 00082008 (hid-over-i2c) vs. 00082008 (hid-over-i2c) i2c_hid_of 21-0015: Could not register for hid-over-i2c interrupt, irq = 191, ret = -16 i2c_hid_of: probe of 21-0015 failed with error -16 Fix this by restoring the old behaviour of first making sure the device exists before requesting the interrupt line. Note that something like this should probably be implemented also for "panel followers", whose actual probe is currently effectively deferred until the DRM panel is probed (e.g. by powering down the device after making sure it exists and only then register it as a follower). Fixes: 675cd877c952 ("HID: i2c-hid: Rearrange probe() to power things up later") Cc: Douglas Anderson Cc: Maxime Ripard Signed-off-by: Johan Hovold Tested-by: Dennis Gilmore Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Link: https://lore.kernel.org/r/20231002155857.24584-1-johan+linaro@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-core.c | 144 +++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 63 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 9601c0605fd9..2735cd585af0 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -998,45 +998,29 @@ static int i2c_hid_core_resume(struct i2c_hid *ihid) return hid_driver_reset_resume(hid); } -/** - * __do_i2c_hid_core_initial_power_up() - First time power up of the i2c-hid device. - * @ihid: The ihid object created during probe. - * - * This function is called at probe time. - * - * The initial power on is where we do some basic validation that the device - * exists, where we fetch the HID descriptor, and where we create the actual - * HID devices. - * - * Return: 0 or error code. +/* + * Check that the device exists and parse the HID descriptor. */ -static int __do_i2c_hid_core_initial_power_up(struct i2c_hid *ihid) +static int __i2c_hid_core_probe(struct i2c_hid *ihid) { struct i2c_client *client = ihid->client; struct hid_device *hid = ihid->hid; int ret; - ret = i2c_hid_core_power_up(ihid); - if (ret) - return ret; - /* Make sure there is something at this address */ ret = i2c_smbus_read_byte(client); if (ret < 0) { i2c_hid_dbg(ihid, "nothing at this address: %d\n", ret); - ret = -ENXIO; - goto err; + return -ENXIO; } ret = i2c_hid_fetch_hid_descriptor(ihid); if (ret < 0) { dev_err(&client->dev, "Failed to fetch the HID Descriptor\n"); - goto err; + return ret; } - enable_irq(client->irq); - hid->version = le16_to_cpu(ihid->hdesc.bcdVersion); hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID); hid->product = le16_to_cpu(ihid->hdesc.wProductID); @@ -1050,17 +1034,49 @@ static int __do_i2c_hid_core_initial_power_up(struct i2c_hid *ihid) ihid->quirks = i2c_hid_lookup_quirk(hid->vendor, hid->product); + return 0; +} + +static int i2c_hid_core_register_hid(struct i2c_hid *ihid) +{ + struct i2c_client *client = ihid->client; + struct hid_device *hid = ihid->hid; + int ret; + + enable_irq(client->irq); + ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) hid_err(client, "can't add hid device: %d\n", ret); - goto err; + disable_irq(client->irq); + return ret; } return 0; +} + +static int i2c_hid_core_probe_panel_follower(struct i2c_hid *ihid) +{ + int ret; + + ret = i2c_hid_core_power_up(ihid); + if (ret) + return ret; -err: + ret = __i2c_hid_core_probe(ihid); + if (ret) + goto err_power_down; + + ret = i2c_hid_core_register_hid(ihid); + if (ret) + goto err_power_down; + + return 0; + +err_power_down: i2c_hid_core_power_down(ihid); + return ret; } @@ -1077,7 +1093,7 @@ static void ihid_core_panel_prepare_work(struct work_struct *work) * steps. */ if (!hid->version) - ret = __do_i2c_hid_core_initial_power_up(ihid); + ret = i2c_hid_core_probe_panel_follower(ihid); else ret = i2c_hid_core_resume(ihid); @@ -1136,7 +1152,6 @@ static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) struct device *dev = &ihid->client->dev; int ret; - ihid->is_panel_follower = true; ihid->panel_follower.funcs = &i2c_hid_core_panel_follower_funcs; /* @@ -1156,30 +1171,6 @@ static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) return 0; } -static int i2c_hid_core_initial_power_up(struct i2c_hid *ihid) -{ - /* - * If we're a panel follower, we'll register and do our initial power - * up when the panel turns on; otherwise we do it right away. - */ - if (drm_is_panel_follower(&ihid->client->dev)) - return i2c_hid_core_register_panel_follower(ihid); - else - return __do_i2c_hid_core_initial_power_up(ihid); -} - -static void i2c_hid_core_final_power_down(struct i2c_hid *ihid) -{ - /* - * If we're a follower, the act of unfollowing will cause us to be - * powered down. Otherwise we need to manually do it. - */ - if (ihid->is_panel_follower) - drm_panel_remove_follower(&ihid->panel_follower); - else - i2c_hid_core_suspend(ihid, true); -} - int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, u16 hid_descriptor_address, u32 quirks) { @@ -1211,6 +1202,7 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, ihid->ops = ops; ihid->client = client; ihid->wHIDDescRegister = cpu_to_le16(hid_descriptor_address); + ihid->is_panel_follower = drm_is_panel_follower(&client->dev); init_waitqueue_head(&ihid->wait); mutex_init(&ihid->reset_lock); @@ -1224,14 +1216,10 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, return ret; device_enable_async_suspend(&client->dev); - ret = i2c_hid_init_irq(client); - if (ret < 0) - goto err_buffers_allocated; - hid = hid_allocate_device(); if (IS_ERR(hid)) { ret = PTR_ERR(hid); - goto err_irq; + goto err_free_buffers; } ihid->hid = hid; @@ -1242,19 +1230,42 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, hid->bus = BUS_I2C; hid->initial_quirks = quirks; - ret = i2c_hid_core_initial_power_up(ihid); + /* Power on and probe unless device is a panel follower. */ + if (!ihid->is_panel_follower) { + ret = i2c_hid_core_power_up(ihid); + if (ret < 0) + goto err_destroy_device; + + ret = __i2c_hid_core_probe(ihid); + if (ret < 0) + goto err_power_down; + } + + ret = i2c_hid_init_irq(client); + if (ret < 0) + goto err_power_down; + + /* + * If we're a panel follower, we'll register when the panel turns on; + * otherwise we do it right away. + */ + if (ihid->is_panel_follower) + ret = i2c_hid_core_register_panel_follower(ihid); + else + ret = i2c_hid_core_register_hid(ihid); if (ret) - goto err_mem_free; + goto err_free_irq; return 0; -err_mem_free: - hid_destroy_device(hid); - -err_irq: +err_free_irq: free_irq(client->irq, ihid); - -err_buffers_allocated: +err_power_down: + if (!ihid->is_panel_follower) + i2c_hid_core_power_down(ihid); +err_destroy_device: + hid_destroy_device(hid); +err_free_buffers: i2c_hid_free_buffers(ihid); return ret; @@ -1266,7 +1277,14 @@ void i2c_hid_core_remove(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid; - i2c_hid_core_final_power_down(ihid); + /* + * If we're a follower, the act of unfollowing will cause us to be + * powered down. Otherwise we need to manually do it. + */ + if (ihid->is_panel_follower) + drm_panel_remove_follower(&ihid->panel_follower); + else + i2c_hid_core_suspend(ihid, true); hid = ihid->hid; hid_destroy_device(hid); -- cgit From 869b6ea1609f655a43251bf41757aa44e5350a8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Oct 2023 15:32:01 +0200 Subject: quota: Fix slow quotaoff Eric has reported that commit dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") heavily increases runtime of generic/270 xfstest for ext4 in nojournal mode. The reason for this is that ext4 in nojournal mode leaves dquots dirty until the last dqput() and thus the cleanup done in quota_release_workfn() has to write them all. Due to the way quota_release_workfn() is written this results in synchronize_srcu() call for each dirty dquot which makes the dquot cleanup when turning quotas off extremely slow. To be able to avoid synchronize_srcu() for each dirty dquot we need to rework how we track dquots to be cleaned up. Instead of keeping the last dquot reference while it is on releasing_dquots list, we drop it right away and mark the dquot with new DQ_RELEASING_B bit instead. This way we can we can remove dquot from releasing_dquots list when new reference to it is acquired and thus there's no need to call synchronize_srcu() each time we drop dq_list_lock. References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64 Reported-by: Eric Whitney Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/quota/dquot.c | 66 ++++++++++++++++++++++++++++-------------------- include/linux/quota.h | 4 ++- include/linux/quotaops.h | 2 +- 3 files changed, 43 insertions(+), 29 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9e72bfe8bbad..31e897ad5e6a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt) * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * - * When the last reference of a dquot will be dropped, the dquot will be - * added to releasing_dquots. We'd then queue work item which would call + * When the last reference of a dquot is dropped, the dquot is added to + * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the - * dquots on the list. Both releasing_dquots and free_dquots use the - * dq_free list_head in the dquot struct. When a dquot is removed from - * releasing_dquots, a reference count is always subtracted, and if - * dq_count == 0 at that point, the dquot will be added to the free_dquots. + * dquots on the list. Each cleaned up dquot is moved to free_dquots list. + * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot + * struct. * - * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, - * and this list is searched whenever we need an available dquot. Dquots are - * removed from the list as soon as they are used again, and - * dqstats.free_dquots gives the number of dquots on the list. When - * dquot is invalidated it's completely released from memory. + * Unused and cleaned up dquots are in the free_dquots list and this list is + * searched whenever we need an available dquot. Dquots are removed from the + * list as soon as they are used again and dqstats.free_dquots gives the number + * of dquots on the list. When dquot is invalidated it's completely released + * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to @@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot) static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); + set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) @@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - if (!atomic_read(&dquot->dq_count)) + if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); + else + clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) @@ -581,12 +583,6 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - /* dquot in releasing_dquots, flush and retry */ - if (!list_empty(&dquot->dq_free)) { - spin_unlock(&dq_list_lock); - goto restart; - } - atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* @@ -605,6 +601,15 @@ restart: * restart. */ goto restart; } + /* + * The last user already dropped its reference but dquot didn't + * get fully cleaned up yet. Restart the scan which flushes the + * work cleaning up released dquots. + */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + goto restart; + } /* * Quota now has no users and it has been written on last * dqput() @@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type) dq_dirty); WARN_ON(!dquot_active(dquot)); + /* If the dquot is releasing we should not touch it */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + flush_delayed_work("a_release_work); + spin_lock(&dq_list_lock); + continue; + } /* Now we have active dquot from which someone is * holding reference so we can safely just increase @@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work) /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); + synchronize_srcu(&dquot_srcu); restart: - synchronize_srcu(&dquot_srcu); spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); - /* Dquot got used again? */ - if (atomic_read(&dquot->dq_count) > 1) { - remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); - continue; - } + WARN_ON_ONCE(atomic_read(&dquot->dq_count)); + /* + * Note that DQ_RELEASING_B protects us from racing with + * invalidate_dquots() calls so we are safe to work with the + * dquot even after we drop dq_list_lock. + */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ @@ -834,7 +846,6 @@ restart: } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); @@ -875,6 +886,7 @@ void dqput(struct dquot *dquot) BUG_ON(!list_empty(&dquot->dq_free)); #endif put_releasing_dquots(dquot); + atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, "a_release_work, 1); } @@ -963,7 +975,7 @@ we_slept: dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is - * already finished or it will be canceled due to dq_count > 1 test */ + * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } -- cgit From 5d542b850d40cb08a38ad4bb2a944dbf1b7b0683 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Tue, 3 Oct 2023 15:21:38 +0100 Subject: ALSA: hda: cs35l41: Cleanup and fix double free in firmware request There is an unlikely but possible double free when loading firmware, and a missing free calls if a firmware is successfully requested but the coefficient file request fails, leading to the fallback firmware request occurring without clearing the previously loaded firmware. Fixes: cd40dad2ca91 ("ALSA: hda: cs35l41: Ensure firmware/tuning pairs are always loaded") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309291331.0JUUQnPT-lkp@intel.com/ Signed-off-by: Stefan Binding Link: https://lore.kernel.org/r/20231003142138.180108-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda.c | 115 ++++++++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 36 deletions(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index f9b77353c266..c6031f744099 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -185,10 +185,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->speaker_id, "wmfw"); if (!ret) { /* try cirrus/part-dspN-fwtype-sub<-spkidN><-ampname>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, cs35l41->amp_name, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, cs35l41->amp_name, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub<-ampname>.wmfw */ @@ -197,10 +201,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->amp_name, -1, "wmfw"); if (!ret) { /* try cirrus/part-dspN-fwtype-sub<-spkidN><-ampname>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, cs35l41->amp_name, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, cs35l41->amp_name, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub<-spkidN>.wmfw */ @@ -215,10 +223,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->amp_name, cs35l41->speaker_id, "bin"); if (ret) /* try cirrus/part-dspN-fwtype-sub<-spkidN>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, - coeff_filename, CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, NULL, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, + coeff_filename, CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, NULL, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub.wmfw */ @@ -233,12 +245,50 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->speaker_id, "bin"); if (ret) /* try cirrus/part-dspN-fwtype-sub<-spkidN>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, - coeff_filename, CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, NULL, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, + coeff_filename, CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, NULL, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + } + + return ret; +coeff_err: + release_firmware(*wmfw_firmware); + kfree(*wmfw_filename); + return ret; +} + +static int cs35l41_fallback_firmware_file(struct cs35l41_hda *cs35l41, + const struct firmware **wmfw_firmware, + char **wmfw_filename, + const struct firmware **coeff_firmware, + char **coeff_filename) +{ + int ret; + + /* Handle fallback */ + dev_warn(cs35l41->dev, "Falling back to default firmware.\n"); + + /* fallback try cirrus/part-dspN-fwtype.wmfw */ + ret = cs35l41_request_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, + CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "wmfw"); + if (ret) + goto err; + + /* fallback try cirrus/part-dspN-fwtype.bin */ + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "bin"); + if (ret) { + release_firmware(*wmfw_firmware); + kfree(*wmfw_filename); + goto err; } + return 0; +err: + dev_warn(cs35l41->dev, "Unable to find firmware and tuning\n"); return ret; } @@ -254,7 +304,6 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, ret = cs35l41_request_firmware_files_spkid(cs35l41, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename); goto out; - } /* try cirrus/part-dspN-fwtype-sub<-ampname>.wmfw */ @@ -267,6 +316,9 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, CS35L41_FIRMWARE_ROOT, cs35l41->acpi_subsystem_id, cs35l41->amp_name, -1, "bin"); + if (ret) + goto coeff_err; + goto out; } @@ -286,32 +338,23 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, CS35L41_FIRMWARE_ROOT, cs35l41->acpi_subsystem_id, NULL, -1, "bin"); + if (ret) + goto coeff_err; } out: - if (!ret) - return 0; + if (ret) + /* if all attempts at finding firmware fail, try fallback */ + goto fallback; - /* Handle fallback */ - dev_warn(cs35l41->dev, "Falling back to default firmware.\n"); + return 0; +coeff_err: release_firmware(*wmfw_firmware); kfree(*wmfw_filename); - - /* fallback try cirrus/part-dspN-fwtype.wmfw */ - ret = cs35l41_request_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, - CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "wmfw"); - if (!ret) - /* fallback try cirrus/part-dspN-fwtype.bin */ - ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "bin"); - - if (ret) { - release_firmware(*wmfw_firmware); - kfree(*wmfw_filename); - dev_warn(cs35l41->dev, "Unable to find firmware and tuning\n"); - } - return ret; +fallback: + return cs35l41_fallback_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, + coeff_firmware, coeff_filename); } #if IS_ENABLED(CONFIG_EFI) -- cgit From 6a83d6f3bb3c329a73e3483651fb77b78bac1878 Mon Sep 17 00:00:00 2001 From: WhaleChang Date: Fri, 6 Oct 2023 12:48:49 +0800 Subject: ALSA: usb-audio: Fix microphone sound on Opencomm2 Headset When a Opencomm2 Headset is connected to a Bluetooth USB dongle, the audio playback functions properly, but the microphone does not work. In the dmesg logs, there are messages indicating that the init_pitch function fails when the capture process begins. The microphone only functions when the ep pitch control is not set. Toggling the pitch control off bypasses the init_piatch function and allows the microphone to work. Signed-off-by: WhaleChang Link: https://lore.kernel.org/r/20231006044852.4181022-1-whalechang@google.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 598659d761cc..d4bbef70d2f7 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1994,7 +1994,11 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip, /* mic works only when ep packet size is set to wMaxPacketSize */ fp->attributes |= UAC_EP_CS_ATTR_FILL_MAX; break; - + case USB_ID(0x3511, 0x2b1e): /* Opencomm2 UC USB Bluetooth dongle */ + /* mic works only when ep pitch control is not set */ + if (stream == SNDRV_PCM_STREAM_CAPTURE) + fp->attributes &= ~UAC_EP_CS_ATTR_PITCH_CONTROL; + break; } } -- cgit From ccbd88be057a38531f835e8a04948ebf80cb0c5d Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 6 Oct 2023 14:47:37 +0800 Subject: ALSA: hda/realtek: Change model for Intel RVP board Intel RVP board (0x12cc) has Headset Mic issue for reboot. If system plugged headset when system reboot the headset Mic was gone. Fixes: 1a93f10c5b12 ("ALSA: hda/realtek: Add "Intel Reference board" and "NUC 13" SSID in the ALC256") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/28112f54c0c6496f97ac845645bc0256@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 48bb57bd8d11..3eeecf67c17b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9861,7 +9861,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC225_FIXUP_HEADSET_JACK), + SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), @@ -10098,7 +10098,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), - SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC225_FIXUP_HEADSET_JACK), + SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 -- cgit From f200bab3756fe81493a1b280180dafa1d9ccdcf7 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Wed, 4 Oct 2023 14:17:06 +0300 Subject: phy: lynx-28g: cancel the CDR check work item on the remove path The blamed commit added the CDR check work item but didn't cancel it on the remove path. Fix this by adding a remove function which takes care of it. Fixes: 8f73b37cf3fb ("phy: add support for the Layerscape SerDes 28G") Signed-off-by: Ioana Ciornei Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/phy/freescale/phy-fsl-lynx-28g.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index 4f036c77284e..c6323669f119 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -604,6 +604,14 @@ static int lynx_28g_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(provider); } +static void lynx_28g_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct lynx_28g_priv *priv = dev_get_drvdata(dev); + + cancel_delayed_work_sync(&priv->cdr_check); +} + static const struct of_device_id lynx_28g_of_match_table[] = { { .compatible = "fsl,lynx-28g" }, { }, @@ -612,6 +620,7 @@ MODULE_DEVICE_TABLE(of, lynx_28g_of_match_table); static struct platform_driver lynx_28g_driver = { .probe = lynx_28g_probe, + .remove_new = lynx_28g_remove, .driver = { .name = "lynx-28g", .of_match_table = lynx_28g_of_match_table, -- cgit From 0ac87fe54a171d18c5fb5345e3ee8d14e1b06f4b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 4 Oct 2023 14:17:07 +0300 Subject: phy: lynx-28g: lock PHY while performing CDR lock workaround lynx_28g_cdr_lock_check() runs once per second in a workqueue to reset the lane receiver if the CDR has not locked onto bit transitions in the RX stream. But the PHY consumer may do stuff with the PHY simultaneously, and that isn't okay. Block concurrent generic PHY calls by holding the PHY mutex from this workqueue. Fixes: 8f73b37cf3fb ("phy: add support for the Layerscape SerDes 28G") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/phy/freescale/phy-fsl-lynx-28g.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index c6323669f119..d5385d36735d 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -508,11 +508,12 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) for (i = 0; i < LYNX_28G_NUM_LANE; i++) { lane = &priv->lane[i]; - if (!lane->init) - continue; + mutex_lock(&lane->phy->mutex); - if (!lane->powered_up) + if (!lane->init || !lane->powered_up) { + mutex_unlock(&lane->phy->mutex); continue; + } rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); if (!(rrstctl & LYNX_28G_LNaRRSTCTL_CDR_LOCK)) { @@ -521,6 +522,8 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); } while (!(rrstctl & LYNX_28G_LNaRRSTCTL_RST_DONE)); } + + mutex_unlock(&lane->phy->mutex); } queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, msecs_to_jiffies(1000)); -- cgit From 139ad1143151a07be93bf741d4ea7c89e59f89ce Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 4 Oct 2023 14:17:08 +0300 Subject: phy: lynx-28g: serialize concurrent phy_set_mode_ext() calls to shared registers The protocol converter configuration registers PCC8, PCCC, PCCD (implemented by the driver), as well as others, control protocol converters from multiple lanes (each represented as a different struct phy). So, if there are simultaneous calls to phy_set_mode_ext() to lanes sharing the same PCC register (either for the "old" or for the "new" protocol), corruption of the values programmed to hardware is possible, because lynx_28g_rmw() has no locking. Add a spinlock in the struct lynx_28g_priv shared by all lanes, and take the global spinlock from the phy_ops :: set_mode() implementation. There are no other callers which modify PCC registers. Fixes: 8f73b37cf3fb ("phy: add support for the Layerscape SerDes 28G") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/phy/freescale/phy-fsl-lynx-28g.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index d5385d36735d..e2187767ce00 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -127,6 +127,10 @@ struct lynx_28g_lane { struct lynx_28g_priv { void __iomem *base; struct device *dev; + /* Serialize concurrent access to registers shared between lanes, + * like PCCn + */ + spinlock_t pcc_lock; struct lynx_28g_pll pll[LYNX_28G_NUM_PLL]; struct lynx_28g_lane lane[LYNX_28G_NUM_LANE]; @@ -397,6 +401,8 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) if (powered_up) lynx_28g_power_off(phy); + spin_lock(&priv->pcc_lock); + switch (submode) { case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_1000BASEX: @@ -413,6 +419,8 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) lane->interface = submode; out: + spin_unlock(&priv->pcc_lock); + /* Power up the lane if necessary */ if (powered_up) lynx_28g_power_on(phy); @@ -596,6 +604,7 @@ static int lynx_28g_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); + spin_lock_init(&priv->pcc_lock); INIT_DELAYED_WORK(&priv->cdr_check, lynx_28g_cdr_lock_check); queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, -- cgit From 5652d1741574eb89cc02576e50ee3e348bd6dd77 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Wed, 4 Oct 2023 11:19:03 +0200 Subject: net: dsa: qca8k: fix regmap bulk read/write methods on big endian systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c766e077d927 ("net: dsa: qca8k: convert to regmap read/write API") introduced bulk read/write methods to qca8k's regmap. The regmap bulk read/write methods get the register address in a buffer passed as a void pointer parameter (the same buffer contains also the read/written values). The register address occupies only as many bytes as it requires at the beginning of this buffer. For example if the .reg_bits member in regmap_config is 16 (as is the case for this driver), the register address occupies only the first 2 bytes in this buffer, so it can be cast to u16. But the original commit implementing these bulk read/write methods cast the buffer to u32: u32 reg = *(u32 *)reg_buf & U16_MAX; taking the first 4 bytes. This works on little endian systems where the first 2 bytes of the buffer correspond to the low 16-bits, but it obviously cannot work on big endian systems. Fix this by casting the beginning of the buffer to u16 as u32 reg = *(u16 *)reg_buf; Fixes: c766e077d927 ("net: dsa: qca8k: convert to regmap read/write API") Signed-off-by: Marek Behún Tested-by: Christian Marangi Reviewed-by: Christian Marangi Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index de1dc22cf683..d2df30640269 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -505,8 +505,8 @@ qca8k_bulk_read(void *ctx, const void *reg_buf, size_t reg_len, void *val_buf, size_t val_len) { int i, count = val_len / sizeof(u32), ret; - u32 reg = *(u32 *)reg_buf & U16_MAX; struct qca8k_priv *priv = ctx; + u32 reg = *(u16 *)reg_buf; if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val_buf, val_len)) @@ -527,8 +527,8 @@ qca8k_bulk_gather_write(void *ctx, const void *reg_buf, size_t reg_len, const void *val_buf, size_t val_len) { int i, count = val_len / sizeof(u32), ret; - u32 reg = *(u32 *)reg_buf & U16_MAX; struct qca8k_priv *priv = ctx; + u32 reg = *(u16 *)reg_buf; u32 *val = (u32 *)val_buf; if (priv->mgmt_master && -- cgit From 526c8ee04bdbd4d8d19a583b1f3b06700229a815 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Wed, 4 Oct 2023 11:19:04 +0200 Subject: net: dsa: qca8k: fix potential MDIO bus conflict when accessing internal PHYs via management frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Besides the QCA8337 switch the Turris 1.x device has on it's MDIO bus also Micron ethernet PHY (dedicated to the WAN port). We've been experiencing a strange behavior of the WAN ethernet interface, wherein the WAN PHY started timing out the MDIO accesses, for example when the interface was brought down and then back up. Bisecting led to commit 2cd548566384 ("net: dsa: qca8k: add support for phy read/write with mgmt Ethernet"), which added support to access the QCA8337 switch's internal PHYs via management ethernet frames. Connecting the MDIO bus pins onto an oscilloscope, I was able to see that the MDIO bus was active whenever a request to read/write an internal PHY register was done via an management ethernet frame. My theory is that when the switch core always communicates with the internal PHYs via the MDIO bus, even when externally we request the access via ethernet. This MDIO bus is the same one via which the switch and internal PHYs are accessible to the board, and the board may have other devices connected on this bus. An ASCII illustration may give more insight: +---------+ +----| | | | WAN PHY | | +--| | | | +---------+ | | | | +----------------------------------+ | | | QCA8337 | MDC | | | +-------+ | ------o-+--|--------o------------o--| | | MDIO | | | | | PHY 1 |-|--to RJ45 --------o--|---o----+---------o--+--| | | | | | | | +-------+ | | +-------------+ | o--| | | | | MDIO MDC | | | | PHY 2 |-|--to RJ45 eth1 | | | o--+--| | | -----------|-|port0 | | | +-------+ | | | | | o--| | | | | switch core | | | | PHY 3 |-|--to RJ45 | +-------------+ o--+--| | | | | | +-------+ | | | o--| ... | | +----------------------------------+ When we send a request to read an internal PHY register via an ethernet management frame via eth1, the switch core receives the ethernet frame on port 0 and then communicates with the internal PHY via MDIO. At this time, other potential devices, such as the WAN PHY on Turris 1.x, cannot use the MDIO bus, since it may cause a bus conflict. Fix this issue by locking the MDIO bus even when we are accessing the PHY registers via ethernet management frames. Fixes: 2cd548566384 ("net: dsa: qca8k: add support for phy read/write with mgmt Ethernet") Signed-off-by: Marek Behún Reviewed-by: Christian Marangi Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index d2df30640269..4ce68e655a63 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -666,6 +666,15 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, goto err_read_skb; } + /* It seems that accessing the switch's internal PHYs via management + * packets still uses the MDIO bus within the switch internally, and + * these accesses can conflict with external MDIO accesses to other + * devices on the MDIO bus. + * We therefore need to lock the MDIO bus onto which the switch is + * connected. + */ + mutex_lock(&priv->bus->mdio_lock); + /* Actually start the request: * 1. Send mdio master packet * 2. Busy Wait for mdio master command @@ -678,6 +687,7 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, mgmt_master = priv->mgmt_master; if (!mgmt_master) { mutex_unlock(&mgmt_eth_data->mutex); + mutex_unlock(&priv->bus->mdio_lock); ret = -EINVAL; goto err_mgmt_master; } @@ -765,6 +775,7 @@ exit: QCA8K_ETHERNET_TIMEOUT); mutex_unlock(&mgmt_eth_data->mutex); + mutex_unlock(&priv->bus->mdio_lock); return ret; -- cgit From c4d49196ceec80e30e8d981410d73331b49b7850 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 4 Oct 2023 15:19:37 +0200 Subject: net: sched: cls_u32: Fix allocation size in u32_init() commit d61491a51f7e ("net/sched: cls_u32: Replace one-element array with flexible-array member") incorrecly replaced an instance of `sizeof(*tp_c)` with `struct_size(tp_c, hlist->ht, 1)`. This results in a an over-allocation of 8 bytes. This change is wrong because `hlist` in `struct tc_u_common` is a pointer: net/sched/cls_u32.c: struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; int refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; }; So, the use of `struct_size()` makes no sense: we don't need to allocate any extra space for a flexible-array member. `sizeof(*tp_c)` is just fine. So, `struct_size(tp_c, hlist->ht, 1)` translates to: sizeof(*tp_c) + sizeof(tp_c->hlist->ht) == sizeof(struct tc_u_common) + sizeof(struct tc_u_knode *) == 144 + 8 == 0x98 (byes) ^^^ | unnecessary extra allocation size $ pahole -C tc_u_common net/sched/cls_u32.o struct tc_u_common { struct tc_u_hnode * hlist; /* 0 8 */ void * ptr; /* 8 8 */ int refcnt; /* 16 4 */ /* XXX 4 bytes hole, try to pack */ struct idr handle_idr; /* 24 96 */ /* --- cacheline 1 boundary (64 bytes) was 56 bytes ago --- */ struct hlist_node hnode; /* 120 16 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ long int knodes; /* 136 8 */ /* size: 144, cachelines: 3, members: 6 */ /* sum members: 140, holes: 1, sum holes: 4 */ /* last cacheline: 16 bytes */ }; And with `sizeof(*tp_c)`, we have: sizeof(*tp_c) == sizeof(struct tc_u_common) == 144 == 0x90 (bytes) which is the correct and original allocation size. Fix this issue by replacing `struct_size(tp_c, hlist->ht, 1)` with `sizeof(*tp_c)`, and avoid allocating 8 too many bytes. The following difference in binary output is expected and reflects the desired change: | net/sched/cls_u32.o | @@ -6148,7 +6148,7 @@ | include/linux/slab.h:599 | 2cf5: mov 0x0(%rip),%rdi # 2cfc | 2cf8: R_X86_64_PC32 kmalloc_caches+0xc |- 2cfc: mov $0x98,%edx |+ 2cfc: mov $0x90,%edx Reported-by: Alejandro Colomar Closes: https://lore.kernel.org/lkml/09b4a2ce-da74-3a19-6961-67883f634d98@kernel.org/ Signed-off-by: Gustavo A. R. Silva Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index da4c179a4d41..6663e971a13e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -366,7 +366,7 @@ static int u32_init(struct tcf_proto *tp) idr_init(&root_ht->handle_idr); if (tp_c == NULL) { - tp_c = kzalloc(struct_size(tp_c, hlist->ht, 1), GFP_KERNEL); + tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; -- cgit From d9c2ba65e651467de739324d978b04ed8729f483 Mon Sep 17 00:00:00 2001 From: Lukas Magel Date: Sun, 27 Aug 2023 09:22:05 +0000 Subject: can: isotp: isotp_sendmsg(): fix TX state detection and wait behavior With patch [1], isotp_poll was updated to also queue the poller in the so->wait queue, which is used for send state changes. Since the queue now also contains polling tasks that are not interested in sending, the queue fill state can no longer be used as an indication of send readiness. As a consequence, nonblocking writes can lead to a race and lock-up of the socket if there is a second task polling the socket in parallel. With this patch, isotp_sendmsg does not consult wq_has_sleepers but instead tries to atomically set so->tx.state and waits on so->wait if it is unable to do so. This behavior is in alignment with isotp_poll, which also checks so->tx.state to determine send readiness. V2: - Revert direct exit to goto err_event_drop [1] https://lore.kernel.org/all/20230331125511.372783-1-michal.sojka@cvut.cz Reported-by: Maxime Jayat Closes: https://lore.kernel.org/linux-can/11328958-453f-447f-9af8-3b5824dfb041@munic.io/ Signed-off-by: Lukas Magel Reviewed-by: Oliver Hartkopp Fixes: 79e19fa79cb5 ("can: isotp: isotp_ops: fix poll() to not report false EPOLLOUT events") Link: https://github.com/pylessard/python-udsoncan/issues/178#issuecomment-1743786590 Link: https://lore.kernel.org/all/20230827092205.7908-1-lukas.magel@posteo.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index f02b5d3e4733..d1c6f206f429 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -948,21 +948,18 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (!so->bound || so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; -wait_free_buffer: - /* we do not support multiple buffers - for now */ - if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT)) - return -EAGAIN; + while (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { + /* we do not support multiple buffers - for now */ + if (msg->msg_flags & MSG_DONTWAIT) + return -EAGAIN; - /* wait for complete transmission of current pdu */ - err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (err) - goto err_event_drop; - - if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { if (so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; - goto wait_free_buffer; + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; } /* PDU size > default => try max_pdu_size */ -- cgit From 1f223208ebdef84f21c15e9958c005a93c871aa2 Mon Sep 17 00:00:00 2001 From: John Watts Date: Wed, 6 Sep 2023 09:13:43 +1000 Subject: can: sun4i_can: Only show Kconfig if ARCH_SUNXI is set When adding the RISCV option I didn't gate it behind ARCH_SUNXI. As a result this option shows up with Allwinner support isn't enabled. Fix that by requiring ARCH_SUNXI to be set if RISCV is set. Fixes: 8abb95250ae6 ("can: sun4i_can: Add support for the Allwinner D1") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-sunxi/CAMuHMdV2m54UAH0X2dG7stEg=grFihrdsz4+o7=_DpBMhjTbkw@mail.gmail.com/ Signed-off-by: John Watts Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/20230905231342.2042759-2-contact@jookia.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 649453a3c858..f8cde9f9f554 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -190,7 +190,7 @@ config CAN_SLCAN config CAN_SUN4I tristate "Allwinner A10 CAN controller" - depends on MACH_SUN4I || MACH_SUN7I || RISCV || COMPILE_TEST + depends on MACH_SUN4I || MACH_SUN7I || (RISCV && ARCH_SUNXI) || COMPILE_TEST help Say Y here if you want to use CAN controller found on Allwinner A10/A20/D1 SoCs. -- cgit From 23ed2be5404da7cee6a519fa69bf22d0f69da4e4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 26 Jul 2023 19:24:57 +0800 Subject: arm64: dts: imx93: add the Flex-CAN stop mode by GPR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit imx93 A0 chip use the internal q-channel handshake signal in LPCG and CCM to automatically handle the Flex-CAN stop mode. But this method meet issue when do the system PM stress test. IC can't fix it easily. So in the new imx93 A1 chip, IC drop this method, and involve back the old way,use the GPR method to trigger the Flex-CAN stop mode signal. Now NXP claim to drop imx93 A0, and only support imx93 A1. So here add the stop mode through GPR. This patch also fix a typo for aonmix_ns_gpr. Signed-off-by: Haibo Chen Link: https://lore.kernel.org/all/20230726112458.3524165-1-haibo.chen@nxp.com Signed-off-by: Marc Kleine-Budde --- arch/arm64/boot/dts/freescale/imx93.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 6f85a05ee7e1..dcf6e4846ac9 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -185,7 +185,7 @@ #size-cells = <1>; ranges; - anomix_ns_gpr: syscon@44210000 { + aonmix_ns_gpr: syscon@44210000 { compatible = "fsl,imx93-aonmix-ns-syscfg", "syscon"; reg = <0x44210000 0x1000>; }; @@ -319,6 +319,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&aonmix_ns_gpr 0x14 0>; status = "disabled"; }; @@ -591,6 +592,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&wakeupmix_gpr 0x0c 2>; status = "disabled"; }; -- cgit From b5efb4e6fbb06da928526eca746f3de243c12ab2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 2 Oct 2023 18:02:06 +0200 Subject: can: sja1000: Always restart the Tx queue after an overrun Upstream commit 717c6ec241b5 ("can: sja1000: Prevent overrun stalls with a soft reset on Renesas SoCs") fixes an issue with Renesas own SJA1000 CAN controller reception: the Rx buffer is only 5 messages long, so when the bus loaded (eg. a message every 50us), overrun may easily happen. Upon an overrun situation, due to a possible internal crosstalk situation, the controller enters a frozen state which only can be unlocked with a soft reset (experimentally). The solution was to offload a call to sja1000_start() in a threaded handler. This needs to happen in process context as this operation requires to sleep. sja1000_start() basically enters "reset mode", performs a proper software reset and returns back into "normal mode". Since this fix was introduced, we no longer observe any stalls in reception. However it was sporadically observed that the transmit path would now freeze. Further investigation blamed the fix mentioned above, and especially the reset operation. Reproducing the reset in a loop helped identifying what could possibly go wrong. The sja1000 is a single Tx queue device, which leverages the netdev helpers to process one Tx message at a time. The logic is: the queue is stopped, the message sent to the transceiver, once properly transmitted the controller sets a status bit which triggers an interrupt, in the interrupt handler the transmission status is checked and the queue woken up. Unfortunately, if an overrun happens, we might perform the soft reset precisely between the transmission of the buffer to the transceiver and the advent of the transmission status bit. We would then stop the transmission operation without re-enabling the queue, leading to all further transmissions to be ignored. The reset interrupt can only happen while the device is "open", and after a reset we anyway want to resume normal operations, no matter if a packet to transmit got dropped in the process, so we shall wake up the queue. Restarting the device and waking-up the queue is exactly what sja1000_set_mode(CAN_MODE_START) does. In order to be consistent about the queue state, we must acquire a lock both in the reset handler and in the transmit path to ensure serialization of both operations. It turns out, a lock is already held when entering the transmit path, so we can just acquire/release it as well with the regular net helpers inside the threaded interrupt handler and this way we should be safe. As the reset handler might still be called after the transmission of a frame to the transceiver but before it actually gets transmitted, we must ensure we don't leak the skb, so we free it (the behavior is consistent, no matter if there was an skb on the stack or not). Fixes: 717c6ec241b5 ("can: sja1000: Prevent overrun stalls with a soft reset on Renesas SoCs") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/all/20231002160206.190953-1-miquel.raynal@bootlin.com [mkl: fixed call to can_free_echo_skb()] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 0ada0e160e93..743c2eb62b87 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -392,7 +392,13 @@ static irqreturn_t sja1000_reset_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; netdev_dbg(dev, "performing a soft reset upon overrun\n"); - sja1000_start(dev); + + netif_tx_lock(dev); + + can_free_echo_skb(dev, 0, NULL); + sja1000_set_mode(dev, CAN_MODE_START); + + netif_tx_unlock(dev); return IRQ_HANDLED; } -- cgit From 63ead535570f13d0e06fda3f2d020c8f5394e998 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 26 Jul 2023 19:24:58 +0800 Subject: can: flexcan: remove the auto stop mode for IMX93 IMX93 A0 chip involve the internal q-channel handshake in LPCG and CCM to automatically handle the Flex-CAN IPG STOP signal. Only after FLEX-CAN enter stop mode then can support the self-wakeup feature. But meet issue when do the continue system PM stress test. When config the CAN as wakeup source, the first time after system suspend, any data on CAN bus can wakeup the system, this is as expect. But the second time when system suspend, data on CAN bus can't wakeup the system. If continue this test, we find in odd time system enter suspend, CAN can wakeup the system, but in even number system enter suspend, CAN can't wakeup the system. IC find a bug in the auto stop mode logic, and can't fix it easily. So for the new imx93 A1, IC drop the auto stop mode and involve the GPR to support stop mode (used before). IC define a bit in GPR which can trigger the IPG STOP signal to Flex-CAN, let it go into stop mode. And NXP claim to drop IMX93 A0, and only support IMX93 A1. So this patch remove the auto stop mode, and add flag FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR to imx93. Signed-off-by: Haibo Chen Link: https://lore.kernel.org/all/20230726112458.3524165-2-haibo.chen@nxp.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan/flexcan-core.c | 46 ++++++++++------------------------ drivers/net/can/flexcan/flexcan.h | 2 -- 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index add39e922b89..d15f85a40c1e 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -348,7 +348,7 @@ static struct flexcan_devtype_data fsl_imx8mp_devtype_data = { static struct flexcan_devtype_data fsl_imx93_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX | - FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_AUTO_STOP_MODE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR | FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC | FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR, @@ -544,11 +544,6 @@ static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv) } else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) { regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, 1 << priv->stm.req_bit, 1 << priv->stm.req_bit); - } else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) { - /* For the auto stop mode, software do nothing, hardware will cover - * all the operation automatically after system go into low power mode. - */ - return 0; } return flexcan_low_power_enter_ack(priv); @@ -574,12 +569,6 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv) reg_mcr &= ~FLEXCAN_MCR_SLF_WAK; priv->write(reg_mcr, ®s->mcr); - /* For the auto stop mode, hardware will exist stop mode - * automatically after system go out of low power mode. - */ - if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - return 0; - return flexcan_low_power_exit_ack(priv); } @@ -1994,13 +1983,18 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) ret = flexcan_setup_stop_mode_scfw(pdev); else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) ret = flexcan_setup_stop_mode_gpr(pdev); - else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - ret = 0; else /* return 0 directly if doesn't support stop mode feature */ return 0; - if (ret) + /* If ret is -EINVAL, this means SoC claim to support stop mode, but + * dts file lack the stop mode property definition. For this case, + * directly return 0, this will skip the wakeup capable setting and + * will not block the driver probe. + */ + if (ret == -EINVAL) + return 0; + else if (ret) return ret; device_set_wakeup_capable(&pdev->dev, true); @@ -2320,16 +2314,8 @@ static int __maybe_unused flexcan_noirq_suspend(struct device *device) if (netif_running(dev)) { int err; - if (device_may_wakeup(device)) { + if (device_may_wakeup(device)) flexcan_enable_wakeup_irq(priv, true); - /* For auto stop mode, need to keep the clock on before - * system go into low power mode. After system go into - * low power mode, hardware will config the flexcan into - * stop mode, and gate off the clock automatically. - */ - if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - return 0; - } err = pm_runtime_force_suspend(device); if (err) @@ -2347,15 +2333,9 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device) if (netif_running(dev)) { int err; - /* For the wakeup in auto stop mode, no need to gate on the - * clock here, hardware will do this automatically. - */ - if (!(device_may_wakeup(device) && - priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE)) { - err = pm_runtime_force_resume(device); - if (err) - return err; - } + err = pm_runtime_force_resume(device); + if (err) + return err; if (device_may_wakeup(device)) flexcan_enable_wakeup_irq(priv, false); diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index 91402977780b..025c3417031f 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -68,8 +68,6 @@ #define FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR BIT(15) /* Device supports RX via FIFO */ #define FLEXCAN_QUIRK_SUPPORT_RX_FIFO BIT(16) -/* auto enter stop mode to support wakeup */ -#define FLEXCAN_QUIRK_AUTO_STOP_MODE BIT(17) struct flexcan_devtype_data { u32 quirks; /* quirks needed for different IP cores */ -- cgit From a9967c9ad290c086e1445b660263375985dffb3a Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Tue, 19 Sep 2023 11:54:01 +0200 Subject: can: tcan4x5x: Fix id2_register for tcan4553 Fix id2_register content for tcan4553. This slipped through my testing. Reported-by: Sean Anderson Closes: https://lore.kernel.org/lkml/a94e6fc8-4f08-7877-2ba0-29b9c2780136@seco.com/ Fixes: 142c6dc6d9d7 ("can: tcan4x5x: Add support for tcan4552/4553") Signed-off-by: Markus Schneider-Pargmann Reviewed-by: Simon Horman Link: https://lore.kernel.org/all/20230919095401.1312259-1-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 8a4143809d33..ae8c42f5debd 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -125,7 +125,7 @@ static const struct tcan4x5x_version_info tcan4x5x_versions[] = { }, [TCAN4553] = { .name = "4553", - .id2_register = 0x32353534, + .id2_register = 0x33353534, }, /* generic version with no id2_register at the end */ [TCAN4X5X] = { -- cgit From 5b44abbc39ca15df80d0da4756078c98c831090f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 4 Oct 2023 13:16:24 +0200 Subject: platform/x86: hp-wmi:: Mark driver struct with __refdata to prevent section mismatch warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described in the added code comment, a reference to .exit.text is ok for drivers registered via module_platform_driver_probe(). Make this explicit to prevent a section mismatch warning: WARNING: modpost: drivers/platform/x86/hp/hp-wmi: section mismatch in reference: hp_wmi_driver+0x8 (section: .data) -> hp_wmi_bios_remove (section: .exit.text) Fixes: c165b80cfecc ("hp-wmi: fix handling of platform device") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231004111624.2667753-1-u.kleine-koenig@pengutronix.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/hp/hp-wmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index e76e5458db35..8ebb7be52ee7 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -1548,7 +1548,13 @@ static const struct dev_pm_ops hp_wmi_pm_ops = { .restore = hp_wmi_resume_handler, }; -static struct platform_driver hp_wmi_driver = { +/* + * hp_wmi_bios_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver hp_wmi_driver __refdata = { .driver = { .name = "hp-wmi", .pm = &hp_wmi_pm_ops, -- cgit From 7ec4cd3c1a12dc08c60d5e376c2c05aae23f1e41 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 5 Oct 2023 07:56:14 +0000 Subject: platform: mellanox: Fix a resource leak in an error handling path in probing flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missed resource deallocation in rollback flows. Currently if an error occurs after a successful mlxplat_i2c_main_init(), mlxplat_i2c_main_exit() call is missed in rollback flow. Thus, some resources are not de-allocated. Move mlxplat_pre_exit() call from mlxplat_remove() into mlxplat_i2c_main_exit(). Call mlxplat_i2c_main_exit() instead of calling mlxplat_pre_exit() in mlxplat_probe() error handling flow. Unregister 'priv->pdev_i2c' device in mlxplat_i2c_main_init() cleanup flow if this device was successfully registered. Fixes: 158cd8320776 ("platform: mellanox: Split logic in init and exit flow") Reported-by: Christophe JAILLET Closes: https://lore.kernel.org/lkml/70165032-796e-6f5c-6748-f514e3b9d08c@linux.intel.com/T/ Signed-off-by: Vadim Pasternak Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231005075616.42777-2-vadimp@nvidia.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/mlx-platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 3d96dbf79a72..a2ffe4157df1 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -6514,6 +6514,7 @@ static int mlxplat_i2c_main_init(struct mlxplat_priv *priv) return 0; fail_mlxplat_i2c_mux_topology_init: + platform_device_unregister(priv->pdev_i2c); fail_platform_i2c_register: fail_mlxplat_mlxcpld_verify_bus_topology: return err; @@ -6521,6 +6522,7 @@ fail_mlxplat_mlxcpld_verify_bus_topology: static void mlxplat_i2c_main_exit(struct mlxplat_priv *priv) { + mlxplat_pre_exit(priv); mlxplat_i2c_mux_topology_exit(priv); if (priv->pdev_i2c) platform_device_unregister(priv->pdev_i2c); @@ -6597,7 +6599,7 @@ static int mlxplat_probe(struct platform_device *pdev) fail_register_reboot_notifier: fail_regcache_sync: - mlxplat_pre_exit(priv); + mlxplat_i2c_main_exit(priv); fail_mlxplat_i2c_main_init: fail_regmap_write: fail_alloc: @@ -6614,7 +6616,6 @@ static int mlxplat_remove(struct platform_device *pdev) pm_power_off = NULL; if (mlxplat_reboot_nb) unregister_reboot_notifier(mlxplat_reboot_nb); - mlxplat_pre_exit(priv); mlxplat_i2c_main_exit(priv); mlxplat_post_exit(); return 0; -- cgit From dac501397b9d81e4782232c39f94f4307b137452 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 5 Oct 2023 20:26:38 +0200 Subject: HID: logitech-hidpp: Fix kernel crash on receiver USB disconnect hidpp_connect_event() has *four* time-of-check vs time-of-use (TOCTOU) races when it races with itself. hidpp_connect_event() primarily runs from a workqueue but it also runs on probe() and if a "device-connected" packet is received by the hw when the thread running hidpp_connect_event() from probe() is waiting on the hw, then a second thread running hidpp_connect_event() will be started from the workqueue. This opens the following races (note the below code is simplified): 1. Retrieving + printing the protocol (harmless race): if (!hidpp->protocol_major) { hidpp_root_get_protocol_version() hidpp->protocol_major = response.rap.params[0]; } We can actually see this race hit in the dmesg in the abrt output attached to rhbz#2227968: [ 3064.624215] logitech-hidpp-device 0003:046D:4071.0049: HID++ 4.5 device connected. [ 3064.658184] logitech-hidpp-device 0003:046D:4071.0049: HID++ 4.5 device connected. Testing with extra logging added has shown that after this the 2 threads take turn grabbing the hw access mutex (send_mutex) so they ping-pong through all the other TOCTOU cases managing to hit all of them: 2. Updating the name to the HIDPP name (harmless race): if (hidpp->name == hdev->name) { ... hidpp->name = new_name; } 3. Initializing the power_supply class for the battery (problematic!): hidpp_initialize_battery() { if (hidpp->battery.ps) return 0; probe_battery(); /* Blocks, threads take turns executing this */ hidpp->battery.desc.properties = devm_kmemdup(dev, hidpp_battery_props, cnt, GFP_KERNEL); hidpp->battery.ps = devm_power_supply_register(&hidpp->hid_dev->dev, &hidpp->battery.desc, cfg); } 4. Creating delayed input_device (potentially problematic): if (hidpp->delayed_input) return; hidpp->delayed_input = hidpp_allocate_input(hdev); The really big problem here is 3. Hitting the race leads to the following sequence: hidpp->battery.desc.properties = devm_kmemdup(dev, hidpp_battery_props, cnt, GFP_KERNEL); hidpp->battery.ps = devm_power_supply_register(&hidpp->hid_dev->dev, &hidpp->battery.desc, cfg); ... hidpp->battery.desc.properties = devm_kmemdup(dev, hidpp_battery_props, cnt, GFP_KERNEL); hidpp->battery.ps = devm_power_supply_register(&hidpp->hid_dev->dev, &hidpp->battery.desc, cfg); So now we have registered 2 power supplies for the same battery, which looks a bit weird from userspace's pov but this is not even the really big problem. Notice how: 1. This is all devm-maganaged 2. The hidpp->battery.desc struct is shared between the 2 power supplies 3. hidpp->battery.desc.properties points to the result from the second devm_kmemdup() This causes a use after free scenario on USB disconnect of the receiver: 1. The last registered power supply class device gets unregistered 2. The memory from the last devm_kmemdup() call gets freed, hidpp->battery.desc.properties now points to freed memory 3. The first registered power supply class device gets unregistered, this involves sending a remove uevent to userspace which invokes power_supply_uevent() to fill the uevent data 4. power_supply_uevent() uses hidpp->battery.desc.properties which now points to freed memory leading to backtraces like this one: Sep 22 20:01:35 eric kernel: BUG: unable to handle page fault for address: ffffb2140e017f08 ... Sep 22 20:01:35 eric kernel: Workqueue: usb_hub_wq hub_event Sep 22 20:01:35 eric kernel: RIP: 0010:power_supply_uevent+0xee/0x1d0 ... Sep 22 20:01:35 eric kernel: ? asm_exc_page_fault+0x26/0x30 Sep 22 20:01:35 eric kernel: ? power_supply_uevent+0xee/0x1d0 Sep 22 20:01:35 eric kernel: ? power_supply_uevent+0x10d/0x1d0 Sep 22 20:01:35 eric kernel: dev_uevent+0x10f/0x2d0 Sep 22 20:01:35 eric kernel: kobject_uevent_env+0x291/0x680 Sep 22 20:01:35 eric kernel: power_supply_unregister+0x8e/0xa0 Sep 22 20:01:35 eric kernel: release_nodes+0x3d/0xb0 Sep 22 20:01:35 eric kernel: devres_release_group+0xfc/0x130 Sep 22 20:01:35 eric kernel: hid_device_remove+0x56/0xa0 Sep 22 20:01:35 eric kernel: device_release_driver_internal+0x19f/0x200 Sep 22 20:01:35 eric kernel: bus_remove_device+0xc6/0x130 Sep 22 20:01:35 eric kernel: device_del+0x15c/0x3f0 Sep 22 20:01:35 eric kernel: ? __queue_work+0x1df/0x440 Sep 22 20:01:35 eric kernel: hid_destroy_device+0x4b/0x60 Sep 22 20:01:35 eric kernel: logi_dj_remove+0x9a/0x100 [hid_logitech_dj 5c91534a0ead2b65e04dd799a0437e3b99b21bc4] Sep 22 20:01:35 eric kernel: hid_device_remove+0x44/0xa0 Sep 22 20:01:35 eric kernel: device_release_driver_internal+0x19f/0x200 Sep 22 20:01:35 eric kernel: bus_remove_device+0xc6/0x130 Sep 22 20:01:35 eric kernel: device_del+0x15c/0x3f0 Sep 22 20:01:35 eric kernel: ? __queue_work+0x1df/0x440 Sep 22 20:01:35 eric kernel: hid_destroy_device+0x4b/0x60 Sep 22 20:01:35 eric kernel: usbhid_disconnect+0x47/0x60 [usbhid 727dcc1c0b94e6b4418727a468398ac3bca492f3] Sep 22 20:01:35 eric kernel: usb_unbind_interface+0x90/0x270 Sep 22 20:01:35 eric kernel: device_release_driver_internal+0x19f/0x200 Sep 22 20:01:35 eric kernel: bus_remove_device+0xc6/0x130 Sep 22 20:01:35 eric kernel: device_del+0x15c/0x3f0 Sep 22 20:01:35 eric kernel: ? kobject_put+0xa0/0x1d0 Sep 22 20:01:35 eric kernel: usb_disable_device+0xcd/0x1e0 Sep 22 20:01:35 eric kernel: usb_disconnect+0xde/0x2c0 Sep 22 20:01:35 eric kernel: usb_disconnect+0xc3/0x2c0 Sep 22 20:01:35 eric kernel: hub_event+0xe80/0x1c10 There have been quite a few bug reports (see Link tags) about this crash. Fix all the TOCTOU issues, including the really bad power-supply related system crash on USB disconnect, by making probe() use the workqueue for running hidpp_connect_event() too, so that it can never run more then once. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2227221 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2227968 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2227968 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2242189 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217412#c58 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231005182638.3776-1-hdegoede@redhat.com Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index ff077df0babf..a209d51bd247 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4515,7 +4515,8 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) goto hid_hw_init_fail; } - hidpp_connect_event(hidpp); + schedule_work(&hidpp->work); + flush_work(&hidpp->work); if (will_restart) { /* Reset the HID node state */ -- cgit From d7614a2733f5e354c075be178b068a241d5d8b11 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 Oct 2023 17:11:05 -0300 Subject: media: dt-bindings: imx7-csi: Make power-domains not required for imx8mq On i.MX8MQ the MIPI CSI block does have an associated power-domain, but the CSI bridge does not. Remove the power-domains requirement from the i.MX8MQ CSI bridge to fix the following schema warning: imx8mq-librem5-r4.dtb: csi@30a90000: 'power-domains' is a required property from schema $id: http://devicetree.org/schemas/media/nxp,imx7-csi.yaml# Fixes: de655386845a ("media: dt-bindings: media: imx7-csi: Document i.MX8M power-domains property") Signed-off-by: Fabio Estevam Acked-by: Conor Dooley Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20231004201105.2323758-1-festevam@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml index 358019e85d90..326284e151f6 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml @@ -59,7 +59,6 @@ allOf: compatible: contains: enum: - - fsl,imx8mq-csi - fsl,imx8mm-csi then: required: -- cgit From c132d9c79e25cdbcd4eeb3c6009f746e137cfad0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Sep 2023 16:26:34 -0500 Subject: dt-bindings: bus: fsl,imx8qxp-pixel-link-msi-bus: Drop child 'reg' property A bus schema based on simple-pm-bus shouldn't define how many 'reg' entries a child device has. That is a property of the device. Drop the 'reg' entry. Reviewed-by: Liu Ying Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230925212639.1975002-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml index b568d0ce438d..7e1ffc551046 100644 --- a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml +++ b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml @@ -73,9 +73,6 @@ patternProperties: "^.*@[0-9a-f]+$": description: Devices attached to the bus type: object - properties: - reg: - maxItems: 1 required: - reg -- cgit From a47f580c8a31a7f3f20676bc2aa7e6b1403648af Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Sep 2023 16:27:58 -0500 Subject: media: dt-bindings: Add missing unevaluatedProperties on child node schemas Just as unevaluatedProperties or additionalProperties are required at the top level of schemas, they should (and will) also be required for child node schemas. That ensures only documented properties are present for any node. Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230925212803.1976803-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml | 1 + Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml | 2 ++ Documentation/devicetree/bindings/media/samsung,fimc.yaml | 1 + 3 files changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml index ffccf5f3c9e3..642f9b15d359 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml @@ -54,6 +54,7 @@ properties: port: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false properties: endpoint: diff --git a/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml b/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml index c5cab549ee8e..1c476b635b69 100644 --- a/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml +++ b/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml @@ -69,6 +69,7 @@ properties: properties: port@0: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Input port properties: @@ -89,6 +90,7 @@ properties: port@1: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Output port properties: diff --git a/Documentation/devicetree/bindings/media/samsung,fimc.yaml b/Documentation/devicetree/bindings/media/samsung,fimc.yaml index 79ff6d83a9fd..b3486c38a05b 100644 --- a/Documentation/devicetree/bindings/media/samsung,fimc.yaml +++ b/Documentation/devicetree/bindings/media/samsung,fimc.yaml @@ -57,6 +57,7 @@ properties: patternProperties: "^port@[01]$": $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Camera A and camera B inputs. -- cgit From 4ef718d66c3470705d1d2339f877085cb1755ac0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Sep 2023 10:53:40 -0500 Subject: dt-bindings: PCI: brcm,iproc-pcie: Fix example indentation The example's indentation is off. While fixing this, the 'bus' node is unnecessary and can be dropped. It is also preferred to split up unrelated examples to their own entries. Reviewed-by: Florian Fainelli Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230926155351.31117-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/pci/brcm,iproc-pcie.yaml | 124 ++++++++++----------- 1 file changed, 60 insertions(+), 64 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml index 0972868735fc..0cb5bd6cffa1 100644 --- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml @@ -117,68 +117,64 @@ unevaluatedProperties: false examples: - | - #include - - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie0: pcie@18012000 { - compatible = "brcm,iproc-pcie"; - reg = <0x18012000 0x1000>; - - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; - - linux,pci-domain = <0>; - - bus-range = <0x00 0xff>; - - #address-cells = <3>; - #size-cells = <2>; - device_type = "pci"; - ranges = <0x81000000 0 0 0x28000000 0 0x00010000>, - <0x82000000 0 0x20000000 0x20000000 0 0x04000000>; - - phys = <&phy 0 5>; - phy-names = "pcie-phy"; - - brcm,pcie-ob; - brcm,pcie-ob-axi-offset = <0x00000000>; - - msi-parent = <&msi0>; - - /* iProc event queue based MSI */ - msi0: msi { - compatible = "brcm,iproc-msi"; - msi-controller; - interrupt-parent = <&gic>; - interrupts = , - , - , - ; - }; - }; - - pcie1: pcie@18013000 { - compatible = "brcm,iproc-pcie"; - reg = <0x18013000 0x1000>; - - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; - - linux,pci-domain = <1>; - - bus-range = <0x00 0xff>; - - #address-cells = <3>; - #size-cells = <2>; - device_type = "pci"; - ranges = <0x81000000 0 0 0x48000000 0 0x00010000>, - <0x82000000 0 0x40000000 0x40000000 0 0x04000000>; - - phys = <&phy 1 6>; - phy-names = "pcie-phy"; - }; + #include + + pcie@18012000 { + compatible = "brcm,iproc-pcie"; + reg = <0x18012000 0x1000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; + + linux,pci-domain = <0>; + + bus-range = <0x00 0xff>; + + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + ranges = <0x81000000 0 0 0x28000000 0 0x00010000>, + <0x82000000 0 0x20000000 0x20000000 0 0x04000000>; + + phys = <&phy 0 5>; + phy-names = "pcie-phy"; + + brcm,pcie-ob; + brcm,pcie-ob-axi-offset = <0x00000000>; + + msi-parent = <&msi0>; + + /* iProc event queue based MSI */ + msi0: msi { + compatible = "brcm,iproc-msi"; + msi-controller; + interrupt-parent = <&gic>; + interrupts = , + , + , + ; + }; + }; + - | + pcie@18013000 { + compatible = "brcm,iproc-pcie"; + reg = <0x18013000 0x1000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; + + linux,pci-domain = <1>; + + bus-range = <0x00 0xff>; + + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + ranges = <0x81000000 0 0 0x48000000 0 0x00010000>, + <0x82000000 0 0x40000000 0x40000000 0 0x04000000>; + + phys = <&phy 1 6>; + phy-names = "pcie-phy"; }; -- cgit From e2745e634c6506ba02ea219b3b1c70a5ebc2cfb0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Sep 2023 10:53:41 -0500 Subject: dt-bindings: PCI: brcm,iproc-pcie: Drop common pci-bus properties Drop the unnecessary listing of properties already defined in pci-bus.yaml. Unless there are additional constraints, it is not necessary. Acked-by: Conor Dooley Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230926155351.31117-2-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/pci/brcm,iproc-pcie.yaml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml index 0cb5bd6cffa1..6730d68fedc7 100644 --- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml @@ -34,13 +34,6 @@ properties: description: > Base address and length of the PCIe controller I/O register space - interrupt-map: true - - interrupt-map-mask: true - - "#interrupt-cells": - const: 1 - ranges: minItems: 1 maxItems: 2 @@ -54,16 +47,8 @@ properties: items: - const: pcie-phy - bus-range: true - dma-coherent: true - "#address-cells": true - - "#size-cells": true - - device_type: true - brcm,pcie-ob: type: boolean description: > -- cgit From d6e201f88ae0ebeae82e16a2775ca301e07560d9 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Sep 2023 10:56:09 -0500 Subject: dt-bindings: PCI: brcm,iproc-pcie: Fix 'msi' child node schema The 'msi' child node schema is missing constraints on additional properties. It turns out it is incomplete and properties for it are documented in the parent node by mistake. Move the reference to msi-controller.yaml and the custom properties to the 'msi' node. Adding 'unevaluatedProperties' ensures all the properties in the 'msi' node are documented. With the schema corrected, a minimal interrupt controller node is needed to properly decode the interrupt properties since the example has multiple interrupt parents. Acked-by: Conor Dooley Reviewed-by: Florian Fainelli Fixes: 905b986d099c ("dt-bindings: pci: Convert iProc PCIe to YAML") Link: https://lore.kernel.org/r/20230926155613.33904-3-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/pci/brcm,iproc-pcie.yaml | 24 ++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml index 6730d68fedc7..0e07ab61a48d 100644 --- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml @@ -12,7 +12,6 @@ maintainers: allOf: - $ref: /schemas/pci/pci-bus.yaml# - - $ref: /schemas/interrupt-controller/msi-controller.yaml# properties: compatible: @@ -63,20 +62,24 @@ properties: msi: type: object + $ref: /schemas/interrupt-controller/msi-controller.yaml# + unevaluatedProperties: false + properties: compatible: items: - const: brcm,iproc-msi - msi-parent: true + interrupts: + maxItems: 4 - msi-controller: true + brcm,pcie-msi-inten: + type: boolean + description: + Needs to be present for some older iProc platforms that require the + interrupt enable registers to be set explicitly to enable MSI - brcm,pcie-msi-inten: - type: boolean - description: > - Needs to be present for some older iProc platforms that require the - interrupt enable registers to be set explicitly to enable MSI + msi-parent: true dependencies: brcm,pcie-ob-axi-offset: ["brcm,pcie-ob"] @@ -104,6 +107,11 @@ examples: - | #include + gic: interrupt-controller { + interrupt-controller; + #interrupt-cells = <3>; + }; + pcie@18012000 { compatible = "brcm,iproc-pcie"; reg = <0x18012000 0x1000>; -- cgit From 19007c629c63c76ae0f8adee9dc076bda4788b46 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 4 Oct 2023 18:39:27 +0200 Subject: dt-bindings: trivial-devices: Fix MEMSIC MXC4005 compatible string The correct name of this chip is MXC4005, not MX4005. This is confirmed both by the manufacturer website and by the title of the original commit, which added other MXCxxxx devices as well but only this one misses a "c" in the compatible string. Signed-off-by: Luca Ceresoli Fixes: d9bf5d37fd58 ("dt-bindings:trivial-devices: Add memsic,mxc4005/mxc6255/mxc6655 entries") Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231004-mxc4005-device-tree-support-v1-1-e7c0faea72e4@bootlin.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/trivial-devices.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index cd58179ae337..430a814f64a5 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -232,7 +232,7 @@ properties: # MEMSIC magnetometer - memsic,mmc35240 # MEMSIC 3-axis accelerometer - - memsic,mx4005 + - memsic,mxc4005 # MEMSIC 2-axis 8-bit digital accelerometer - memsic,mxc6225 # MEMSIC 2-axis 8-bit digital accelerometer -- cgit From cb3a7f63e99f849a3382cb0a2d29ca8a90d22ec7 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 3 Oct 2023 13:13:43 +0200 Subject: MAINTAINERS: Add Angelo as MediaTek SoC co-maintainer I am a top reviewer mainly for MediaTek SoC related patches in most subsystems and I've also been upstreaming both improvements, fixes and new drivers and devicetrees when required. The MediaTek scene saw a generous increase in number of patches that are sent to the lists every week, increasing the amount of required efforts to maintain the MTK bits overall, and we will possibly see even more of that. For this reason, and also because of suggestions and encouragement coming from the community, I'm stepping up to be a co-maintainer of MediaTek SoCs support. Acked-by: Matthias Brugger Signed-off-by: AngeloGioacchino Del Regno Acked-by: Mathieu Poirier Link: https://lore.kernel.org/r/20230929082009.71843-1-angelogioacchino.delregno@collabora.com Link: https://lore.kernel.org/r/20231003-mediatek-fixes-v6-7-v1-1-dad7cd62a8ff@collabora.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b19995690904..b6a1b7a86875 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2327,7 +2327,7 @@ F: drivers/rtc/rtc-mt7622.c ARM/Mediatek SoC support M: Matthias Brugger -R: AngeloGioacchino Del Regno +M: AngeloGioacchino Del Regno L: linux-kernel@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) -- cgit From 25389c03c21c9587dd21c768d1cbfa514a3ca211 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 3 Oct 2023 13:13:44 +0200 Subject: arm64: dts: mediatek: mt8195-demo: fix the memory size to 8GB The onboard dram of mt8195-demo board is 8GB. Cc: stable@vger.kernel.org # 6.1, 6.4, 6.5 Fixes: 6147314aeedc ("arm64: dts: mediatek: Add device-tree for MT8195 Demo board") Signed-off-by: Macpaul Lin Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230905034511.11232-1-macpaul.lin@mediatek.com Link: https://lore.kernel.org/r/20231003-mediatek-fixes-v6-7-v1-2-dad7cd62a8ff@collabora.com Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index b2485ddfd33b..ff363ab925e9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -48,7 +48,7 @@ memory@40000000 { device_type = "memory"; - reg = <0 0x40000000 0 0x80000000>; + reg = <0 0x40000000 0x2 0x00000000>; }; reserved-memory { -- cgit From 6cd2a30b96a4b2d270bc1ef1611429dc3fa63327 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 3 Oct 2023 13:13:45 +0200 Subject: arm64: dts: mediatek: mt8195-demo: update and reorder reserved memory regions The dts file of the MediaTek MT8195 demo board has been updated to include new reserved memory regions. These reserved memory regions are: - SCP - VPU, - Sound DMA - APU. These regions are defined with the "shared-dma-pool" compatible property. In addition, the existing reserved memory regions have been reordered by their addresses to improve readability and maintainability of the DTS file. Cc: stable@vger.kernel.org # 6.1, 6.4, 6.5 Fixes: e4a417520101 ("arm64: dts: mediatek: mt8195-demo: fix the memory size of node secmon") Signed-off-by: Macpaul Lin Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230905034511.11232-2-macpaul.lin@mediatek.com Link: https://lore.kernel.org/r/20231003-mediatek-fixes-v6-7-v1-3-dad7cd62a8ff@collabora.com Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 37 ++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index ff363ab925e9..5d635085fe3f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -56,13 +56,8 @@ #size-cells = <2>; ranges; - /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ - bl31_secmon_reserved: secmon@54600000 { - no-map; - reg = <0 0x54600000 0x0 0x200000>; - }; - - /* 12 MiB reserved for OP-TEE (BL32) + /* + * 12 MiB reserved for OP-TEE (BL32) * +-----------------------+ 0x43e0_0000 * | SHMEM 2MiB | * +-----------------------+ 0x43c0_0000 @@ -75,6 +70,34 @@ no-map; reg = <0 0x43200000 0 0x00c00000>; }; + + scp_mem: memory@50000000 { + compatible = "shared-dma-pool"; + reg = <0 0x50000000 0 0x2900000>; + no-map; + }; + + vpu_mem: memory@53000000 { + compatible = "shared-dma-pool"; + reg = <0 0x53000000 0 0x1400000>; /* 20 MB */ + }; + + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ + bl31_secmon_mem: memory@54600000 { + no-map; + reg = <0 0x54600000 0x0 0x200000>; + }; + + snd_dma_mem: memory@60000000 { + compatible = "shared-dma-pool"; + reg = <0 0x60000000 0 0x1100000>; + no-map; + }; + + apu_mem: memory@62000000 { + compatible = "shared-dma-pool"; + reg = <0 0x62000000 0 0x1400000>; /* 20 MB */ + }; }; }; -- cgit From 963c3b0c47ec29b4c49c9f45965cd066f419d17f Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 3 Oct 2023 13:13:46 +0200 Subject: arm64: dts: mediatek: fix t-phy unit name dtbs_check throws a warning at t-phy nodes: Warning (unit_address_vs_reg): /t-phy@1a243000: node has a unit name, but no reg or ranges property Warning (unit_address_vs_reg): /soc/t-phy@11c00000: node has a unit name, but no reg or ranges property The ranges is empty thus removing the `@1a243000`, `@11c00000` from the node name. Fixes: 6029cae696c8 ("arm64: dts: mediatek: mt7622: harmonize node names and compatibles") Fixes: 918aed7abd2d ("arm64: dts: mt7986: add pcie related device nodes") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814093931.9298-2-eugen.hristev@collabora.com Link: https://lore.kernel.org/r/20231003-mediatek-fixes-v6-7-v1-4-dad7cd62a8ff@collabora.com Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 36ef2dbe8add..3ee9266fa8e9 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -905,7 +905,7 @@ status = "disabled"; }; - sata_phy: t-phy@1a243000 { + sata_phy: t-phy { compatible = "mediatek,mt7622-tphy", "mediatek,generic-tphy-v1"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 68539ea788df..24eda00e320d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -434,7 +434,7 @@ }; }; - pcie_phy: t-phy@11c00000 { + pcie_phy: t-phy { compatible = "mediatek,mt7986-tphy", "mediatek,generic-tphy-v2"; #address-cells = <2>; -- cgit From d192615c307ec9f74cd0582880ece698533eb99b Mon Sep 17 00:00:00 2001 From: "Nícolas F. R. A. Prado" Date: Tue, 3 Oct 2023 13:13:47 +0200 Subject: arm64: dts: mediatek: mt8195: Set DSU PMU status to fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DSU PMU allows monitoring performance events in the DSU cluster, which is done by configuring and reading back values from the DSU PMU system registers. However, for write-access to be allowed by ELs lower than EL3, the EL3 firmware needs to update the setting on the ACTLR3_EL3 register, as it is disallowed by default. That configuration is not done on the firmware used by the MT8195 SoC, as a consequence, booting a MT8195-based machine like mt8195-cherry-tomato-r2 with CONFIG_ARM_DSU_PMU enabled hangs the kernel just as it writes to the CLUSTERPMOVSCLR_EL1 register, since the instruction faults to EL3, and BL31 apparently just re-runs the instruction over and over. Mark the DSU PMU node in the Devicetree with status "fail", as the machine doesn't have a suitable firmware to make use of it from the kernel, and allowing its driver to probe would hang the kernel. Fixes: 37f2582883be ("arm64: dts: Add mediatek SoC mt8195 and evaluation board") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230720200753.322133-1-nfraprado@collabora.com Link: https://lore.kernel.org/r/20231003-mediatek-fixes-v6-7-v1-5-dad7cd62a8ff@collabora.com Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 4dbbf8fdab75..43011bc41da7 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -313,6 +313,7 @@ interrupts = ; cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>; + status = "fail"; }; dmic_codec: dmic-codec { -- cgit From aba0e909dc20eceb1de985474af459f82e7b0b82 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 5 Oct 2023 15:50:16 +0300 Subject: devlink: Hold devlink lock on health reporter dump get Devlink health dump get callback should take devlink lock as any other devlink callback. Otherwise, since devlink_mutex was removed, this callback is not protected from a race of the reporter being destroyed while handling the callback. Add devlink lock to the callback and to any call for devlink_health_do_dump(). This should be safe as non of the drivers dump callback implementation takes devlink lock. As devlink lock is added to any callback of dump, the reporter dump_lock is now redundant and can be removed. Fixes: d3efc2a6a6d8 ("net: devlink: remove devlink_mutex") Signed-off-by: Moshe Shemesh Reviewed-by: Jiri Pirko Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/1696510216-189379-1-git-send-email-moshe@nvidia.com Signed-off-by: Jakub Kicinski --- net/devlink/health.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/net/devlink/health.c b/net/devlink/health.c index 638cad8d5c65..51e6e81e31bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -58,7 +58,6 @@ struct devlink_health_reporter { struct devlink *devlink; struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; - struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; bool auto_recover; bool auto_dump; @@ -125,7 +124,6 @@ __devlink_health_reporter_create(struct devlink *devlink, reporter->graceful_period = graceful_period; reporter->auto_recover = !!ops->recover; reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); return reporter; } @@ -226,7 +224,6 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); static void devlink_health_reporter_free(struct devlink_health_reporter *reporter) { - mutex_destroy(&reporter->dump_lock); if (reporter->dump_fmsg) devlink_fmsg_free(reporter->dump_fmsg); kfree(reporter); @@ -625,10 +622,10 @@ int devlink_health_report(struct devlink_health_reporter *reporter, } if (reporter->auto_dump) { - mutex_lock(&reporter->dump_lock); + devl_lock(devlink); /* store current dump of current error, for later analysis */ devlink_health_do_dump(reporter, priv_ctx, NULL); - mutex_unlock(&reporter->dump_lock); + devl_unlock(devlink); } if (!reporter->auto_recover) @@ -1262,7 +1259,7 @@ out: } static struct devlink_health_reporter * -devlink_health_reporter_get_from_cb(struct netlink_callback *cb) +devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); struct devlink_health_reporter *reporter; @@ -1272,10 +1269,12 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) return NULL; - devl_unlock(devlink); reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); - devlink_put(devlink); + if (!reporter) { + devl_unlock(devlink); + devlink_put(devlink); + } return reporter; } @@ -1284,16 +1283,20 @@ int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_health_reporter *reporter; + struct devlink *devlink; int err; - reporter = devlink_health_reporter_get_from_cb(cb); + reporter = devlink_health_reporter_get_from_cb_lock(cb); if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + devlink = reporter->devlink; + if (!reporter->ops->dump) { + devl_unlock(devlink); + devlink_put(devlink); return -EOPNOTSUPP; + } - mutex_lock(&reporter->dump_lock); if (!state->idx) { err = devlink_health_do_dump(reporter, NULL, cb->extack); if (err) @@ -1309,7 +1312,8 @@ int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET); unlock: - mutex_unlock(&reporter->dump_lock); + devl_unlock(devlink); + devlink_put(devlink); return err; } @@ -1326,9 +1330,7 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, if (!reporter->ops->dump) return -EOPNOTSUPP; - mutex_lock(&reporter->dump_lock); devlink_health_dump_clear(reporter); - mutex_unlock(&reporter->dump_lock); return 0; } -- cgit From 3da5d2de92387a8322965c7fb1365f7cae690e5a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 6 Oct 2023 19:05:57 -0400 Subject: MAINTAINERS: update the dm-devel mailing list dm-devel@redhat.com has migrated to dm-devel@lists.linux.dev Signed-off-by: Mike Snitzer --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bf0f54c24f81..8b17e29a75c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5985,8 +5985,8 @@ F: include/linux/devm-helpers.h DEVICE-MAPPER (LVM) M: Alasdair Kergon M: Mike Snitzer -M: dm-devel@redhat.com -L: dm-devel@redhat.com +M: dm-devel@lists.linux.dev +L: dm-devel@lists.linux.dev S: Maintained W: http://sources.redhat.com/dm Q: http://patchwork.kernel.org/project/dm-devel/list/ -- cgit From e6864af61493113558c502b5cd0d754c19b93277 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 5 Oct 2023 10:12:00 +0900 Subject: ravb: Fix up dma_free_coherent() call in ravb_remove() In ravb_remove(), dma_free_coherent() should be call after unregister_netdev(). Otherwise, this controller is possible to use the freed buffer. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20231005011201.14368-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 7df9f9f8e134..9e2e801049cc 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2891,8 +2891,6 @@ static int ravb_remove(struct platform_device *pdev) clk_disable_unprepare(priv->gptp_clk); clk_disable_unprepare(priv->refclk); - dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, - priv->desc_bat_dma); /* Set reset mode */ ravb_write(ndev, CCC_OPC_RESET, CCC); unregister_netdev(ndev); @@ -2900,6 +2898,8 @@ static int ravb_remove(struct platform_device *pdev) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); ravb_mdio_release(priv); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, + priv->desc_bat_dma); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(priv->rstc); -- cgit From 3971442870713de527684398416970cf025b4f89 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 5 Oct 2023 10:12:01 +0900 Subject: ravb: Fix use-after-free issue in ravb_tx_timeout_work() The ravb_stop() should call cancel_work_sync(). Otherwise, ravb_tx_timeout_work() is possible to use the freed priv after ravb_remove() was called like below: CPU0 CPU1 ravb_tx_timeout() ravb_remove() unregister_netdev() free_netdev(ndev) // free priv ravb_tx_timeout_work() // use priv unregister_netdev() will call .ndo_stop() so that ravb_stop() is called. And, after phy_stop() is called, netif_carrier_off() is also called. So that .ndo_tx_timeout() will not be called after phy_stop(). Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reported-by: Zheng Wang Closes: https://lore.kernel.org/netdev/20230725030026.1664873-1-zyytlz.wz@163.com/ Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20231005011201.14368-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9e2e801049cc..0ef0b88b7145 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2167,6 +2167,8 @@ static int ravb_close(struct net_device *ndev) of_phy_deregister_fixed_link(np); } + cancel_work_sync(&priv->work); + if (info->multi_irqs) { free_irq(priv->tx_irqs[RAVB_NC], ndev); free_irq(priv->rx_irqs[RAVB_NC], ndev); -- cgit From a4fe78386afb94780f8e6fcd10a67c4d4dfe4da8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:49 +0200 Subject: bpf: Fix BPF_PROG_QUERY last field check While working on the ebpf-go [0] library integration for bpf_mprog and tcx, Lorenz noticed that two subsequent BPF_PROG_QUERY requests currently fail. A typical workflow is to first gather the bpf_mprog count without passing program/ link arrays, followed by the second request which contains the actual array pointers. The initial call populates count and revision fields. The second call gets rejected due to a BPF_PROG_QUERY_LAST_FIELD bug which should point to query.revision instead of query.link_attach_flags since the former is really the last member. It was not noticed in libbpf as bpf_prog_query_opts() always calls bpf(2) with an on-stack bpf_attr that is memset() each time (and therefore query.revision was reset to zero). [0] https://ebpf-go.dev Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support") Reported-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index eb01c31ed591..453a43695a23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3913,7 +3913,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } -#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags +#define BPF_PROG_QUERY_LAST_FIELD query.revision static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) -- cgit From edfa9af0a73ecc2000d7bb81d0b0fd3158cc9a65 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:50 +0200 Subject: bpf: Handle bpf_mprog_query with NULL entry Improve consistency for bpf_mprog_query() API and let the latter also handle a NULL entry as can be the case for tcx. Instead of returning -ENOENT, we copy a count of 0 and revision of 1 to user space, so that this can be fed into a subsequent bpf_mprog_attach() call as expected_revision. A BPF self- test as part of this series has been added to assert this case. Suggested-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- kernel/bpf/mprog.c | 10 ++++++---- kernel/bpf/tcx.c | 8 +------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c index 007d98c799e2..1394168062e8 100644 --- a/kernel/bpf/mprog.c +++ b/kernel/bpf/mprog.c @@ -401,14 +401,16 @@ int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, struct bpf_mprog_cp *cp; struct bpf_prog *prog; const u32 flags = 0; + u32 id, count = 0; + u64 revision = 1; int i, ret = 0; - u32 id, count; - u64 revision; if (attr->query.query_flags || attr->query.attach_flags) return -EINVAL; - revision = bpf_mprog_revision(entry); - count = bpf_mprog_total(entry); + if (entry) { + revision = bpf_mprog_revision(entry); + count = bpf_mprog_total(entry); + } if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) return -EFAULT; if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c index 13f0b5dc8262..1338a13a8b64 100644 --- a/kernel/bpf/tcx.c +++ b/kernel/bpf/tcx.c @@ -123,7 +123,6 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { bool ingress = attr->query.attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; - struct bpf_mprog_entry *entry; struct net_device *dev; int ret; @@ -133,12 +132,7 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) ret = -ENODEV; goto out; } - entry = tcx_entry_fetch(dev, ingress); - if (!entry) { - ret = -ENOENT; - goto out; - } - ret = bpf_mprog_query(attr, uattr, entry); + ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress)); out: rtnl_unlock(); return ret; -- cgit From ba62d61128bda71fd02622f320ac59d861fc4baa Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Sat, 7 Oct 2023 00:06:51 +0200 Subject: bpf: Refuse unused attributes in bpf_prog_{attach,detach} The recently added tcx attachment extended the BPF UAPI for attaching and detaching by a couple of fields. Those fields are currently only supported for tcx, other types like cgroups and flow dissector silently ignore the new fields except for the new flags. This is problematic once we extend bpf_mprog to older attachment types, since it's hard to figure out whether the syscall really was successful if the kernel silently ignores non-zero values. Explicitly reject non-zero fields relevant to bpf_mprog for attachment types which don't use the latter yet. Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support") Signed-off-by: Lorenz Bauer Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-3-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- kernel/bpf/syscall.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 453a43695a23..d77b2f8b9364 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3796,7 +3796,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; struct bpf_prog *prog; - u32 mask; int ret; if (CHECK_ATTR(BPF_PROG_ATTACH)) @@ -3805,10 +3804,16 @@ static int bpf_prog_attach(const union bpf_attr *attr) ptype = attach_type_to_prog_type(attr->attach_type); if (ptype == BPF_PROG_TYPE_UNSPEC) return -EINVAL; - mask = bpf_mprog_supported(ptype) ? - BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE; - if (attr->attach_flags & ~mask) - return -EINVAL; + if (bpf_mprog_supported(ptype)) { + if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) + return -EINVAL; + } else { + if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) + return -EINVAL; + if (attr->relative_fd || + attr->expected_revision) + return -EINVAL; + } prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); if (IS_ERR(prog)) @@ -3878,6 +3883,10 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); } + } else if (attr->attach_flags || + attr->relative_fd || + attr->expected_revision) { + return -EINVAL; } switch (ptype) { -- cgit From f9b08790fa695f6304c2ad531bf9d249c63b5c33 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:52 +0200 Subject: selftests/bpf: Test bpf_mprog query API via libbpf and raw syscall Add a new test case which performs double query of the bpf_mprog through libbpf API, but also via raw bpf(2) syscall. This is testing to gather first the count and then in a subsequent probe the full information with the program array without clearing passed structs in between. # ./vmtest.sh -- ./test_progs -t tc_opts [...] ./test_progs -t tc_opts [ 1.398818] tsc: Refined TSC clocksource calibration: 3407.999 MHz [ 1.400263] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fd336761, max_idle_ns: 440795243819 ns [ 1.402734] clocksource: Switched to clocksource tsc [ 1.426639] bpf_testmod: loading out-of-tree module taints kernel. [ 1.428112] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #252 tc_opts_after:OK #253 tc_opts_append:OK #254 tc_opts_basic:OK #255 tc_opts_before:OK #256 tc_opts_chain_classic:OK #257 tc_opts_chain_mixed:OK #258 tc_opts_delete_empty:OK #259 tc_opts_demixed:OK #260 tc_opts_detach:OK #261 tc_opts_detach_after:OK #262 tc_opts_detach_before:OK #263 tc_opts_dev_cleanup:OK #264 tc_opts_invalid:OK #265 tc_opts_max:OK #266 tc_opts_mixed:OK #267 tc_opts_prepend:OK #268 tc_opts_query:OK <--- (new test) #269 tc_opts_replace:OK #270 tc_opts_revision:OK Summary: 19/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-4-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 167 +++++++++++++++++++++++ 1 file changed, 167 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 99af79ea21a9..aeec10bb3396 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2462,3 +2462,170 @@ void serial_test_tc_opts_max(void) test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true); test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false); } + +static void test_tc_opts_query_target(int target) +{ + const size_t attr_size = offsetofend(union bpf_attr, query); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + union bpf_attr attr; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + /* Test 1: Double query via libbpf API */ + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids, NULL, "prog_ids"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + /* Test 2: Double query via bpf_attr & bpf(2) directly */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query(void) +{ + test_tc_opts_query_target(BPF_TCX_INGRESS); + test_tc_opts_query_target(BPF_TCX_EGRESS); +} -- cgit From b77368269dda3f5d09c3ffa1b59021f6b74a2fa2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:53 +0200 Subject: selftests/bpf: Adapt assert_mprog_count to always expect 0 count Simplify __assert_mprog_count() to remove the -ENOENT corner case as the bpf_prog_query() now returns 0 when no bpf_mprog is attached. This also allows to convert a few test cases from using raw __assert_mprog_count() over to plain assert_mprog_count() helper. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-5-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/tc_helpers.h | 11 ++++------- tools/testing/selftests/bpf/prog_tests/tc_links.c | 2 +- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 6 +++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 6c93215be8a3..c88dce27a958 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -45,7 +45,7 @@ static inline __u32 ifindex_from_link_fd(int fd) return link_info.tcx.ifindex; } -static inline void __assert_mprog_count(int target, int expected, bool miniq, int ifindex) +static inline void __assert_mprog_count(int target, int expected, int ifindex) { __u32 count = 0, attach_flags = 0; int err; @@ -53,20 +53,17 @@ static inline void __assert_mprog_count(int target, int expected, bool miniq, in err = bpf_prog_query(ifindex, target, 0, &attach_flags, NULL, &count); ASSERT_EQ(count, expected, "count"); - if (!expected && !miniq) - ASSERT_EQ(err, -ENOENT, "prog_query"); - else - ASSERT_EQ(err, 0, "prog_query"); + ASSERT_EQ(err, 0, "prog_query"); } static inline void assert_mprog_count(int target, int expected) { - __assert_mprog_count(target, expected, false, loopback); + __assert_mprog_count(target, expected, loopback); } static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected) { - __assert_mprog_count(target, expected, false, ifindex); + __assert_mprog_count(target, expected, ifindex); } #endif /* TC_HELPERS */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 74fc1fe9ee26..073fbdbea968 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -1667,7 +1667,7 @@ static void test_tc_chain_mixed(int target) if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); ASSERT_OK(system(ping_cmd), ping_cmd); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index aeec10bb3396..2174bea3427e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -635,7 +635,7 @@ cleanup_detach: if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, chain_tc_old, loopback); + assert_mprog_count(target, 0); cleanup: if (tc_attached) { tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; @@ -2250,7 +2250,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old) BPF_TC_INGRESS : BPF_TC_EGRESS; err = bpf_tc_hook_create(&tc_hook); ASSERT_OK(err, "bpf_tc_hook_create"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); } err = bpf_prog_detach_opts(0, loopback, target, &optd); ASSERT_EQ(err, -ENOENT, "prog_detach"); @@ -2352,7 +2352,7 @@ static void test_tc_chain_mixed(int target) cleanup_opts: err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); ASSERT_OK(system(ping_cmd), ping_cmd); -- cgit From 685446b0629b53a7574886fde40126d47c51d7d4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:54 +0200 Subject: selftests/bpf: Test query on empty mprog and pass revision into attach Add a new test case to query on an empty bpf_mprog and pass the revision directly into expected_revision for attachment to assert that this does succeed. ./test_progs -t tc_opts [ 1.406778] tsc: Refined TSC clocksource calibration: 3407.990 MHz [ 1.408863] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fcaf6eb0, max_idle_ns: 440795321766 ns [ 1.412419] clocksource: Switched to clocksource tsc [ 1.428671] bpf_testmod: loading out-of-tree module taints kernel. [ 1.430260] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #252 tc_opts_after:OK #253 tc_opts_append:OK #254 tc_opts_basic:OK #255 tc_opts_before:OK #256 tc_opts_chain_classic:OK #257 tc_opts_chain_mixed:OK #258 tc_opts_delete_empty:OK #259 tc_opts_demixed:OK #260 tc_opts_detach:OK #261 tc_opts_detach_after:OK #262 tc_opts_detach_before:OK #263 tc_opts_dev_cleanup:OK #264 tc_opts_invalid:OK #265 tc_opts_max:OK #266 tc_opts_mixed:OK #267 tc_opts_prepend:OK #268 tc_opts_query:OK #269 tc_opts_query_attach:OK <--- (new test) #270 tc_opts_replace:OK #271 tc_opts_revision:OK Summary: 20/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-6-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 2174bea3427e..ba91b0226839 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2629,3 +2629,62 @@ void serial_test_tc_opts_query(void) test_tc_opts_query_target(BPF_TCX_INGRESS); test_tc_opts_query_target(BPF_TCX_EGRESS); } + +static void test_tc_opts_query_attach_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct test_tc_link *skel; + __u32 prog_ids[2]; + __u32 fd1, id1; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + id1 = id_from_prog_fd(fd1); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 0, "count"); + ASSERT_EQ(optq.revision, 1, "revision"); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = optq.revision, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query_attach(void) +{ + test_tc_opts_query_attach_target(BPF_TCX_INGRESS); + test_tc_opts_query_attach_target(BPF_TCX_EGRESS); +} -- cgit From 37345b8535b44daa4021426fa0ea8d6ed6142112 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Oct 2023 00:06:55 +0200 Subject: selftests/bpf: Make seen_tc* variable tests more robust Martin reported that on his local dev machine the test_tc_chain_mixed() fails as "test_tc_chain_mixed:FAIL:seen_tc5 unexpected seen_tc5: actual 1 != expected 0" and others occasionally, too. However, when running in a more isolated setup (qemu in particular), it works fine for him. The reason is that there is a small race-window where seen_tc* could turn into true for various test cases when there is background traffic, e.g. after the asserts they often get reset. In such case when subsequent detach takes place, unrelated background traffic could have already flipped the bool to true beforehand. Add a small helper tc_skel_reset_all_seen() to reset all bools before we do the ping test. At this point, everything is set up as expected and therefore no race can occur. All tc_{opts,links} tests continue to pass after this change. Reported-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20231006220655.1653-7-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/tc_helpers.h | 5 ++ tools/testing/selftests/bpf/prog_tests/tc_links.c | 62 ++++++++-------------- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 39 +++++++------- 3 files changed, 46 insertions(+), 60 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index c88dce27a958..67f985f7d215 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -66,4 +66,9 @@ static inline void assert_mprog_count_ifindex(int ifindex, int target, int expec __assert_mprog_count(target, expected, ifindex); } +static inline void tc_skel_reset_all_seen(struct test_tc_link *skel) +{ + memset(skel->bss, 0, sizeof(*skel->bss)); +} + #endif /* TC_HELPERS */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 073fbdbea968..bc9841144685 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -65,6 +65,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -97,6 +98,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -187,6 +189,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -194,9 +197,6 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -246,6 +246,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -342,6 +343,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -349,9 +351,6 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, .relative_fd = bpf_program__fd(skel->progs.tc1), @@ -401,6 +400,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -502,6 +502,7 @@ static void test_tc_links_revision_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -581,22 +582,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -707,16 +706,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_REPLACE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -781,16 +777,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "link_detach")) goto cleanup; @@ -812,16 +805,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1); if (!ASSERT_OK(err, "link_update_self")) goto cleanup; @@ -843,6 +833,7 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1254,6 +1245,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1261,9 +1253,6 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, ); @@ -1311,6 +1300,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1411,6 +1401,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1418,9 +1409,6 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, ); @@ -1468,6 +1456,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1637,38 +1626,33 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4); if (!ASSERT_OK(err, "link_update")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__detach(skel->links.tc6); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); @@ -1758,22 +1742,20 @@ static void test_tc_links_ingress(int target, bool chain_tc_old, assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index ba91b0226839..ca506d2fcf58 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -59,6 +59,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -83,6 +84,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -163,6 +165,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -219,6 +222,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -313,6 +317,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -369,6 +374,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -514,6 +520,7 @@ static void test_tc_opts_revision_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -608,22 +615,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_prog_detach_opts(fd2, loopback, target, &optd); if (!ASSERT_OK(err, "prog_detach")) goto cleanup_detach; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -730,16 +735,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]"); ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd2, @@ -767,16 +769,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE | BPF_F_BEFORE, .replace_prog_fd = fd3, @@ -805,6 +804,7 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1084,6 +1084,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1124,6 +1125,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1222,6 +1224,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1262,6 +1265,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -2316,16 +2320,13 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd3, @@ -2339,21 +2340,19 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - cleanup_opts: err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); -- cgit From 81a61051e0ce5fd7e09225c0d5985da08c7954a7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 5 Oct 2023 10:56:42 +0300 Subject: serial: core: Fix checks for tx runtime PM state Maximilian reported that surface_serial_hub serdev tx does not work during system suspend. During system suspend, runtime PM gets disabled in __device_suspend_late(), and tx is unable to wake-up the serial core port device that we use to check if tx is safe to start. Johan summarized the regression noting that serdev tx no longer always works as earlier when the serdev device is runtime PM active. The serdev device and the serial core controller devices are siblings of the serial port hardware device. The runtime PM usage count from serdev device does not propagate to the serial core device siblings, it only propagates to the parent. In addition to the tx issue for suspend, testing for the serial core port device can cause an unnecessary delay in enabling tx while waiting for the serial core port device to wake-up. The serial core port device wake-up is only needed to flush pending tx when the serial port hardware device was in runtime PM suspended state. To fix the regression, we need to check the runtime PM state of the parent serial port hardware device for tx instead of the serial core port device. As the serial port device drivers may or may not implement runtime PM, we need to also add a check for pm_runtime_enabled(). Reported-by: Maximilian Luz Cc: stable Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Signed-off-by: Tony Lindgren Tested-by: Maximilian Luz Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231005075644.25936-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ca26a8aef2cb..d5ba6e90bd95 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -156,7 +156,7 @@ static void __uart_start(struct uart_state *state) * enabled, serial_port_runtime_resume() calls start_tx() again * after enabling the device. */ - if (pm_runtime_active(&port_dev->dev)) + if (!pm_runtime_enabled(port->dev) || pm_runtime_active(port->dev)) port->ops->start_tx(port); pm_runtime_mark_last_busy(&port_dev->dev); pm_runtime_put_autosuspend(&port_dev->dev); -- cgit From db712c0089bd8e9e47c286ed772d86fb187d0854 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 6 Oct 2023 13:10:58 +0100 Subject: dt-bindings: interrupt-controller: renesas,rzg2l-irqc: Document RZ/G2UL SoC Document RZ/G2UL (R9A07G043U) IRQC bindings. The IRQC block on RZ/G2UL SoC is almost identical to one found on the RZ/G2L SoC the only difference being it can support BUS_ERR_INT for which it has additional registers. Hence new generic compatible string "renesas,r9a07g043u-irqc" is added for RZ/G2UL SoC. Now that we have additional interrupt for RZ/G2UL and RZ/Five SoC interrupt-names property is added so that we can parse them based on names. While at it updated the example node to four spaces and added interrupt-names property. Signed-off-by: Lad Prabhakar Reviewed-by: Rob Herring Reviewed-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231006121058.13890-1-prabhakar.mahadev-lad.rj@bp.renesas.com --- .../interrupt-controller/renesas,rzg2l-irqc.yaml | 225 ++++++++++++++++----- 1 file changed, 170 insertions(+), 55 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml index ea7db3618b23..2ef3081eaaf3 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml @@ -19,13 +19,11 @@ description: | - NMI edge select (NMI is not treated as NMI exception and supports fall edge and stand-up edge detection interrupts) -allOf: - - $ref: /schemas/interrupt-controller.yaml# - properties: compatible: items: - enum: + - renesas,r9a07g043u-irqc # RZ/G2UL - renesas,r9a07g044-irqc # RZ/G2{L,LC} - renesas,r9a07g054-irqc # RZ/V2L - const: renesas,rzg2l-irqc @@ -45,7 +43,96 @@ properties: maxItems: 1 interrupts: - maxItems: 41 + minItems: 41 + items: + - description: NMI interrupt + - description: IRQ0 interrupt + - description: IRQ1 interrupt + - description: IRQ2 interrupt + - description: IRQ3 interrupt + - description: IRQ4 interrupt + - description: IRQ5 interrupt + - description: IRQ6 interrupt + - description: IRQ7 interrupt + - description: GPIO interrupt, TINT0 + - description: GPIO interrupt, TINT1 + - description: GPIO interrupt, TINT2 + - description: GPIO interrupt, TINT3 + - description: GPIO interrupt, TINT4 + - description: GPIO interrupt, TINT5 + - description: GPIO interrupt, TINT6 + - description: GPIO interrupt, TINT7 + - description: GPIO interrupt, TINT8 + - description: GPIO interrupt, TINT9 + - description: GPIO interrupt, TINT10 + - description: GPIO interrupt, TINT11 + - description: GPIO interrupt, TINT12 + - description: GPIO interrupt, TINT13 + - description: GPIO interrupt, TINT14 + - description: GPIO interrupt, TINT15 + - description: GPIO interrupt, TINT16 + - description: GPIO interrupt, TINT17 + - description: GPIO interrupt, TINT18 + - description: GPIO interrupt, TINT19 + - description: GPIO interrupt, TINT20 + - description: GPIO interrupt, TINT21 + - description: GPIO interrupt, TINT22 + - description: GPIO interrupt, TINT23 + - description: GPIO interrupt, TINT24 + - description: GPIO interrupt, TINT25 + - description: GPIO interrupt, TINT26 + - description: GPIO interrupt, TINT27 + - description: GPIO interrupt, TINT28 + - description: GPIO interrupt, TINT29 + - description: GPIO interrupt, TINT30 + - description: GPIO interrupt, TINT31 + - description: Bus error interrupt + + interrupt-names: + minItems: 41 + items: + - const: nmi + - const: irq0 + - const: irq1 + - const: irq2 + - const: irq3 + - const: irq4 + - const: irq5 + - const: irq6 + - const: irq7 + - const: tint0 + - const: tint1 + - const: tint2 + - const: tint3 + - const: tint4 + - const: tint5 + - const: tint6 + - const: tint7 + - const: tint8 + - const: tint9 + - const: tint10 + - const: tint11 + - const: tint12 + - const: tint13 + - const: tint14 + - const: tint15 + - const: tint16 + - const: tint17 + - const: tint18 + - const: tint19 + - const: tint20 + - const: tint21 + - const: tint22 + - const: tint23 + - const: tint24 + - const: tint25 + - const: tint26 + - const: tint27 + - const: tint28 + - const: tint29 + - const: tint30 + - const: tint31 + - const: bus-err clocks: maxItems: 2 @@ -73,6 +160,23 @@ required: - power-domains - resets +allOf: + - $ref: /schemas/interrupt-controller.yaml# + + - if: + properties: + compatible: + contains: + const: renesas,r9a07g043u-irqc + then: + properties: + interrupts: + minItems: 42 + interrupt-names: + minItems: 42 + required: + - interrupt-names + unevaluatedProperties: false examples: @@ -81,55 +185,66 @@ examples: #include irqc: interrupt-controller@110a0000 { - compatible = "renesas,r9a07g044-irqc", "renesas,rzg2l-irqc"; - reg = <0x110a0000 0x10000>; - #interrupt-cells = <2>; - #address-cells = <0>; - interrupt-controller; - interrupts = , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - ; - clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, - <&cpg CPG_MOD R9A07G044_IA55_PCLK>; - clock-names = "clk", "pclk"; - power-domains = <&cpg>; - resets = <&cpg R9A07G044_IA55_RESETN>; + compatible = "renesas,r9a07g044-irqc", "renesas,rzg2l-irqc"; + reg = <0x110a0000 0x10000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31"; + clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, + <&cpg CPG_MOD R9A07G044_IA55_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G044_IA55_RESETN>; }; -- cgit From 977f7c2b275667777cd42ab0e61461617b652b05 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 27 Jul 2023 10:36:23 +0200 Subject: dt-bindings: interrupt-controller: renesas,irqc: Add r8a779f0 support Document support for the Interrupt Controller for External Devices (INT-EX) in the Renesas R-Car S4-8 (R8A779F0) SoC. Signed-off-by: Geert Uytterhoeven Reviewed-by: Kieran Bingham Reviewed-by: Yoshihiro Shimoda Acked-by: Conor Dooley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/9467a1c67d5d240211f88336973fa968d39cc860.1690446928.git.geert+renesas@glider.be --- Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml index 95033cb514fb..b417341fc8ae 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml @@ -37,6 +37,7 @@ properties: - renesas,intc-ex-r8a77990 # R-Car E3 - renesas,intc-ex-r8a77995 # R-Car D3 - renesas,intc-ex-r8a779a0 # R-Car V3U + - renesas,intc-ex-r8a779f0 # R-Car S4-8 - renesas,intc-ex-r8a779g0 # R-Car V4H - const: renesas,irqc -- cgit From 5e5c636c69bdba04033161bbb111fbb6f1f6661e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 6 Oct 2023 14:59:25 +0200 Subject: dt-bindings: interrupt-controller: arm,gic-v3: Add dma-noncoherent property The GIC v3 specifications allow redistributors and ITSes interconnect ports used to access memory to be wired up in a way that makes the respective initiators/memory observers non-coherent. Add the standard dma-noncoherent property to the GICv3 bindings to allow firmware to describe the redistributors/ITSes components and interconnect ports behaviour in system designs where the redistributors and ITSes are not coherent with the CPU. Reviewed-by: Rob Herring Signed-off-by: Lorenzo Pieralisi Cc: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231006125929.48591-2-lpieralisi@kernel.org --- .../devicetree/bindings/interrupt-controller/arm,gic-v3.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml index 2bc38479a41e..0f4a062c9d6f 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml @@ -106,6 +106,12 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 maximum: 4096 + dma-noncoherent: + description: + Present if the GIC redistributors permit programming shareability + and cacheability attributes but are connected to a non-coherent + downstream interconnect. + msi-controller: description: Only present if the Message Based Interrupt functionality is @@ -193,6 +199,12 @@ patternProperties: compatible: const: arm,gic-v3-its + dma-noncoherent: + description: + Present if the GIC ITS permits programming shareability and + cacheability attributes but is connected to a non-coherent + downstream interconnect. + msi-controller: true "#msi-cells": -- cgit From 9585a495ac936049dba141e8f9d99159ca06d46a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Oct 2023 14:59:27 +0200 Subject: irqchip/gic-v3-its: Split allocation from initialisation of its_node In order to pave the way for more fancy quirk handling without making more of a mess of this terrible driver, split the allocation of the ITS descriptor (its_node) from the actual probing. This will allow firmware-specific hooks to be added between these two points. Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231006125929.48591-4-lpieralisi@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 149 +++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 60 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e0c2b10d154d..5e57b605b7c6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4952,7 +4952,7 @@ out_unmap: return NULL; } -static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) +static int its_init_domain(struct its_node *its) { struct irq_domain *inner_domain; struct msi_domain_info *info; @@ -4966,7 +4966,7 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) inner_domain = irq_domain_create_hierarchy(its_parent, its->msi_domain_flags, 0, - handle, &its_domain_ops, + its->fwnode_handle, &its_domain_ops, info); if (!inner_domain) { kfree(info); @@ -5017,8 +5017,7 @@ static int its_init_vpe_domain(void) return 0; } -static int __init its_compute_its_list_map(struct resource *res, - void __iomem *its_base) +static int __init its_compute_its_list_map(struct its_node *its) { int its_number; u32 ctlr; @@ -5032,15 +5031,15 @@ static int __init its_compute_its_list_map(struct resource *res, its_number = find_first_zero_bit(&its_list_map, GICv4_ITS_LIST_MAX); if (its_number >= GICv4_ITS_LIST_MAX) { pr_err("ITS@%pa: No ITSList entry available!\n", - &res->start); + &its->phys_base); return -EINVAL; } - ctlr = readl_relaxed(its_base + GITS_CTLR); + ctlr = readl_relaxed(its->base + GITS_CTLR); ctlr &= ~GITS_CTLR_ITS_NUMBER; ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT; - writel_relaxed(ctlr, its_base + GITS_CTLR); - ctlr = readl_relaxed(its_base + GITS_CTLR); + writel_relaxed(ctlr, its->base + GITS_CTLR); + ctlr = readl_relaxed(its->base + GITS_CTLR); if ((ctlr & GITS_CTLR_ITS_NUMBER) != (its_number << GITS_CTLR_ITS_NUMBER_SHIFT)) { its_number = ctlr & GITS_CTLR_ITS_NUMBER; its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT; @@ -5048,75 +5047,50 @@ static int __init its_compute_its_list_map(struct resource *res, if (test_and_set_bit(its_number, &its_list_map)) { pr_err("ITS@%pa: Duplicate ITSList entry %d\n", - &res->start, its_number); + &its->phys_base, its_number); return -EINVAL; } return its_number; } -static int __init its_probe_one(struct resource *res, - struct fwnode_handle *handle, int numa_node) +static int __init its_probe_one(struct its_node *its) { - struct its_node *its; - void __iomem *its_base; - u64 baser, tmp, typer; + u64 baser, tmp; struct page *page; u32 ctlr; int err; - its_base = its_map_one(res, &err); - if (!its_base) - return err; - - pr_info("ITS %pR\n", res); - - its = kzalloc(sizeof(*its), GFP_KERNEL); - if (!its) { - err = -ENOMEM; - goto out_unmap; - } - - raw_spin_lock_init(&its->lock); - mutex_init(&its->dev_alloc_lock); - INIT_LIST_HEAD(&its->entry); - INIT_LIST_HEAD(&its->its_device_list); - typer = gic_read_typer(its_base + GITS_TYPER); - its->typer = typer; - its->base = its_base; - its->phys_base = res->start; if (is_v4(its)) { - if (!(typer & GITS_TYPER_VMOVP)) { - err = its_compute_its_list_map(res, its_base); + if (!(its->typer & GITS_TYPER_VMOVP)) { + err = its_compute_its_list_map(its); if (err < 0) - goto out_free_its; + goto out; its->list_nr = err; pr_info("ITS@%pa: Using ITS number %d\n", - &res->start, err); + &its->phys_base, err); } else { - pr_info("ITS@%pa: Single VMOVP capable\n", &res->start); + pr_info("ITS@%pa: Single VMOVP capable\n", &its->phys_base); } if (is_v4_1(its)) { - u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer); + u32 svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer); - its->sgir_base = ioremap(res->start + SZ_128K, SZ_64K); + its->sgir_base = ioremap(its->phys_base + SZ_128K, SZ_64K); if (!its->sgir_base) { err = -ENOMEM; - goto out_free_its; + goto out; } - its->mpidr = readl_relaxed(its_base + GITS_MPIDR); + its->mpidr = readl_relaxed(its->base + GITS_MPIDR); pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n", - &res->start, its->mpidr, svpet); + &its->phys_base, its->mpidr, svpet); } } - its->numa_node = numa_node; - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, get_order(ITS_CMD_QUEUE_SZ)); if (!page) { @@ -5125,12 +5099,9 @@ static int __init its_probe_one(struct resource *res, } its->cmd_base = (void *)page_address(page); its->cmd_write = its->cmd_base; - its->fwnode_handle = handle; its->get_msi_base = its_irq_get_msi_base; its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI; - its_enable_quirks(its); - err = its_alloc_tables(its); if (err) goto out_free_cmd; @@ -5174,7 +5145,7 @@ static int __init its_probe_one(struct resource *res, ctlr |= GITS_CTLR_ImDe; writel_relaxed(ctlr, its->base + GITS_CTLR); - err = its_init_domain(handle, its); + err = its_init_domain(its); if (err) goto out_free_tables; @@ -5191,11 +5162,8 @@ out_free_cmd: out_unmap_sgir: if (its->sgir_base) iounmap(its->sgir_base); -out_free_its: - kfree(its); -out_unmap: - iounmap(its_base); - pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err); +out: + pr_err("ITS@%pa: failed probing (%d)\n", &its->phys_base, err); return err; } @@ -5356,10 +5324,53 @@ static const struct of_device_id its_device_id[] = { {}, }; +static struct its_node __init *its_node_init(struct resource *res, + struct fwnode_handle *handle, int numa_node) +{ + void __iomem *its_base; + struct its_node *its; + int err; + + its_base = its_map_one(res, &err); + if (!its_base) + return NULL; + + pr_info("ITS %pR\n", res); + + its = kzalloc(sizeof(*its), GFP_KERNEL); + if (!its) + goto out_unmap; + + raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); + INIT_LIST_HEAD(&its->entry); + INIT_LIST_HEAD(&its->its_device_list); + + its->typer = gic_read_typer(its_base + GITS_TYPER); + its->base = its_base; + its->phys_base = res->start; + + its->numa_node = numa_node; + its->fwnode_handle = handle; + + return its; + +out_unmap: + iounmap(its_base); + return NULL; +} + +static void its_node_destroy(struct its_node *its) +{ + iounmap(its->base); + kfree(its); +} + static int __init its_of_probe(struct device_node *node) { struct device_node *np; struct resource res; + int err; /* * Make sure *all* the ITS are reset before we probe any, as @@ -5369,8 +5380,6 @@ static int __init its_of_probe(struct device_node *node) */ for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { - int err; - if (!of_device_is_available(np) || !of_property_read_bool(np, "msi-controller") || of_address_to_resource(np, 0, &res)) @@ -5383,6 +5392,8 @@ static int __init its_of_probe(struct device_node *node) for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { + struct its_node *its; + if (!of_device_is_available(np)) continue; if (!of_property_read_bool(np, "msi-controller")) { @@ -5396,7 +5407,17 @@ static int __init its_of_probe(struct device_node *node) continue; } - its_probe_one(&res, &np->fwnode, of_node_to_nid(np)); + + its = its_node_init(&res, &np->fwnode, of_node_to_nid(np)); + if (!its) + return -ENOMEM; + + its_enable_quirks(its); + err = its_probe_one(its); + if (err) { + its_node_destroy(its); + return err; + } } return 0; } @@ -5508,6 +5529,7 @@ static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, { struct acpi_madt_generic_translator *its_entry; struct fwnode_handle *dom_handle; + struct its_node *its; struct resource res; int err; @@ -5532,11 +5554,18 @@ static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, goto dom_err; } - err = its_probe_one(&res, dom_handle, - acpi_get_its_numa_node(its_entry->translation_id)); + its = its_node_init(&res, dom_handle, + acpi_get_its_numa_node(its_entry->translation_id)); + if (!its) { + err = -ENOMEM; + goto node_err; + } + + err = its_probe_one(its); if (!err) return 0; +node_err: iort_deregister_domain_token(its_entry->translation_id); dom_err: irq_domain_free_fwnode(dom_handle); -- cgit From 3a0fff0fb6a3861fa05416f21858cf0c75cbf944 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 6 Oct 2023 14:59:26 +0200 Subject: irqchip/gic-v3: Enable non-coherent redistributors/ITSes DT probing The GIC architecture specification defines a set of registers for redistributors and ITSes that control the sharebility and cacheability attributes of redistributors/ITSes initiator ports on the interconnect (GICR_[V]PROPBASER, GICR_[V]PENDBASER, GITS_BASER). Architecturally the GIC provides a means to drive shareability and cacheability attributes signals and related IWB/OWB/ISH barriers but it is not mandatory for designs to wire up the corresponding interconnect signals that control the cacheability/shareability of transactions. Redistributors and ITSes interconnect ports can be connected to non-coherent interconnects that are not able to manage the shareability/cacheability attributes; this implicitly makes the redistributors and ITSes non-coherent observers. So far, the GIC driver on probe executes a write to "probe" for the redistributors and ITSes registers shareability bitfields by writing a value (ie InnerShareable - the shareability domain the CPUs are in) and check it back to detect whether the value sticks or not; this hinges on a GIC programming model behaviour that predates the current specifications, that just define shareability bits as writeable but do not guarantee that writing certain shareability values enable the expected behaviour for the redistributors/ITSes memory interconnect ports. To enable non-coherent GIC designs, introduce the "dma-noncoherent" device tree property to allow firmware to describe redistributors and ITSes as non-coherent observers on the memory interconnect and use the property to force the shareability attributes to be programmed into the redistributors and ITSes registers through the GIC quirks mechanism. Signed-off-by: Lorenzo Pieralisi Cc: Robin Murphy Cc: Mark Rutland Cc: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231006125929.48591-3-lpieralisi@kernel.org --- drivers/irqchip/irq-gic-common.h | 4 ++++ drivers/irqchip/irq-gic-v3-its.c | 21 +++++++++++++++++---- drivers/irqchip/irq-gic-v3.c | 13 +++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index 3db4592cda1c..f407cce9ecaa 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -29,4 +29,8 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); +#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) +#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) +#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) + #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5e57b605b7c6..75a2dd550625 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -44,10 +44,6 @@ #define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) #define ITS_FLAGS_FORCE_NON_SHAREABLE (1ULL << 3) -#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) -#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) -#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) - #define RD_LOCAL_LPI_ENABLED BIT(0) #define RD_LOCAL_PENDTABLE_PREALLOCATED BIT(1) #define RD_LOCAL_MEMRESERVE_DONE BIT(2) @@ -4754,6 +4750,14 @@ static bool __maybe_unused its_enable_rk3588001(void *data) return true; } +static bool its_set_non_coherent(void *data) +{ + struct its_node *its = data; + + its->flags |= ITS_FLAGS_FORCE_NON_SHAREABLE; + return true; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -4808,6 +4812,11 @@ static const struct gic_quirk its_quirks[] = { .init = its_enable_rk3588001, }, #endif + { + .desc = "ITS: non-coherent attribute", + .property = "dma-noncoherent", + .init = its_set_non_coherent, + }, { } }; @@ -4817,6 +4826,10 @@ static void its_enable_quirks(struct its_node *its) u32 iidr = readl_relaxed(its->base + GITS_IIDR); gic_enable_quirks(iidr, its_quirks, its); + + if (is_of_node(its->fwnode_handle)) + gic_enable_of_quirks(to_of_node(its->fwnode_handle), + its_quirks, its); } static int its_save_disable(void) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..f59ac9586b7b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1857,6 +1857,14 @@ static bool gic_enable_quirk_arm64_2941627(void *data) return true; } +static bool rd_set_non_coherent(void *data) +{ + struct gic_chip_data *d = data; + + d->rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE; + return true; +} + static const struct gic_quirk gic_quirks[] = { { .desc = "GICv3: Qualcomm MSM8996 broken firmware", @@ -1923,6 +1931,11 @@ static const struct gic_quirk gic_quirks[] = { .mask = 0xff0f0fff, .init = gic_enable_quirk_arm64_2941627, }, + { + .desc = "GICv3: non-coherent attribute", + .property = "dma-noncoherent", + .init = rd_set_non_coherent, + }, { } }; -- cgit From e13cd66bd821be417c498a34928652db4ac6b436 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 3 Oct 2023 10:13:51 +0530 Subject: irqchip/riscv-intc: Mark all INTC nodes as initialized The RISC-V INTC local interrupts are per-HART (or per-CPU) so we create INTC IRQ domain only for the INTC node belonging to the boot HART. This means only the boot HART INTC node will be marked as initialized and other INTC nodes won't be marked which results downstream interrupt controllers (such as PLIC, IMSIC and APLIC direct-mode) not being probed due to missing device suppliers. To address this issue, we mark all INTC node for which we don't create IRQ domain as initialized. Reported-by: Dmitry Dunaev Signed-off-by: Anup Patel Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230926102801.1591126-1-dunaev@tecon.ru Link: https://lore.kernel.org/r/20231003044403.1974628-4-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-intc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index 4adeee1bc391..e8d01b14ccdd 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -155,8 +155,16 @@ static int __init riscv_intc_init(struct device_node *node, * for each INTC DT node. We only need to do INTC initialization * for the INTC DT node belonging to boot CPU (or boot HART). */ - if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) + if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) { + /* + * The INTC nodes of each CPU are suppliers for downstream + * interrupt controllers (such as PLIC, IMSIC and APLIC + * direct-mode) so we should mark an INTC node as initialized + * if we are not creating IRQ domain for it. + */ + fwnode_dev_initialized(of_fwnode_handle(node), true); return 0; + } return riscv_intc_init_common(of_node_to_fwnode(node)); } -- cgit From 8554cba1d6dbd3c74e0549e28ddbaccbb1d6b30a Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Tue, 3 Oct 2023 12:20:03 -0400 Subject: irqchip/stm32-exti: add missing DT IRQ flag translation The STM32F4/7 EXTI driver was missing the xlate callback, so IRQ trigger flags specified in the device tree were being ignored. This was preventing the RTC alarm interrupt from working, because it must be set to trigger on the rising edge to function correctly. Signed-off-by: Ben Wolsieffer Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231003162003.1649967-1-ben.wolsieffer@hefring.com --- drivers/irqchip/irq-stm32-exti.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index d8ba5fba7450..971240e2e31b 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -460,6 +460,7 @@ static const struct irq_domain_ops irq_exti_domain_ops = { .map = irq_map_generic_chip, .alloc = stm32_exti_alloc, .free = stm32_exti_free, + .xlate = irq_domain_xlate_twocell, }; static void stm32_irq_ack(struct irq_data *d) -- cgit From 8a4f44f3e9b05c38606b2ae02f933d6b64a340dd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 27 Sep 2023 14:57:32 +0200 Subject: irqchip/renesas-rzg2l: Convert to irq_data_get_irq_chip_data() Use the existing irq_data_get_irq_chip_data() helper instead of open-coding the same operation. Signed-off-by: Geert Uytterhoeven Reviewed-by: Lad Prabhakar Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/8e47cc6400e5a82c854c855948d2665a3a3197e3.1695819391.git.geert+renesas@glider.be --- drivers/irqchip/irq-renesas-rzg2l.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 2cee5477be6b..96f4e322ed6b 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -130,8 +130,8 @@ static void rzg2l_irqc_irq_enable(struct irq_data *d) unsigned int hw_irq = irqd_to_hwirq(d); if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) { + unsigned long tint = (uintptr_t)irq_data_get_irq_chip_data(d); struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); - unsigned long tint = (uintptr_t)d->chip_data; u32 offset = hw_irq - IRQC_TINT_START; u32 tssr_offset = TSSR_OFFSET(offset); u8 tssr_index = TSSR_INDEX(offset); -- cgit From c1097091b72255b2f9373260579133c5ce134dd1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Oct 2023 15:13:01 +0100 Subject: MAINTAINERS: Add myself as the ARM GIC maintainer The ARM GIC maintenance is currently covered by the blanket IRQCHIP DRIVERS entry, which I'm about to remove myself from. It is unlikely that anyone is mad enough to pick this up, so I'll keep doing that for the foreseable future. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20231002141302.3409485-2-maz@kernel.org --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..a8599e2fede5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1585,6 +1585,17 @@ F: arch/arm/include/asm/arch_timer.h F: arch/arm64/include/asm/arch_timer.h F: drivers/clocksource/arm_arch_timer.c +ARM GENERIC INTERRUPT CONTROLLER DRIVERS +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/interrupt-controller/arm,gic* +F: arch/arm/include/asm/arch_gicv3.h +F: arch/arm64/include/asm/arch_gicv3.h +F: drivers/irqchip/irq-gic*.[ch] +F: include/linux/irqchip/arm-gic*.h +F: include/linux/irqchip/arm-vgic-info.h + ARM HDLCD DRM DRIVER M: Liviu Dudau S: Supported -- cgit From b673fe1a6229a49be5394f4e539055d9ce685615 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Oct 2023 15:13:02 +0100 Subject: MAINTAINERS: Remove myself from the general IRQ subsystem maintenance It is pretty obvious that I haven't done much on the IRQ side for a while, and it is unlikely that I'll have more bandwidth for it any time soon. People keep sending me patches that I end-up reviewing in a cursory manner, which isn't great for anyone. So in everyone's interest, I'm removing myself from the list of maintainers and leave the irqchip and irqdomain subsystems in Thomas' capable hands. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231002141302.3409485-3-maz@kernel.org --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a8599e2fede5..f2bc21ae2562 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11075,7 +11075,7 @@ F: Documentation/devicetree/bindings/sound/irondevice,* F: sound/soc/codecs/sma* IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Marc Zyngier +M: Thomas Gleixner S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core F: Documentation/core-api/irq/irq-domain.rst @@ -11094,7 +11094,6 @@ F: lib/group_cpus.c IRQCHIP DRIVERS M: Thomas Gleixner -M: Marc Zyngier L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core -- cgit From d3b3c637e4eb8d3bbe53e5692aee66add72f9851 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 19 Sep 2023 15:26:35 +0200 Subject: parisc: Fix crash with nr_cpus=1 option John David Anglin reported that giving "nr_cpus=1" on the command line causes a crash, while "maxcpus=1" works. Reported-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.18+ --- arch/parisc/kernel/smp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 4098f9a0964b..2019c1f04bd0 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -440,7 +440,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (cpu_online(cpu)) return 0; - if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle)) + if (num_online_cpus() < nr_cpu_ids && + num_online_cpus() < setup_max_cpus && + smp_boot_one_cpu(cpu, tidle)) return -EIO; return cpu_online(cpu) ? 0 : -EIO; -- cgit From 914988e099fc658436fbd7b8f240160c352b6552 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 19 Sep 2023 17:51:40 +0000 Subject: parisc: Restore __ldcw_align for PA-RISC 2.0 processors Back in 2005, Kyle McMartin removed the 16-byte alignment for ldcw semaphores on PA 2.0 machines (CONFIG_PA20). This broke spinlocks on pre PA8800 processors. The main symptom was random faults in mmap'd memory (e.g., gcc compilations, etc). Unfortunately, the errata for this ldcw change is lost. The issue is the 16-byte alignment required for ldcw semaphore instructions can only be reduced to natural alignment when the ldcw operation can be handled coherently in cache. Only PA8800 and PA8900 processors actually support doing the operation in cache. Aligning the spinlock dynamically adds two integer instructions to each spinlock. Tested on rp3440, c8000 and a500. Signed-off-by: John David Anglin Link: https://lore.kernel.org/linux-parisc/6b332788-2227-127f-ba6d-55e99ecf4ed8@bell.net/T/#t Link: https://lore.kernel.org/linux-parisc/20050609050702.GB4641@roadwarrior.mcmartin.ca/ Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/include/asm/ldcw.h | 37 +++++++++++++++++--------------- arch/parisc/include/asm/spinlock_types.h | 5 ----- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 6d28b5514699..ee9e071859b2 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -2,39 +2,42 @@ #ifndef __PARISC_LDCW_H #define __PARISC_LDCW_H -#ifndef CONFIG_PA20 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, and GCC only guarantees 8-byte alignment for stack locals, we can't be assured of 16-byte alignment for atomic lock data even if we specify "__attribute ((aligned(16)))" in the type declaration. So, we use a struct containing an array of four ints for the atomic lock type and dynamically select the 16-byte aligned int from the array - for the semaphore. */ + for the semaphore. */ + +/* From: "Jim Hull" + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is implemented, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). + + Although the cache control hint is accepted by all PA 2.0 processors, + it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still + require 16-byte alignment. If the address is unaligned, the operation + of the instruction is undefined. The ldcw instruction does not generate + unaligned data reference traps so misaligned accesses are not detected. + This hid the problem for years. So, restore the 16-byte alignment dropped + by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ #define __PA_LDCW_ALIGNMENT 16 -#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ & ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) -#define __LDCW "ldcw" -#else /*CONFIG_PA20*/ -/* From: "Jim Hull" - I've attached a summary of the change, but basically, for PA 2.0, as - long as the ",CO" (coherent operation) completer is specified, then the - 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead - they only require "natural" alignment (4-byte for ldcw, 8-byte for - ldcd). */ - -#define __PA_LDCW_ALIGNMENT 4 -#define __PA_LDCW_ALIGN_ORDER 2 -#define __ldcw_align(a) (&(a)->slock) +#ifdef CONFIG_PA20 #define __LDCW "ldcw,co" - -#endif /*!CONFIG_PA20*/ +#else +#define __LDCW "ldcw" +#endif /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. We don't explicitly expose that "*a" may be written as reload diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index efd06a897c6a..7b986b09dba8 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -9,15 +9,10 @@ #ifndef __ASSEMBLY__ typedef struct { -#ifdef CONFIG_PA20 - volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL } -#else volatile unsigned int lock[4]; # define __ARCH_SPIN_LOCK_UNLOCKED \ { { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \ __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } } -#endif } arch_spinlock_t; -- cgit From f990874b1c98fe8e57ee9385669f501822979258 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 7 Oct 2023 11:30:49 +0800 Subject: ieee802154: ca8210: Fix a potential UAF in ca8210_probe If of_clk_add_provider() fails in ca8210_register_ext_clock(), it calls clk_unregister() to release priv->clk and returns an error. However, the caller ca8210_probe() then calls ca8210_remove(), where priv->clk is freed again in ca8210_unregister_ext_clock(). In this case, a use-after-free may happen in the second time we call clk_unregister(). Fix this by removing the first clk_unregister(). Also, priv->clk could be an error code on failure of clk_register_fixed_rate(). Use IS_ERR_OR_NULL to catch this case in ca8210_unregister_ext_clock(). Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Dinghao Liu Message-ID: <20231007033049.22353-1-dinghao.liu@zju.edu.cn> Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index aebb19f1b3a4..4ec0dab38872 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2740,7 +2740,6 @@ static int ca8210_register_ext_clock(struct spi_device *spi) struct device_node *np = spi->dev.of_node; struct ca8210_priv *priv = spi_get_drvdata(spi); struct ca8210_platform_data *pdata = spi->dev.platform_data; - int ret = 0; if (!np) return -EFAULT; @@ -2757,18 +2756,8 @@ static int ca8210_register_ext_clock(struct spi_device *spi) dev_crit(&spi->dev, "Failed to register external clk\n"); return PTR_ERR(priv->clk); } - ret = of_clk_add_provider(np, of_clk_src_simple_get, priv->clk); - if (ret) { - clk_unregister(priv->clk); - dev_crit( - &spi->dev, - "Failed to register external clock as clock provider\n" - ); - } else { - dev_info(&spi->dev, "External clock set as clock provider\n"); - } - return ret; + return of_clk_add_provider(np, of_clk_src_simple_get, priv->clk); } /** @@ -2780,8 +2769,8 @@ static void ca8210_unregister_ext_clock(struct spi_device *spi) { struct ca8210_priv *priv = spi_get_drvdata(spi); - if (!priv->clk) - return + if (IS_ERR_OR_NULL(priv->clk)) + return; of_clk_del_provider(spi->dev.of_node); clk_unregister(priv->clk); -- cgit From b3fa3cf02e3ce92d32bfdeedd5a6bd0825f55a14 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sat, 7 Oct 2023 23:38:18 +0200 Subject: ASoC: ti: ams-delta: Fix cx81801_receive() argument types Since types of arguments accepted by tty_ldis_ops::receive_buf() have changed, the driver no longer builds. .../linux/sound/soc/ti/ams-delta.c:403:24: error: initialization of 'void (*)(struct tty_struct *, const u8 *, const u8 *, size_t)' {aka 'void (*)(struct tty_struct *, const unsigned char *, const unsigned char *, unsigned int)'} from incompatible pointer type 'void (*)(struct tty_struct *, const u8 *, const char *, int)' {aka 'void (*)(struct tty_struct *, const unsigned char *, const char *, int)'} [-Werror=incompatible-pointer-types] 403 | .receive_buf = cx81801_receive, Fix it. Fixes: e8161447bb0c ("tty: make tty_ldisc_ops::*buf*() hooks operate on size_t") Fixes: 892bc209f250 ("tty: use u8 for flags") Signed-off-by: Janusz Krzysztofik Link: https://lore.kernel.org/r/20231007213820.376360-1-jmkrzyszt@gmail.com Signed-off-by: Greg Kroah-Hartman --- sound/soc/ti/ams-delta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c index 371943350fdf..666057d50ea0 100644 --- a/sound/soc/ti/ams-delta.c +++ b/sound/soc/ti/ams-delta.c @@ -336,8 +336,8 @@ static void cx81801_hangup(struct tty_struct *tty) } /* Line discipline .receive_buf() */ -static void cx81801_receive(struct tty_struct *tty, const u8 *cp, - const char *fp, int count) +static void cx81801_receive(struct tty_struct *tty, const u8 *cp, const u8 *fp, + size_t count) { struct snd_soc_component *component = tty->disc_data; const unsigned char *c; -- cgit From 025d5ac978cc3b47874cc1c03ab096a78b49f278 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Oct 2023 16:51:32 -0700 Subject: x86/resctrl: Fix kernel-doc warnings The kernel test robot reported kernel-doc warnings here: monitor.c:34: warning: Cannot understand * @rmid_free_lru A least recently used list of free RMIDs on line 34 - I thought it was a doc line monitor.c:41: warning: Cannot understand * @rmid_limbo_count count of currently unused but (potentially) on line 41 - I thought it was a doc line monitor.c:50: warning: Cannot understand * @rmid_entry - The entry in the limbo and free lists. on line 50 - I thought it was a doc line We don't have a syntax for documenting individual data items via kernel-doc, so remove the "/**" kernel-doc markers and add a hyphen for consistency. Fixes: 6a445edce657 ("x86/intel_rdt/cqm: Add RDT monitoring initialization") Fixes: 24247aeeabe9 ("x86/intel_rdt/cqm: Improve limbo list processing") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231006235132.16227-1-rdunlap@infradead.org --- arch/x86/kernel/cpu/resctrl/monitor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ded1fc7cb7cb..f136ac046851 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -30,15 +30,15 @@ struct rmid_entry { struct list_head list; }; -/** - * @rmid_free_lru A least recently used list of free RMIDs +/* + * @rmid_free_lru - A least recently used list of free RMIDs * These RMIDs are guaranteed to have an occupancy less than the * threshold occupancy */ static LIST_HEAD(rmid_free_lru); -/** - * @rmid_limbo_count count of currently unused but (potentially) +/* + * @rmid_limbo_count - count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that * may have a occupancy value > resctrl_rmid_realloc_threshold. User can @@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru); */ static unsigned int rmid_limbo_count; -/** +/* * @rmid_entry - The entry in the limbo and free lists. */ static struct rmid_entry *rmid_ptrs; -- cgit From e53899771a02f798d436655efbd9d4b46c0f9265 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Fri, 6 Oct 2023 11:57:26 -0700 Subject: perf/x86/lbr: Filter vsyscall addresses We found that a panic can occur when a vsyscall is made while LBR sampling is active. If the vsyscall is interrupted (NMI) for perf sampling, this call sequence can occur (most recent at top): __insn_get_emulate_prefix() insn_get_emulate_prefix() insn_get_prefixes() insn_get_opcode() decode_branch_type() get_branch_type() intel_pmu_lbr_filter() intel_pmu_handle_irq() perf_event_nmi_handler() Within __insn_get_emulate_prefix() at frame 0, a macro is called: peek_nbyte_next(insn_byte_t, insn, i) Within this macro, this dereference occurs: (insn)->next_byte Inspecting registers at this point, the value of the next_byte field is the address of the vsyscall made, for example the location of the vsyscall version of gettimeofday() at 0xffffffffff600000. The access to an address in the vsyscall region will trigger an oops due to an unhandled page fault. To fix the bug, filtering for vsyscalls can be done when determining the branch type. This patch will return a "none" branch if a kernel address if found to lie in the vsyscall region. Suggested-by: Alexei Starovoitov Signed-off-by: JP Kobryn Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org --- arch/x86/events/utils.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index 76b1f8bb0fd5..dab4ed199227 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "perf_event.h" @@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort, * The LBR logs any address in the IP, even if the IP just * faulted. This means userspace can control the from address. * Ensure we don't blindly read any address by validating it is - * a known text address. + * a known text address and not a vsyscall address. */ - if (kernel_text_address(from)) { + if (kernel_text_address(from) && !in_gate_area_no_mm(from)) { addr = (void *)from; /* * Assume we can get the maximum possible size -- cgit From 1e0b72a2a6432c0ef67ee5ce8d9172a7c20bba25 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2023 17:00:12 +0300 Subject: mlxsw: fix mlxsw_sp2_nve_vxlan_learning_set() return type The mlxsw_sp2_nve_vxlan_learning_set() function is supposed to return zero on success or negative error codes. So it needs to be type int instead of bool. Fixes: 4ee70efab68d ("mlxsw: spectrum_nve: Add support for VXLAN on Spectrum-2") Signed-off-by: Dan Carpenter Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index bb8eeb86edf7..52c2fe3644d4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -310,8 +310,8 @@ const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, }; -static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, - bool learning_en) +static int mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, + bool learning_en) { char tnpc_pl[MLXSW_REG_TNPC_LEN]; -- cgit From 66cf7435a26917c0c4d6245ad9137e7606e84fdf Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 5 Oct 2023 16:08:31 +0200 Subject: xen-netback: use default TX queue size for vifs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not set netback interfaces (vifs) default TX queue size to the ring size. The TX queue size is not related to the ring size, and using the ring size (32) as the queue size can lead to packet drops. Note the TX side of the vif interface in the netback domain is the one receiving packets to be injected to the guest. Do not explicitly set the TX queue length to any value when creating the interface, and instead use the system default. Note that the queue length can also be adjusted at runtime. Fixes: f942dc2552b8 ('xen network backend driver') Signed-off-by: Roger Pau Monné Reviewed-by: Ross Lagerwall Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f3f2c07423a6..fc3bb63b9ac3 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -41,8 +41,6 @@ #include #include -#define XENVIF_QUEUE_LENGTH 32 - /* Number of bytes allowed on the internal guest Rx queue. */ #define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) @@ -530,8 +528,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->features = dev->hw_features | NETIF_F_RXCSUM; dev->ethtool_ops = &xenvif_ethtool_ops; - dev->tx_queue_len = XENVIF_QUEUE_LENGTH; - dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_MAX_MTU - VLAN_ETH_HLEN; -- cgit From 776fe19953b0e0af00399e50fb3b205101d4b3c1 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Thu, 5 Oct 2023 09:33:30 -0700 Subject: ice: block default rule setting on LAG interface When one of the LAG interfaces is in switchdev mode, setting default rule can't be done. The interface on which switchdev is running has ice_set_rx_mode() blocked to avoid default rule adding (and other rules). The other interfaces (without switchdev running but connected via bond with interface that runs switchdev) can't follow the same scheme, because rx filtering needs to be disabled when failover happens. Notification for bridge to set promisc mode seems like good place to do that. Fixes: bb52f42acef6 ("ice: Add driver support for firmware changes for LAG") Signed-off-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Przemek Kitszel Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_lag.c | 32 ++++++++++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_lag.h | 1 + drivers/net/ethernet/intel/ice/ice_lib.c | 6 ++++++ 3 files changed, 39 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 4f39863b5537..7b1256992dcf 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2093,3 +2093,35 @@ lag_rebuild_out: } mutex_unlock(&pf->lag_mutex); } + +/** + * ice_lag_is_switchdev_running + * @pf: pointer to PF structure + * + * Check if switchdev is running on any of the interfaces connected to lag. + */ +bool ice_lag_is_switchdev_running(struct ice_pf *pf) +{ + struct ice_lag *lag = pf->lag; + struct net_device *tmp_nd; + + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || !lag) + return false; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { + struct ice_netdev_priv *priv = netdev_priv(tmp_nd); + + if (!netif_is_ice(tmp_nd) || !priv || !priv->vsi || + !priv->vsi->back) + continue; + + if (ice_is_switchdev_running(priv->vsi->back)) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 18075b82485a..facb6c894b6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -62,4 +62,5 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf); int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); +bool ice_lag_is_switchdev_running(struct ice_pf *pf); #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 201570cd2e0b..7bf9b7069754 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3575,6 +3575,12 @@ int ice_set_dflt_vsi(struct ice_vsi *vsi) dev = ice_pf_to_dev(vsi->back); + if (ice_lag_is_switchdev_running(vsi->back)) { + dev_dbg(dev, "VSI %d passed is a part of LAG containing interfaces in switchdev mode, nothing to do\n", + vsi->vsi_num); + return 0; + } + /* the VSI passed in is already the default VSI */ if (ice_is_vsi_dflt_vsi(vsi)) { dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n", -- cgit From 94f6f0550c625fab1f373bb86a6669b45e9748b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Oct 2023 13:49:43 -0700 Subject: Linux 6.6-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 373649c7374e..88ebf6547964 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 0618c077a8c20e8c81e367988f70f7e32bb5a717 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Thu, 5 Oct 2023 22:28:35 +0800 Subject: dmaengine: ste_dma40: Fix PM disable depth imbalance in d40_probe The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. We fix it by calling pm_runtime_disable when error returns. Signed-off-by: Zhang Shurong Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/tencent_DD2D371DB5925B4B602B1E1D0A5FA88F1208@qq.com Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 89e82508c133..002833fb1fa0 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -3668,6 +3668,7 @@ static int __init d40_probe(struct platform_device *pdev) regulator_disable(base->lcpa_regulator); regulator_put(base->lcpa_regulator); } + pm_runtime_disable(base->dev); report_failure: d40_err(dev, "probe failed\n"); -- cgit From 81337b9a72dc58a5fa0ae8a042e8cb59f9bdec4a Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:28 +0200 Subject: dmaengine: stm32-mdma: abort resume if no ongoing transfer chan->desc can be null, if transfer is terminated when resume is called, leading to a NULL pointer when retrieving the hwdesc. To avoid this case, check that chan->desc is not null and channel is disabled (transfer previously paused or terminated). Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 0de234022c6d..cc6f4b00091f 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1236,6 +1236,10 @@ static int stm32_mdma_resume(struct dma_chan *c) unsigned long flags; u32 status, reg; + /* Transfer can be terminated */ + if (!chan->desc || (stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & STM32_MDMA_CCR_EN)) + return -EPERM; + hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc; spin_lock_irqsave(&chan->vchan.lock, flags); -- cgit From a4b306eb83579c07b63dc65cd5bae53b7b4019d0 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:29 +0200 Subject: dmaengine: stm32-mdma: use Link Address Register to compute residue Current implementation relies on curr_hwdesc index. But to keep this index up to date, Block Transfer interrupt (BTIE) has to be enabled. If it is not, curr_hwdesc is not updated, and then residue is not reliable. Rely on Link Address Register instead. And disable BTIE interrupt in stm32_mdma_setup_xfer() because it is no more needed in case of _prep_slave_sg() to maintain curr_hwdesc up to date. It avoids extra interrupts and also ensures a reliable residue. These improvements are required for STM32 DCMI camera capture use case, which need STM32 DMA and MDMA chaining for good performance. Fixes: 696874322771 ("dmaengine: stm32-mdma: add support to be triggered by STM32 DMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index cc6f4b00091f..da73e13b8c9d 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -777,8 +777,6 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, /* Enable interrupts */ ccr &= ~STM32_MDMA_CCR_IRQ_MASK; ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE; - if (sg_len > 1) - ccr |= STM32_MDMA_CCR_BTIE; desc->ccr = ccr; return 0; @@ -1324,12 +1322,21 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct stm32_mdma_hwdesc *hwdesc; - u32 cbndtr, residue, modulo, burst_size; + u32 cisr, clar, cbndtr, residue, modulo, burst_size; int i; + cisr = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); + residue = 0; - for (i = curr_hwdesc + 1; i < desc->count; i++) { + /* Get the next hw descriptor to process from current transfer */ + clar = stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)); + for (i = desc->count - 1; i >= 0; i--) { hwdesc = desc->node[i].hwdesc; + + if (hwdesc->clar == clar) + break;/* Current transfer found, stop cumulating */ + + /* Cumulate residue of unprocessed hw descriptors */ residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr); } cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); -- cgit From 584970421725b7805db84714b857851fdf7203a9 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:30 +0200 Subject: dmaengine: stm32-mdma: set in_flight_bytes in case CRQA flag is set CRQA flag is set by hardware when the channel request become active and the channel is enabled. It is cleared by hardware, when the channel request is completed. So when it is set, it means MDMA is transferring bytes. This information is useful in case of STM32 DMA and MDMA chaining, especially when the user pauses DMA before stopping it, to trig one last MDMA transfer to get the latest bytes of the SRAM buffer to the destination buffer. STM32 DCMI driver can then use this to know if the last MDMA transfer in case of chaining is done. Fixes: 696874322771 ("dmaengine: stm32-mdma: add support to be triggered by STM32 DMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-3-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index da73e13b8c9d..bae08b3f55c7 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1318,7 +1318,8 @@ static int stm32_mdma_slave_config(struct dma_chan *c, static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, struct stm32_mdma_desc *desc, - u32 curr_hwdesc) + u32 curr_hwdesc, + struct dma_tx_state *state) { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct stm32_mdma_hwdesc *hwdesc; @@ -1342,6 +1343,10 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + state->in_flight_bytes = 0; + if (chan->chan_config.m2m_hw && (cisr & STM32_MDMA_CISR_CRQA)) + state->in_flight_bytes = cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + if (!chan->mem_burst) return residue; @@ -1371,11 +1376,10 @@ static enum dma_status stm32_mdma_tx_status(struct dma_chan *c, vdesc = vchan_find_desc(&chan->vchan, cookie); if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) - residue = stm32_mdma_desc_residue(chan, chan->desc, - chan->curr_hwdesc); + residue = stm32_mdma_desc_residue(chan, chan->desc, chan->curr_hwdesc, state); else if (vdesc) - residue = stm32_mdma_desc_residue(chan, - to_stm32_mdma_desc(vdesc), 0); + residue = stm32_mdma_desc_residue(chan, to_stm32_mdma_desc(vdesc), 0, state); + dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); -- cgit From 2df467e908ce463cff1431ca1b00f650f7a514b4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 17:50:23 +0200 Subject: dmaengine: stm32-dma: fix stm32_dma_prep_slave_sg in case of MDMA chaining Current Target (CT) have to be reset when starting an MDMA chaining use case, as Double Buffer mode is activated. It ensures the DMA will start processing the first memory target (pointed with SxM0AR). Fixes: 723795173ce1 ("dmaengine: stm32-dma: add support to trigger STM32 MDMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004155024.2609531-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 5c36811aa134..7427acc82259 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1113,8 +1113,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; /* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */ - if (chan->trig_mdma && sg_len > 1) + if (chan->trig_mdma && sg_len > 1) { chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM; + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT; + } for_each_sg(sgl, sg, sg_len, i) { ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, -- cgit From 67e13e89742c3b21ce177f612bf9ef32caae6047 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 17:50:24 +0200 Subject: dmaengine: stm32-dma: fix residue in case of MDMA chaining In case of MDMA chaining, DMA is configured in Double-Buffer Mode (DBM) with two periods, but if transfer has been prepared with _prep_slave_sg(), the transfer is not marked cyclic (=!chan->desc->cyclic). However, as DBM is activated for MDMA chaining, residue computation must take into account cyclic constraints. With only two periods in MDMA chaining, and no update due to Transfer Complete interrupt masked, n_sg is always 0. If DMA current memory address (depending on SxCR.CT and SxM0AR/SxM1AR) does not correspond, it means n_sg should be increased. Then, the residue of the current period is the one read from SxNDTR and should not be overwritten with the full period length. Fixes: 723795173ce1 ("dmaengine: stm32-dma: add support to trigger STM32 MDMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004155024.2609531-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 7427acc82259..0b30151fb45c 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1389,11 +1389,12 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, residue = stm32_dma_get_remaining_bytes(chan); - if (chan->desc->cyclic && !stm32_dma_is_current_sg(chan)) { + if ((chan->desc->cyclic || chan->trig_mdma) && !stm32_dma_is_current_sg(chan)) { n_sg++; if (n_sg == chan->desc->num_sgs) n_sg = 0; - residue = sg_req->len; + if (!chan->trig_mdma) + residue = sg_req->len; } /* @@ -1403,7 +1404,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, * residue = remaining bytes from NDTR + remaining * periods/sg to be transferred */ - if (!chan->desc->cyclic || n_sg != 0) + if ((!chan->desc->cyclic && !chan->trig_mdma) || n_sg != 0) for (i = n_sg; i < desc->num_sgs; i++) residue += desc->sg_req[i].len; -- cgit From 3fa53518ad419bfacceae046a9d8027e4c4c5290 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 4 Oct 2023 10:29:11 -0400 Subject: dmaengine: fsl-edma: fix all channels requested when call fsl_edma3_xlate() dma_get_slave_channel() increases client_count for all channels. It should only be called when a matched channel is found in fsl_edma3_xlate(). Move dma_get_slave_channel() after checking for a matched channel. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231004142911.838916-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 621a460fae0c..8c4ed7012e23 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -154,18 +154,20 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, fsl_chan = to_fsl_edma_chan(chan); i = fsl_chan - fsl_edma->chans; - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; fsl_chan->priority = dma_spec->args[1]; fsl_chan->is_rxchan = dma_spec->args[2] & ARGS_RX; fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE; fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO; if (!b_chmux && i == dma_spec->args[0]) { + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; mutex_unlock(&fsl_edma->fsl_edma_mutex); return chan; } else if (b_chmux && !fsl_chan->srcid) { /* if controller support channel mux, choose a free channel */ + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; fsl_chan->srcid = dma_spec->args[0]; mutex_unlock(&fsl_edma->fsl_edma_mutex); return chan; -- cgit From 4a63e68a295187ae3c1cb3fa0c583c96a959714f Mon Sep 17 00:00:00 2001 From: Christos Skevis Date: Fri, 6 Oct 2023 17:53:30 +0200 Subject: ALSA: usb-audio: Fix microphone sound on Nexigo webcam. I own an external usb Webcam, model NexiGo N930AF, which had low mic volume and inconsistent sound quality. Video works as expected. (snip) [ +0.047857] usb 5-1: new high-speed USB device number 2 using xhci_hcd [ +0.003406] usb 5-1: New USB device found, idVendor=1bcf, idProduct=2283, bcdDevice=12.17 [ +0.000007] usb 5-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ +0.000004] usb 5-1: Product: NexiGo N930AF FHD Webcam [ +0.000003] usb 5-1: Manufacturer: SHENZHEN AONI ELECTRONIC CO., LTD [ +0.000004] usb 5-1: SerialNumber: 20201217011 [ +0.003900] usb 5-1: Found UVC 1.00 device NexiGo N930AF FHD Webcam (1bcf:2283) [ +0.025726] usb 5-1: 3:1: cannot get usb sound sample rate freq at ep 0x86 [ +0.071482] usb 5-1: 3:2: cannot get usb sound sample rate freq at ep 0x86 [ +0.004679] usb 5-1: 3:3: cannot get usb sound sample rate freq at ep 0x86 [ +0.051607] usb 5-1: Warning! Unlikely big volume range (=4096), cval->res is probably wrong. [ +0.000005] usb 5-1: [7] FU [Mic Capture Volume] ch = 1, val = 0/4096/1 Set up quirk cval->res to 16 for 256 levels, Set GET_SAMPLE_RATE quirk flag to stop trying to get the sample rate. Confirmed that happened anyway later due to the backoff mechanism, after 3 failures All audio stream on device interfaces share the same values, apart from wMaxPacketSize and tSamFreq : (snip) Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 3 bNumEndpoints 1 bInterfaceClass 1 Audio bInterfaceSubClass 2 Streaming bInterfaceProtocol 0 iInterface 0 AudioStreaming Interface Descriptor: bLength 7 bDescriptorType 36 bDescriptorSubtype 1 (AS_GENERAL) bTerminalLink 8 bDelay 1 frames wFormatTag 0x0001 PCM AudioStreaming Interface Descriptor: bLength 11 bDescriptorType 36 bDescriptorSubtype 2 (FORMAT_TYPE) bFormatType 1 (FORMAT_TYPE_I) bNrChannels 1 bSubframeSize 2 bBitResolution 16 bSamFreqType 1 Discrete tSamFreq[ 0] 44100 Endpoint Descriptor: bLength 9 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 5 Transfer Type Isochronous Synch Type Asynchronous Usage Type Data wMaxPacketSize 0x005c 1x 92 bytes bInterval 4 bRefresh 0 bSynchAddress 0 AudioStreaming Endpoint Descriptor: bLength 7 bDescriptorType 37 bDescriptorSubtype 1 (EP_GENERAL) bmAttributes 0x01 Sampling Frequency bLockDelayUnits 0 Undefined wLockDelay 0x0000 (snip) Based on the usb data about manufacturer, SPCA2281B3 is the most likely controller IC Manufacturer does not provide link for datasheet nor detailed specs. No way to confirm if the firmware supports any other way of getting the sample rate. Testing patch provides consistent good sound recording quality and volume range. (snip) [ +0.045764] usb 5-1: new high-speed USB device number 2 using xhci_hcd [ +0.106290] usb 5-1: New USB device found, idVendor=1bcf, idProduct=2283, bcdDevice=12.17 [ +0.000006] usb 5-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ +0.000004] usb 5-1: Product: NexiGo N930AF FHD Webcam [ +0.000003] usb 5-1: Manufacturer: SHENZHEN AONI ELECTRONIC CO., LTD [ +0.000004] usb 5-1: SerialNumber: 20201217011 [ +0.043700] usb 5-1: set resolution quirk: cval->res = 16 [ +0.002585] usb 5-1: Found UVC 1.00 device NexiGo N930AF FHD Webcam (1bcf:2283) Signed-off-by: Christos Skevis Link: https://lore.kernel.org/r/20231006155330.399393-1-xristos.thes@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 7 +++++++ sound/usb/quirks.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 985b1aea9cdc..409fc1164694 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1204,6 +1204,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 16; } break; + case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */ + if (!strcmp(kctl->id.name, "Mic Capture Volume")) { + usb_audio_info(chip, + "set resolution quirk: cval->res = 16\n"); + cval->res = 16; + } + break; } } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d4bbef70d2f7..4e64842245e1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2177,6 +2177,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- cgit From 87797fad6cce28ec9be3c13f031776ff4f104cfc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 28 Aug 2023 08:09:47 +0200 Subject: xen/events: replace evtchn_rwlock with RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In unprivileged Xen guests event handling can cause a deadlock with Xen console handling. The evtchn_rwlock and the hvc_lock are taken in opposite sequence in __hvc_poll() and in Xen console IRQ handling. Normally this is no problem, as the evtchn_rwlock is taken as a reader in both paths, but as soon as an event channel is being closed, the lock will be taken as a writer, which will cause read_lock() to block: CPU0 CPU1 CPU2 (IRQ handling) (__hvc_poll()) (closing event channel) read_lock(evtchn_rwlock) spin_lock(hvc_lock) write_lock(evtchn_rwlock) [blocks] spin_lock(hvc_lock) [blocks] read_lock(evtchn_rwlock) [blocks due to writer waiting, and not in_interrupt()] This issue can be avoided by replacing evtchn_rwlock with RCU in xen_free_irq(). Note that RCU is used only to delay freeing of the irq_info memory. There is no RCU based dereferencing or replacement of pointers involved. In order to avoid potential races between removing the irq_info reference and handling of interrupts, set the irq_info pointer to NULL only when freeing its memory. The IRQ itself must be freed at that time, too, as otherwise the same IRQ number could be allocated again before handling of the old instance would have been finished. This is XSA-441 / CVE-2023-34324. Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 87 +++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 0bb86e6c4d0a..1b2136fe0fa5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; + struct rcu_work rwork; short refcnt; u8 spurious_cnt; u8 is_accounted; @@ -146,23 +148,13 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); -/* - * Lock protecting event handling loop against removing event channels. - * Adding of event channels is no issue as the associated IRQ becomes active - * only after everything is setup (before request_[threaded_]irq() the handler - * can't be entered for an event, as the event channel will be unmasked only - * then). - */ -static DEFINE_RWLOCK(evtchn_rwlock); - /* * Lock hierarchy: * * irq_mapping_update_lock - * evtchn_rwlock - * IRQ-desc lock - * percpu eoi_list_lock - * irq_info->lock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void delayed_free_irq(struct work_struct *work) +{ + struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, + rwork); + unsigned int irq = info->irq; + + /* Remove the info pointer only now, with no potential users left. */ + set_info_for_irq(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); while (true) { - spin_lock(&eoi->eoi_list_lock); + spin_lock_irqsave(&eoi->eoi_list_lock, flags); info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, eoi_list); - if (info == NULL || now < info->eoi_time) { - spin_unlock(&eoi->eoi_list_lock); + if (info == NULL) + break; + + if (now < info->eoi_time) { + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, + info->eoi_time - now); break; } list_del_init(&info->eoi_list); - spin_unlock(&eoi->eoi_list_lock); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); info->eoi_time = 0; xen_irq_lateeoi_locked(info, false); } - if (info) - mod_delayed_work_on(info->eoi_cpu, system_wq, - &eoi->delayed, info->eoi_time - now); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } static void xen_cpu_init_eoi(unsigned int cpu) @@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; - unsigned long flags; - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); info = info_for_irq(irq); if (info) xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); @@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); set_info_for_irq(irq, info); /* @@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); - unsigned long flags; if (WARN_ON(!info)) return; - write_lock_irqsave(&evtchn_rwlock, flags); - if (!list_empty(&info->eoi_list)) lateeoi_list_del(info); list_del(&info->list); - set_info_for_irq(irq, NULL); - WARN_ON(info->refcnt > 0); - write_unlock_irqrestore(&evtchn_rwlock, flags); - - kfree(info); - - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < nr_legacy_irqs()) - return; - - irq_free_desc(irq); + queue_rcu_work(system_wq, &info->rwork); } /* Not called for lateeoi events. */ @@ -1711,7 +1709,14 @@ int xen_evtchn_do_upcall(void) int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 }; - read_lock(&evtchn_rwlock); + /* + * When closing an event channel the associated IRQ must not be freed + * until all cpus have left the event handling loop. This is ensured + * by taking the rcu_read_lock() while handling events, as freeing of + * the IRQ is handled via queue_rcu_work() _after_ closing the event + * channel. + */ + rcu_read_lock(); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1724,7 +1729,7 @@ int xen_evtchn_do_upcall(void) } while (vcpu_info->evtchn_upcall_pending); - read_unlock(&evtchn_rwlock); + rcu_read_unlock(); /* * Increment irq_epoch only now to defer EOIs only for -- cgit From 8dafa9d0eb1a1550a0f4d462db9354161bc51e0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Oct 2023 21:24:45 +0200 Subject: sched/eevdf: Fix min_deadline heap integrity Marek and Biju reported instances of: "EEVDF scheduling fail, picking leftmost" which Mike correlated with cgroup scheduling and the min_deadline heap getting corrupted; some trace output confirms: > And yeah, min_deadline is hosed somehow: > > validate_cfs_rq: --- / > __print_se: ffff88845cf48080 w: 1024 ve: -58857638 lag: 870381 vd: -55861854 vmd: -66302085 E (11372/tr) > __print_se: ffff88810d165800 w: 25 ve: -80323686 lag: 22336429 vd: -41496434 vmd: -66302085 E (-1//autogroup-31) > __print_se: ffff888108379000 w: 25 ve: 0 lag: -57987257 vd: 114632828 vmd: 114632828 N (-1//autogroup-33) > validate_cfs_rq: min_deadline: -55861854 avg_vruntime: -62278313462 / 1074 = -57987256 Turns out that reweight_entity(), which tries really hard to be fast, does not do the normal dequeue+update+enqueue pattern but *does* scale the deadline. However, it then fails to propagate the updated deadline value up the heap. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Reported-by: Marek Szyprowski Reported-by: Biju Das Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Biju Das Tested-by: Mike Galbraith Link: https://lkml.kernel.org/r/20231006192445.GE743@noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ef7490c4b8b4..a4b904a010c6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3613,6 +3613,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, */ deadline = div_s64(deadline * old_weight, weight); se->deadline = se->vruntime + deadline; + min_deadline_cb_propagate(&se->run_node, NULL); } #ifdef CONFIG_SMP -- cgit From b01db23d5923a35023540edc4f0c5f019e11ac7d Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Fri, 29 Sep 2023 17:09:30 -0700 Subject: sched/eevdf: Fix pick_eevdf() The old pick_eevdf() could fail to find the actual earliest eligible deadline when it descended to the right looking for min_deadline, but it turned out that that min_deadline wasn't actually eligible. In that case we need to go back and search through any left branches we skipped looking for the actual best _eligible_ min_deadline. This is more expensive, but still O(log n), and at worst should only involve descending two branches of the rbtree. I've run this through a userspace stress test (thank you tools/lib/rbtree.c), so hopefully this implementation doesn't miss any corner cases. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Signed-off-by: Ben Segall Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/xm261qego72d.fsf_-_@google.com --- kernel/sched/fair.c | 72 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a4b904a010c6..061a30a8925a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -872,14 +872,16 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) * * Which allows an EDF like search on (sub)trees. */ -static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq) { struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node; struct sched_entity *curr = cfs_rq->curr; struct sched_entity *best = NULL; + struct sched_entity *best_left = NULL; if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) curr = NULL; + best = curr; /* * Once selected, run a task until it either becomes non-eligible or @@ -900,33 +902,75 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) } /* - * If this entity has an earlier deadline than the previous - * best, take this one. If it also has the earliest deadline - * of its subtree, we're done. + * Now we heap search eligible trees for the best (min_)deadline */ - if (!best || deadline_gt(deadline, best, se)) { + if (!best || deadline_gt(deadline, best, se)) best = se; - if (best->deadline == best->min_deadline) - break; - } /* - * If the earlest deadline in this subtree is in the fully - * eligible left half of our space, go there. + * Every se in a left branch is eligible, keep track of the + * branch with the best min_deadline */ + if (node->rb_left) { + struct sched_entity *left = __node_2_se(node->rb_left); + + if (!best_left || deadline_gt(min_deadline, best_left, left)) + best_left = left; + + /* + * min_deadline is in the left branch. rb_left and all + * descendants are eligible, so immediately switch to the second + * loop. + */ + if (left->min_deadline == se->min_deadline) + break; + } + + /* min_deadline is at this node, no need to look right */ + if (se->deadline == se->min_deadline) + break; + + /* else min_deadline is in the right branch. */ + node = node->rb_right; + } + + /* + * We ran into an eligible node which is itself the best. + * (Or nr_running == 0 and both are NULL) + */ + if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0) + return best; + + /* + * Now best_left and all of its children are eligible, and we are just + * looking for deadline == min_deadline + */ + node = &best_left->run_node; + while (node) { + struct sched_entity *se = __node_2_se(node); + + /* min_deadline is the current node */ + if (se->deadline == se->min_deadline) + return se; + + /* min_deadline is in the left branch */ if (node->rb_left && __node_2_se(node->rb_left)->min_deadline == se->min_deadline) { node = node->rb_left; continue; } + /* else min_deadline is in the right branch */ node = node->rb_right; } + return NULL; +} - if (!best || (curr && deadline_gt(deadline, best, curr))) - best = curr; +static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +{ + struct sched_entity *se = __pick_eevdf(cfs_rq); - if (unlikely(!best)) { + if (!se) { struct sched_entity *left = __pick_first_entity(cfs_rq); if (left) { pr_err("EEVDF scheduling fail, picking leftmost\n"); @@ -934,7 +978,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) } } - return best; + return se; } #ifdef CONFIG_SCHED_DEBUG -- cgit From 054c22bd784d6953ac85f545bed4a2a27b0e4ddb Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 6 Oct 2023 10:21:50 +0200 Subject: printk: flush consoles before checking progress Commit 9e70a5e109a4 ("printk: Add per-console suspended state") removed console lock usage during resume and replaced it with the clearly defined console_list_lock and srcu mechanisms. However, the console lock usage had an important side-effect of flushing the consoles. After its removal, consoles were no longer flushed before checking their progress. Add the console_lock/console_unlock dance to the beginning of __pr_flush() to actually flush the consoles before checking their progress. Also add comments to clarify this additional usage of the console lock. Note that console_unlock() does not guarantee flushing all messages since the commit dbdda842fe96f89 ("printk: Add console owner and waiter logic to load balance console writes"). Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217955 Fixes: 9e70a5e109a4 ("printk: Add per-console suspended state") Co-developed-by: Petr Mladek Signed-off-by: Petr Mladek Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20231006082151.6969-2-pmladek@suse.com --- kernel/printk/printk.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8787d3a72114..1919899b6897 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3738,12 +3738,18 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre seq = prb_next_seq(prb); + /* Flush the consoles so that records up to @seq are printed. */ + console_lock(); + console_unlock(); + for (;;) { diff = 0; /* * Hold the console_lock to guarantee safe access to - * console->seq. + * console->seq. Releasing console_lock flushes more + * records in case @seq is still not printed on all + * usable consoles. */ console_lock(); -- cgit From 15c0a870dc44ed14e01efbdd319d232234ee639f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 6 Sep 2023 14:22:07 +0800 Subject: ceph: fix incorrect revoked caps assert in ceph_fill_file_size() When truncating the inode the MDS will acquire the xlock for the ifile Locker, which will revoke the 'Frwsxl' caps from the clients. But when the client just releases and flushes the 'Fw' caps to MDS, for exmaple, and once the MDS receives the caps flushing msg it just thought the revocation has finished. Then the MDS will continue truncating the inode and then issued the truncate notification to all the clients. While just before the clients receives the cap flushing ack they receive the truncation notification, the clients will detecte that the 'issued | dirty' is still holding the 'Fw' caps. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/56693 Fixes: b0d7c2231015 ("ceph: introduce i_truncate_mutex") Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 800ab7920513..b79100f720b3 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -769,9 +769,7 @@ int ceph_fill_file_size(struct inode *inode, int issued, ci->i_truncate_seq = truncate_seq; /* the MDS should have revoked these caps */ - WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL | - CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR | + WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_LAZYIO)); /* * If we hold relevant caps, or in the case where we're -- cgit From 42b71826fe5d01f3e6cdddc91f81d0e4afb91801 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 29 Sep 2023 10:12:06 +0100 Subject: ceph: remove unnecessary IS_ERR() check in ceph_fname_to_usr() Before returning, function ceph_fname_to_usr() does a final IS_ERR() check in 'dir': if ((dir != fname->dir) && !IS_ERR(dir)) {...} This check is unnecessary because, if the 'dir' variable has changed to something other than 'fname->dir' (it's initial value), that error check has been performed already and, if there was indeed an error, it would have been returned immediately. Besides, this useless IS_ERR() is also confusing static analysis tools. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309282202.xZxGdvS3-lkp@intel.com/ Signed-off-by: Luis Henriques Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e1f31b86fd48..5b5112c78462 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -460,7 +460,7 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, out: fscrypt_fname_free_buffer(&_tname); out_inode: - if ((dir != fname->dir) && !IS_ERR(dir)) { + if (dir != fname->dir) { if ((dir->i_state & I_NEW)) discard_new_inode(dir); else -- cgit From 7563cf17dce0a875ba3d872acdc63a78ea344019 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Wed, 4 Oct 2023 18:38:27 -0500 Subject: libceph: use kernel_connect() Direct calls to ops->connect() can overwrite the address parameter when used in conjunction with BPF SOCK_ADDR hooks. Recent changes to kernel_connect() ensure that callers are insulated from such side effects. This patch wraps the direct call to ops->connect() with kernel_connect() to prevent unexpected changes to the address passed to ceph_tcp_connect(). This change was originally part of a larger patch targeting the net tree addressing all instances of unprotected calls to ops->connect() throughout the kernel, but this change was split up into several patches targeting various trees. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/netdev/20230821100007.559638-1-jrife@google.com/ Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Signed-off-by: Jordan Rife Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 10a41cd9c523..3c8b78d9c4d1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -459,8 +459,8 @@ int ceph_tcp_connect(struct ceph_connection *con) set_sock_callbacks(sock, con); con_sock_state_connecting(con); - ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), - O_NONBLOCK); + ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), + O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr), -- cgit From 07bb00ef00ace88dd6f695fadbba76565756e55c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Oct 2023 11:52:39 +0300 Subject: ceph: fix type promotion bug on 32bit systems In this code "ret" is type long and "src_objlen" is unsigned int. The problem is that on 32bit systems, when we do the comparison signed longs are type promoted to unsigned int. So negative error codes from do_splice_direct() are treated as success instead of failure. Cc: stable@vger.kernel.org Fixes: 1b0c3b9f91f0 ("ceph: re-org copy_file_range and fix some error paths") Signed-off-by: Dan Carpenter Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b1da02f5dbe3..b5f8038065d7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2969,7 +2969,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = do_splice_direct(src_file, &src_off, dst_file, &dst_off, src_objlen, flags); /* Abort on short copies or on error */ - if (ret < src_objlen) { + if (ret < (long)src_objlen) { dout("Failed partial copy (%zd)\n", ret); goto out; } -- cgit From 5d9cea8a552ee122e21fbd5a3c5d4eb85f648e06 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:15 +0200 Subject: powerpc/8xx: Fix pte_access_permitted() for PAGE_NONE On 8xx, PAGE_NONE is handled by setting _PAGE_NA instead of clearing _PAGE_USER. But then pte_user() returns 1 also for PAGE_NONE. As _PAGE_NA prevent reads, add a specific version of pte_read() that returns 0 when _PAGE_NA is set instead of always returning 1. Fixes: 351750331fc1 ("powerpc/mm: Introduce _PAGE_NA") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/57bcfbe578e43123f9ed73e040229b80f1ad56ec.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 7 +++++++ arch/powerpc/include/asm/nohash/pgtable.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 21f681ee535a..e6fe1d5731f2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte) #define pte_wrprotect pte_wrprotect +static inline int pte_read(pte_t pte) +{ + return (pte_val(pte) & _PAGE_RO) != _PAGE_NA; +} + +#define pte_read pte_read + static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RO); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 56ea48276356..c721478c5934 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_RW; } #endif +#ifndef pte_read static inline int pte_read(pte_t pte) { return 1; } +#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -- cgit From 5ea0bbaa32e8f54e9a57cfee4a3b8769b80be0d2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:16 +0200 Subject: powerpc/64e: Fix wrong test in __ptep_test_and_clear_young() Commit 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.") replaced: if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) return 0; By: if (pte_young(*ptep)) return 0; But it should be: if (!pte_young(*ptep)) return 0; Fix it. Fixes: 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/8bb7f06494e21adada724ede47a4c3d97e879d40.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 5cd9acf58a7d..eb6891e34cbd 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if (pte_young(*ptep)) + if (!pte_young(*ptep)) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; -- cgit From 76aca10ccb7c23a7b7a0d56e0bfde2c8cdddfe24 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:09 +0200 Subject: ASoC: soc-dapm: Add helper for comparing widget name Some drivers use one event callback for multiple widgets but still need to perform a bit different actions based on actual widget. This is done by comparing widget name, however drivers tend to miss possible name prefix. Add a helper to solve common mistakes. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + sound/soc/soc-component.c | 1 + sound/soc/soc-dapm.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index d2faec9a323e..433543eb82b9 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -469,6 +469,7 @@ void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index ba7c0ae82e00..566033f7dd2e 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -242,6 +242,7 @@ int snd_soc_component_notify_control(struct snd_soc_component *component, char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; struct snd_kcontrol *kctl; + /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); else diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index f07e83678373..312e55579831 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2728,6 +2728,18 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_soc_dapm_update_dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); + const char *wname = widget->name; + + if (component->name_prefix) + wname += strlen(component->name_prefix) + 1; /* plus space */ + + return strcmp(wname, s); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_widget_name_cmp); + /* * dapm_update_widget_flags() - Re-compute widget sink and source flags * @w: The widget for which to update the flags -- cgit From c29e5263d32a6d0ec094d425ae7fef3fa8d4da1c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:10 +0200 Subject: ASoC: codecs: wsa-macro: handle component name prefix When comparing widget names in wsa_macro_spk_boost_event(), consider also the component's name prefix. Otherwise the WSA codec won't have proper mixer setup resulting in no sound playback through speakers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index ec6859ec0d38..fff4a8b862a7 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1675,12 +1675,12 @@ static int wsa_macro_spk_boost_event(struct snd_soc_dapm_widget *w, u16 boost_path_ctl, boost_path_cfg1; u16 reg, reg_mix; - if (!strcmp(w->name, "WSA_RX INT0 CHAIN")) { + if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT0 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST0_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX0_RX_PATH_CFG1; reg = CDC_WSA_RX0_RX_PATH_CTL; reg_mix = CDC_WSA_RX0_RX_PATH_MIX_CTL; - } else if (!strcmp(w->name, "WSA_RX INT1 CHAIN")) { + } else if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT1 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST1_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX1_RX_PATH_CFG1; reg = CDC_WSA_RX1_RX_PATH_CTL; -- cgit From bfbc79de60c53e5fed505390440b87ef59ee268c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:52 +0200 Subject: ASoC: codecs: wcd938x: drop bogus bind error handling Drop the bogus error handling for a soundwire device backcast during bind() that cannot fail. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-2-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a3c680661377..cf1eaf678fc2 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3448,10 +3448,6 @@ static int wcd938x_bind(struct device *dev) wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); - if (!wcd938x->tx_sdw_dev) { - dev_err(dev, "could not get txslave with matching of dev\n"); - return -EINVAL; - } /* As TX is main CSR reg interface, which should not be suspended first. * expicilty add the dependency link */ -- cgit From fa2f8a991ba4aa733ac1c3b1be0c86148aa4c52c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:53 +0200 Subject: ASoC: codecs: wcd938x: fix unbind tear down order Make sure to deregister the component before tearing down the resources it depends on during unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-3-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index cf1eaf678fc2..c617fc3ce489 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3504,10 +3504,10 @@ static void wcd938x_unbind(struct device *dev) { struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + snd_soc_unregister_component(dev); device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); - snd_soc_unregister_component(dev); component_unbind_all(dev, wcd938x); } -- cgit From da29b94ed3547cee9d510d02eca4009f2de476cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:54 +0200 Subject: ASoC: codecs: wcd938x: fix resource leaks on bind errors Add the missing code to release resources on bind errors, including the references taken by wcd938x_sdw_device_get() which also need to be dropped on unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-4-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index c617fc3ce489..7e0b07eeed77 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3435,7 +3435,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->rxdev = wcd938x_sdw_device_get(wcd938x->rxnode); if (!wcd938x->rxdev) { dev_err(dev, "could not find slave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_unbind; } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; @@ -3443,7 +3444,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { dev_err(dev, "could not find txslave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_rxdev; } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; @@ -3454,31 +3456,35 @@ static int wcd938x_bind(struct device *dev) if (!device_link_add(wcd938x->rxdev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink tx and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_txdev; } if (!device_link_add(dev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and tx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rxtx_link; } if (!device_link_add(dev, wcd938x->rxdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_tx_link; } wcd938x->regmap = dev_get_regmap(&wcd938x->tx_sdw_dev->dev, NULL); if (!wcd938x->regmap) { dev_err(dev, "could not get TX device regmap\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rx_link; } ret = wcd938x_irq_init(wcd938x, dev); if (ret) { dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); - return ret; + goto err_remove_rx_link; } wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; @@ -3487,17 +3493,33 @@ static int wcd938x_bind(struct device *dev) ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); - return ret; + goto err_remove_rx_link; } ret = snd_soc_register_component(dev, &soc_codec_dev_wcd938x, wcd938x_dais, ARRAY_SIZE(wcd938x_dais)); - if (ret) + if (ret) { dev_err(dev, "%s: Codec registration failed\n", __func__); + goto err_remove_rx_link; + } - return ret; + return 0; +err_remove_rx_link: + device_link_remove(dev, wcd938x->rxdev); +err_remove_tx_link: + device_link_remove(dev, wcd938x->txdev); +err_remove_rxtx_link: + device_link_remove(wcd938x->rxdev, wcd938x->txdev); +err_put_txdev: + put_device(wcd938x->txdev); +err_put_rxdev: + put_device(wcd938x->rxdev); +err_unbind: + component_unbind_all(dev, wcd938x); + + return ret; } static void wcd938x_unbind(struct device *dev) @@ -3508,6 +3530,8 @@ static void wcd938x_unbind(struct device *dev) device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); + put_device(wcd938x->txdev); + put_device(wcd938x->rxdev); component_unbind_all(dev, wcd938x); } -- cgit From 69a026a2357ee69983690d07976de44ef26ee38a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:55 +0200 Subject: ASoC: codecs: wcd938x: fix regulator leaks on probe errors Make sure to disable and free the regulators on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-5-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 7e0b07eeed77..679c627f7eaa 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3325,8 +3325,10 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device return dev_err_probe(dev, ret, "Failed to get supplies\n"); ret = regulator_bulk_enable(WCD938X_MAX_SUPPLY, wcd938x->supplies); - if (ret) + if (ret) { + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); return dev_err_probe(dev, ret, "Failed to enable supplies\n"); + } wcd938x_dt_parse_micbias_info(dev, wcd938x); @@ -3592,13 +3594,13 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) - return ret; + goto err_disable_regulators; wcd938x_reset(wcd938x); ret = component_master_add_with_match(dev, &wcd938x_comp_ops, match); if (ret) - return ret; + goto err_disable_regulators; pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); @@ -3608,11 +3610,21 @@ static int wcd938x_probe(struct platform_device *pdev) pm_runtime_idle(dev); return 0; + +err_disable_regulators: + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); + + return ret; } static void wcd938x_remove(struct platform_device *pdev) { + struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + component_master_del(&pdev->dev, &wcd938x_comp_ops); + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } #if defined(CONFIG_OF) -- cgit From 3ebebb2c1eca92a15107b2d7aeff34196fd9e217 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:56 +0200 Subject: ASoC: codecs: wcd938x: fix runtime PM imbalance on remove Make sure to balance the runtime PM operations, including the disable count, on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-6-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 679c627f7eaa..d27b919c63b4 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3620,9 +3620,15 @@ err_disable_regulators: static void wcd938x_remove(struct platform_device *pdev) { - struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + + component_master_del(dev, &wcd938x_comp_ops); + + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); - component_master_del(&pdev->dev, &wcd938x_comp_ops); regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } -- cgit From f0dfdcbe706462495d47982eecd13a61aabd644d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:57 +0200 Subject: ASoC: codecs: wcd938x-sdw: fix use after free on driver unbind Make sure to deregister the component when the driver is being unbound and before the underlying device-managed resources are freed. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-7-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 6951120057e5..1baea04480e2 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1281,6 +1281,15 @@ static int wcd9380_probe(struct sdw_slave *pdev, return component_add(dev, &wcd938x_sdw_component_ops); } +static int wcd9380_remove(struct sdw_slave *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &wcd938x_sdw_component_ops); + + return 0; +} + static const struct sdw_device_id wcd9380_slave_id[] = { SDW_SLAVE_ENTRY(0x0217, 0x10d, 0), {}, @@ -1320,6 +1329,7 @@ static const struct dev_pm_ops wcd938x_sdw_pm_ops = { static struct sdw_driver wcd9380_codec_driver = { .probe = wcd9380_probe, + .remove = wcd9380_remove, .ops = &wcd9380_slave_ops, .id_table = wcd9380_slave_id, .driver = { -- cgit From c5c0383082eace13da2ffceeea154db2780165e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:58 +0200 Subject: ASoC: codecs: wcd938x-sdw: fix runtime PM imbalance on probe errors Make sure to balance the runtime PM operations, including the disable count, on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-8-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 1baea04480e2..a1f04010da95 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1278,7 +1278,18 @@ static int wcd9380_probe(struct sdw_slave *pdev, pm_runtime_set_active(dev); pm_runtime_enable(dev); - return component_add(dev, &wcd938x_sdw_component_ops); + ret = component_add(dev, &wcd938x_sdw_component_ops); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + + return ret; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1287,6 +1298,10 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd938x_sdw_component_ops); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + return 0; } -- cgit From af5fd122d7bd739a2b66405f6e8ab92557279325 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 6 Oct 2023 17:44:05 +0100 Subject: ASoC: cs35l56: Fix illegal use of init_completion() Fix cs35l56_patch() to call reinit_completion() to reinitialize the completion object. It was incorrectly using init_completion(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231006164405.253796-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index f2e7c6d0be46..9c2d9cbc63d3 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -706,7 +706,7 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56) mutex_lock(&cs35l56->base.irq_lock); - init_completion(&cs35l56->init_completion); + reinit_completion(&cs35l56->init_completion); cs35l56->soft_resetting = true; cs35l56_system_reset(&cs35l56->base, !!cs35l56->sdw_peripheral); -- cgit From aa6464edbd51af4a2f8db43df866a7642b244b5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2023 17:00:24 +0300 Subject: ASoC: pxa: fix a memory leak in probe() Free the "priv" pointer before returning the error code. Fixes: 90eb6b59d311 ("ASoC: pxa-ssp: add support for an external clock in devicetree") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/84ac2313-1420-471a-b2cb-3269a2e12a7c@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/pxa/pxa-ssp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b70034c07eee..b8a3cb8b7597 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -773,7 +773,7 @@ static int pxa_ssp_probe(struct snd_soc_dai *dai) if (IS_ERR(priv->extclk)) { ret = PTR_ERR(priv->extclk); if (ret == -EPROBE_DEFER) - return ret; + goto err_priv; priv->extclk = NULL; } -- cgit From c6df843348d6b71ea986266c12831cb60c2cf325 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Oct 2023 10:21:04 +0200 Subject: regmap: fix NULL deref on lookup Not all regmaps have a name so make sure to check for that to avoid dereferencing a NULL pointer when dev_get_regmap() is used to lookup a named regmap. Fixes: e84861fec32d ("regmap: dev_get_regmap_match(): fix string comparison") Cc: stable@vger.kernel.org # 5.8 Cc: Marc Kleine-Budde Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231006082104.16707-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 884cb51c8f67..234a84ecde8b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1478,7 +1478,7 @@ static int dev_get_regmap_match(struct device *dev, void *res, void *data) /* If the user didn't specify a name match any */ if (data) - return !strcmp((*r)->name, data); + return (*r)->name && !strcmp((*r)->name, data); else return 1; } -- cgit From f9b3ea02555e67e2e7bf95219953b88d122bd275 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Oct 2023 14:11:01 +0200 Subject: ACPI: resource: Add TongFang GM6BGEQ, GM6BG5Q and GM6BG0Q to irq1_edge_low_force_override[] The TongFang GM6BGEQ, GM6BG5Q and GM6BG0Q are 3 GPU variants of a TongFang barebone design which is sold under various brand names. The ACPI IRQ override for the keyboard IRQ must be used on these AMD Zen laptops in order for the IRQ to work. Adjust the pcspecialist_laptop[] DMI match table for this: 1. Drop the sys-vendor match from the existing PCSpecialist Elimina Pro 16 entry for the GM6BGEQ (RTX3050 GPU) model so that it will also match the laptop when sold by other vendors such as hyperbook.pl. 2. Add board-name matches for the GM6BG5Q (RTX4050) and GM6B0Q (RTX4060) models. Note the .ident values of the dmi_system_id structs are left unset since these are not used. Suggested-by: August Wikerfors Reported-by: Francesco Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217394 Link: https://laptopparts4less.frl/index.php?route=product/search&filter_name=GM6BG Link: https://hyperbook.pl/en/content/14-hyperbook-drivers Link: https://linux-hardware.org/?probe=bfa70344e3 Link: https://bbs.archlinuxcn.org/viewtopic.php?id=13313 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8116b55b6c98..297a88587031 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -507,16 +507,23 @@ static const struct dmi_system_id maingear_laptop[] = { static const struct dmi_system_id pcspecialist_laptop[] = { { - .ident = "PCSpecialist Elimina Pro 16 M", - /* - * Some models have product-name "Elimina Pro 16 M", - * others "GM6BGEQ". Match on board-name to match both. - */ + /* TongFang GM6BGEQ / PCSpecialist Elimina Pro 16 M, RTX 3050 */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"), }, }, + { + /* TongFang GM6BG5Q, RTX 4050 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6BG5Q"), + }, + }, + { + /* TongFang GM6BG0Q / PCSpecialist Elimina Pro 16 M, RTX 4060 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6BG0Q"), + }, + }, { } }; -- cgit From 2f1b0d3d733169eb11680bfa97c266ae5e757148 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Oct 2023 14:07:05 +0200 Subject: riscv, bpf: Sign-extend return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V architecture does not expose sub-registers, and hold all 32-bit values in a sign-extended format [1] [2]: | The compiler and calling convention maintain an invariant that all | 32-bit values are held in a sign-extended format in 64-bit | registers. Even 32-bit unsigned integers extend bit 31 into bits | 63 through 32. Consequently, conversion between unsigned and | signed 32-bit integers is a no-op, as is conversion from a signed | 32-bit integer to a signed 64-bit integer. While BPF, on the other hand, exposes sub-registers, and use zero-extension (similar to arm64/x86). This has led to some subtle bugs, where a BPF JITted program has not sign-extended the a0 register (return value in RISC-V land), passed the return value up the kernel, e.g.: | int from_bpf(void); | | long foo(void) | { | return from_bpf(); | } Here, a0 would be 0xffff_ffff, instead of the expected 0xffff_ffff_ffff_ffff. Internally, the RISC-V JIT uses a5 as a dedicated register for BPF return values. Keep a5 zero-extended, but explicitly sign-extend a0 (which is used outside BPF land). Now that a0 (RISC-V ABI) and a5 (BPF ABI) differs, a0 is only moved to a5 for non-BPF native calls (BPF_PSEUDO_CALL). Fixes: 2353ecc6f91f ("bpf, riscv: add BPF JIT for RV64G") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://github.com/riscv/riscv-isa-manual/releases/download/riscv-isa-release-056b6ff-2023-10-02/unpriv-isa-asciidoc.pdf # [2] Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases/download/draft-20230929-e5c800e661a53efe3c2678d71a306323b60eb13b/riscv-abi.pdf # [2] Link: https://lore.kernel.org/bpf/20231004120706.52848-2-bjorn@kernel.org --- arch/riscv/net/bpf_jit_comp64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index ecd3ae6f4116..de4c9957d223 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -245,7 +245,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) - emit_mv(RV_REG_A0, RV_REG_A5, ctx); + emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); @@ -1515,7 +1515,8 @@ out_be: if (ret) return ret; - emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); + if (insn->src_reg != BPF_PSEUDO_CALL) + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); break; } /* tail call */ -- cgit From 7112cd26e606c7ba51f9cc5c1905f06039f6f379 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Oct 2023 14:07:06 +0200 Subject: riscv, bpf: Track both a0 (RISC-V ABI) and a5 (BPF) return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V BPF uses a5 for BPF return values, which are zero-extended, whereas the RISC-V ABI uses a0 which is sign-extended. In other words, a5 and a0 can differ, and are used in different context. The BPF trampoline are used for both BPF programs, and regular kernel functions. Make sure that the RISC-V BPF trampoline saves, and restores both a0 and a5. Fixes: 49b5e77ae3e2 ("riscv, bpf: Add bpf trampoline support for RV64") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231004120706.52848-3-bjorn@kernel.org --- arch/riscv/net/bpf_jit_comp64.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index de4c9957d223..8581693e62d3 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -759,8 +759,10 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; - if (save_ret) - emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx); + if (save_ret) { + emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); + } /* update branch with beqz */ if (ctx->insns) { @@ -853,7 +855,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { - stack_size += 8; + stack_size += 16; /* Save both A5 (BPF R0) and A0 */ retval_off = stack_size; } @@ -957,6 +959,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (ret) goto out; emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); im->ip_after_call = ctx->insns + ctx->ninsns; /* 2 nops reserved for auipc+jalr pair */ emit(rv_nop(), ctx); @@ -988,8 +991,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_args(nregs, args_off, ctx); - if (save_ret) + if (save_ret) { emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); + emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx); + } emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); -- cgit From a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 5 Oct 2023 11:06:36 +0200 Subject: x86/sev: Disable MMIO emulation from user mode A virt scenario can be constructed where MMIO memory can be user memory. When that happens, a race condition opens between when the hardware raises the #VC and when the #VC handler gets to emulate the instruction. If the MOVS is replaced with a MOVS accessing kernel memory in that small race window, then write to kernel memory happens as the access checks are not done at emulation time. Disable MMIO emulation in user mode temporarily until a sensible use case appears and justifies properly handling the race window. Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions") Reported-by: Tom Dohrmann Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/kernel/sev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f60..4ee14e647ae6 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1509,6 +1509,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ES_DECODE_FAILED; } + if (user_mode(ctxt->regs)) + return ES_UNSUPPORTED; + switch (mmio) { case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); -- cgit From b9cb9c45583b911e0db71d09caa6b56469eb2bdf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Jun 2023 17:42:42 +0200 Subject: x86/sev: Check IOBM for IOIO exceptions from user-space Check the IO permission bitmap (if present) before emulating IOIO #VC exceptions for user-space. These permissions are checked by hardware already before the #VC is raised, but due to the VC-handler decoding race it needs to be checked again in software. Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") Reported-by: Tom Dohrmann Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/boot/compressed/sev.c | 5 +++++ arch/x86/kernel/sev-shared.c | 22 +++++++++++++++------- arch/x86/kernel/sev.c | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index dc8c876fbd8f..afd91041ec3e 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + #undef __init #define __init diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb..bcd77c48be0a 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -656,6 +656,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; + size_t size; + u64 port; + *exitinfo = 0; switch (insn->opcode.bytes[0]) { @@ -664,7 +667,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ @@ -672,41 +675,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } + *exitinfo |= port << 16; + switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: @@ -716,12 +721,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; + size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; + size = (insn->opnd_bytes == 2) ? 2 : 4; } + switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; @@ -737,7 +745,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; - return ES_OK; + return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4ee14e647ae6..0f218932e844 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + BUG_ON(size > 4); + + if (user_mode(ctxt->regs)) { + struct thread_struct *t = ¤t->thread; + struct io_bitmap *iobm = t->io_bitmap; + size_t idx; + + if (!iobm) + goto fault; + + for (idx = port; idx < port + size; ++idx) { + if (test_bit(idx, iobm->bitmap)) + goto fault; + } + } + + return ES_OK; + +fault: + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + + return ES_EXCEPTION; +} + /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" -- cgit From a12bbb3cccf03b12847de0f7a6772127f90936ac Mon Sep 17 00:00:00 2001 From: Andrew Kanner Date: Sat, 7 Oct 2023 10:51:49 +0300 Subject: xdp: Fix zero-size allocation warning in xskq_create() Syzkaller reported the following issue: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 2807 at mm/vmalloc.c:3247 __vmalloc_node_range (mm/vmalloc.c:3361) Modules linked in: CPU: 0 PID: 2807 Comm: repro Not tainted 6.6.0-rc2+ #12 Hardware name: Generic DT based system unwind_backtrace from show_stack (arch/arm/kernel/traps.c:258) show_stack from dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) dump_stack_lvl from __warn (kernel/panic.c:633 kernel/panic.c:680) __warn from warn_slowpath_fmt (./include/linux/context_tracking.h:153 kernel/panic.c:700) warn_slowpath_fmt from __vmalloc_node_range (mm/vmalloc.c:3361 (discriminator 3)) __vmalloc_node_range from vmalloc_user (mm/vmalloc.c:3478) vmalloc_user from xskq_create (net/xdp/xsk_queue.c:40) xskq_create from xsk_setsockopt (net/xdp/xsk.c:953 net/xdp/xsk.c:1286) xsk_setsockopt from __sys_setsockopt (net/socket.c:2308) __sys_setsockopt from ret_fast_syscall (arch/arm/kernel/entry-common.S:68) xskq_get_ring_size() uses struct_size() macro to safely calculate the size of struct xsk_queue and q->nentries of desc members. But the syzkaller repro was able to set q->nentries with the value initially taken from copy_from_sockptr() high enough to return SIZE_MAX by struct_size(). The next PAGE_ALIGN(size) is such case will overflow the size_t value and set it to 0. This will trigger WARN_ON_ONCE in vmalloc_user() -> __vmalloc_node_range(). The issue is reproducible on 32-bit arm kernel. Fixes: 9f78bf330a66 ("xsk: support use vaddr as ring") Reported-by: syzbot+fae676d3cf469331fc89@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000c84b4705fb31741e@google.com/T/ Reported-by: syzbot+b132693e925cbbd89e26@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000e20df20606ebab4f@google.com/T/ Signed-off-by: Andrew Kanner Signed-off-by: Daniel Borkmann Tested-by: syzbot+fae676d3cf469331fc89@syzkaller.appspotmail.com Acked-by: Magnus Karlsson Link: https://syzkaller.appspot.com/bug?extid=fae676d3cf469331fc89 Link: https://lore.kernel.org/bpf/20231007075148.1759-1-andrew.kanner@gmail.com --- net/xdp/xsk_queue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index f8905400ee07..d2c264030017 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -34,6 +34,16 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) q->ring_mask = nentries - 1; size = xskq_get_ring_size(q, umem_queue); + + /* size which is overflowing or close to SIZE_MAX will become 0 in + * PAGE_ALIGN(), checking SIZE_MAX is enough due to the previous + * is_power_of_2(), the rest will be handled by vmalloc_user() + */ + if (unlikely(size == SIZE_MAX)) { + kfree(q); + return NULL; + } + size = PAGE_ALIGN(size); q->ring = vmalloc_user(size); -- cgit From 1ca0b605150501b7dc59f3016271da4eb3e96fce Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Mon, 9 Oct 2023 15:58:11 +0200 Subject: cgroup: Remove duplicates in cgroup v1 tasks file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One PID may appear multiple times in a preloaded pidlist. (Possibly due to PID recycling but we have reports of the same task_struct appearing with different PIDs, thus possibly involving transfer of PID via de_thread().) Because v1 seq_file iterator uses PIDs as position, it leads to a message: > seq_file: buggy .next function kernfs_seq_next did not update position index Conservative and quick fix consists of removing duplicates from `tasks` file (as opposed to removing pidlists altogether). It doesn't affect correctness (it's sufficient to show a PID once), performance impact would be hidden by unconditional sorting of the pidlist already in place (asymptotically). Link: https://lore.kernel.org/r/20230823174804.23632-1-mkoutny@suse.com/ Suggested-by: Firo Yang Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cgroup/cgroup-v1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index c487ffef6652..76db6c67e39a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -360,10 +360,9 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, } css_task_iter_end(&it); length = n; - /* now sort & (if procs) strip out duplicates */ + /* now sort & strip out duplicates (tgids or recycled thread PIDs) */ sort(array, length, sizeof(pid_t), cmppid, NULL); - if (type == CGROUP_FILE_PROCS) - length = pidlist_uniq(array, length); + length = pidlist_uniq(array, length); l = cgroup_pidlist_find_create(cgrp, type); if (!l) { -- cgit From 1bba0badff0ede8dc51641cff4b153422baa3369 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 9 Oct 2023 16:34:12 +0100 Subject: ASoC: cs35l56: ASP1 DOUT must default to Hi-Z when not transmitting The ASP1 DOUT line must be defaulted to be high-impedance when it is not actually transmitting data for an active channel. In non-SoundWire modes ASP1 will usually be shared by multiple amps so each amp must only drive the line during the slot for an enabled TX channel. In SoundWire mode a custom firmware can use ASP1 as a secondary chip-to-chip audio link or as GPIO. It should be defaulted to high-impedance since by default the purpose of this pin is not known. Backport note: On kernel versions before 6.6 the cs35l56->base.regmap argument to regmap_set_bits() must be changed to cs35l56->regmap. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231009153412.30380-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 9c2d9cbc63d3..f9059780b7a7 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1186,6 +1186,12 @@ post_soft_reset: /* Registers could be dirty after soft reset or SoundWire enumeration */ regcache_sync(cs35l56->base.regmap); + /* Set ASP1 DOUT to high-impedance when it is not transmitting audio data. */ + ret = regmap_set_bits(cs35l56->base.regmap, CS35L56_ASP1_CONTROL3, + CS35L56_ASP1_DOUT_HIZ_CTRL_MASK); + if (ret) + return dev_err_probe(cs35l56->base.dev, ret, "Failed to write ASP1_CONTROL3\n"); + cs35l56->base.init_done = true; complete(&cs35l56->init_completion); -- cgit From 39465cac283702a7d4a507a558db81898029c6d3 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 5 Sep 2023 18:02:03 +0800 Subject: drm/vmwgfx: fix typo of sizeof argument Since size of 'header' pointer and '*header' structure is equal on 64-bit machines issue probably didn't cause any wrong behavior. But anyway, fixing typo is required. Fixes: 7a73ba7469cb ("drm/vmwgfx: Use TTM handles instead of SIDs as user-space surface handles.") Co-developed-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20230905100203.1716731-1-konstantin.meskhidze@huawei.com --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 98e0723ca6f5..cc3f301ca163 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1619,7 +1619,7 @@ static int vmw_cmd_tex_state(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdSetTextureState); SVGA3dTextureState *last_state = (SVGA3dTextureState *) - ((unsigned long) header + header->size + sizeof(header)); + ((unsigned long) header + header->size + sizeof(*header)); SVGA3dTextureState *cur_state = (SVGA3dTextureState *) ((unsigned long) header + sizeof(*cmd)); struct vmw_resource *ctx; -- cgit From 91398b413d03660fd5828f7b4abc64e884b98069 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 28 Sep 2023 00:13:55 -0400 Subject: drm/vmwgfx: Keep a gem reference to user bos in surfaces Surfaces can be backed (i.e. stored in) memory objects (mob's) which are created and managed by the userspace as GEM buffers. Surfaces grab only a ttm reference which means that the gem object can be deleted underneath us, especially in cases where prime buffer export is used. Make sure that all userspace surfaces which are backed by gem objects hold a gem reference to make sure they're not deleted before vmw surfaces are done with them, which fixes: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 2632 at lib/refcount.c:28 refcount_warn_saturate+0xfb/0x150 Modules linked in: overlay vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock snd_ens1371 snd_ac97_codec ac97_bus snd_pcm gameport> CPU: 2 PID: 2632 Comm: vmw_ref_count Not tainted 6.5.0-rc2-vmwgfx #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 RIP: 0010:refcount_warn_saturate+0xfb/0x150 Code: eb 9e 0f b6 1d 8b 5b a6 01 80 fb 01 0f 87 ba e4 80 00 83 e3 01 75 89 48 c7 c7 c0 3c f9 a3 c6 05 6f 5b a6 01 01 e8 15 81 98 ff <0f> 0b e9 6f ff ff ff 0f b> RSP: 0018:ffffbdc34344bba0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000027 RDX: ffff960475ea1548 RSI: 0000000000000001 RDI: ffff960475ea1540 RBP: ffffbdc34344bba8 R08: 0000000000000003 R09: 65646e75203a745f R10: ffffffffa5b32b20 R11: 72657466612d6573 R12: ffff96037d6a6400 R13: ffff9603484805b0 R14: 000000000000000b R15: ffff9603bed06060 FS: 00007f5fd8520c40(0000) GS:ffff960475e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f5fda755000 CR3: 000000010d012005 CR4: 00000000003706e0 Call Trace: ? show_regs+0x6e/0x80 ? refcount_warn_saturate+0xfb/0x150 ? __warn+0x91/0x150 ? refcount_warn_saturate+0xfb/0x150 ? report_bug+0x19d/0x1b0 ? handle_bug+0x46/0x80 ? exc_invalid_op+0x1d/0x80 ? asm_exc_invalid_op+0x1f/0x30 ? refcount_warn_saturate+0xfb/0x150 drm_gem_object_handle_put_unlocked+0xba/0x110 [drm] drm_gem_object_release_handle+0x6e/0x80 [drm] drm_gem_handle_delete+0x6a/0xc0 [drm] ? __pfx_vmw_bo_unref_ioctl+0x10/0x10 [vmwgfx] vmw_bo_unref_ioctl+0x33/0x40 [vmwgfx] drm_ioctl_kernel+0xbc/0x160 [drm] drm_ioctl+0x2d2/0x580 [drm] ? __pfx_vmw_bo_unref_ioctl+0x10/0x10 [vmwgfx] ? do_vmi_munmap+0xee/0x180 vmw_generic_ioctl+0xbd/0x180 [vmwgfx] vmw_unlocked_ioctl+0x19/0x20 [vmwgfx] __x64_sys_ioctl+0x99/0xd0 do_syscall_64+0x5d/0x90 ? syscall_exit_to_user_mode+0x2a/0x50 ? do_syscall_64+0x6d/0x90 ? handle_mm_fault+0x16e/0x2f0 ? exit_to_user_mode_prepare+0x34/0x170 ? irqentry_exit_to_user_mode+0xd/0x20 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x8e/0x190 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7f5fda51aaff Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 7> RSP: 002b:00007ffd536a4d30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd536a4de0 RCX: 00007f5fda51aaff RDX: 00007ffd536a4de0 RSI: 0000000040086442 RDI: 0000000000000003 RBP: 0000000040086442 R08: 000055fa603ada50 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffd536a51b8 R13: 0000000000000003 R14: 000055fa5ebb4c80 R15: 00007f5fda90f040 ---[ end trace 0000000000000000 ]--- A lot of the analyis on the bug was done by Murray McAllister and Ian Forbes. Reported-by: Murray McAllister Cc: Ian Forbes Signed-off-by: Zack Rusin Fixes: a950b989ea29 ("drm/vmwgfx: Do not drop the reference to the handle too soon") Cc: # v6.2+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20230928041355.737635-1-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 7 ++++--- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 17 ++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++++ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 10 ++++++---- drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 18 +++++++++++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 12 ++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 31 ++++++++++++------------------- 11 files changed, 68 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index c43853597776..2bfac3aad7b7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -34,6 +34,8 @@ static void vmw_bo_release(struct vmw_bo *vbo) { + WARN_ON(vbo->tbo.base.funcs && + kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); drm_gem_object_release(&vbo->tbo.base); } @@ -497,7 +499,7 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp, if (!(flags & drm_vmw_synccpu_allow_cs)) { atomic_dec(&vmw_bo->cpu_writers); } - vmw_user_bo_unref(vmw_bo); + vmw_user_bo_unref(&vmw_bo); } return ret; @@ -539,7 +541,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, return ret; ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); - vmw_user_bo_unref(vbo); + vmw_user_bo_unref(&vbo); if (unlikely(ret != 0)) { if (ret == -ERESTARTSYS || ret == -EBUSY) return -EBUSY; @@ -612,7 +614,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, } *out = to_vmw_bo(gobj); - ttm_bo_get(&(*out)->tbo); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 1d433fceed3d..0d496dc9c6af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -195,12 +195,19 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf) return buf; } -static inline void vmw_user_bo_unref(struct vmw_bo *vbo) +static inline struct vmw_bo *vmw_user_bo_ref(struct vmw_bo *vbo) { - if (vbo) { - ttm_bo_put(&vbo->tbo); - drm_gem_object_put(&vbo->tbo.base); - } + drm_gem_object_get(&vbo->tbo.base); + return vbo; +} + +static inline void vmw_user_bo_unref(struct vmw_bo **buf) +{ + struct vmw_bo *tmp_buf = *buf; + + *buf = NULL; + if (tmp_buf) + drm_gem_object_put(&tmp_buf->tbo.base); } static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index c0b24d1cacbf..a7c07692262b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -432,7 +432,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) * for the new COTable. Initially pin the buffer object to make sure * we can use tryreserve without failure. */ - ret = vmw_bo_create(dev_priv, &bo_params, &buf); + ret = vmw_gem_object_create(dev_priv, &bo_params, &buf); if (ret) { DRM_ERROR("Failed initializing new cotable MOB.\n"); goto out_done; @@ -502,7 +502,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) vmw_resource_mob_attach(res); /* Let go of the old mob. */ - vmw_bo_unreference(&old_buf); + vmw_user_bo_unref(&old_buf); res->id = vcotbl->type; ret = dma_resv_reserve_fences(bo->base.resv, 1); @@ -521,7 +521,7 @@ out_map_new: out_wait: ttm_bo_unpin(bo); ttm_bo_unreserve(bo); - vmw_bo_unreference(&buf); + vmw_user_bo_unref(&buf); out_done: MKS_STAT_TIME_POP(MKSSTAT_KERN_COTABLE_RESIZE); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 58bfdf203eca..3cd5090dedfc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -853,6 +853,10 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) /** * GEM related functionality - vmwgfx_gem.c */ +struct vmw_bo_params; +int vmw_gem_object_create(struct vmw_private *vmw, + struct vmw_bo_params *params, + struct vmw_bo **p_vbo); extern int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, struct drm_file *filp, uint32_t size, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index cc3f301ca163..36987ef3fc30 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1151,7 +1151,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, SVGAMobId *id, struct vmw_bo **vmw_bo_p) { - struct vmw_bo *vmw_bo; + struct vmw_bo *vmw_bo, *tmp_bo; uint32_t handle = *id; struct vmw_relocation *reloc; int ret; @@ -1164,7 +1164,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); - vmw_user_bo_unref(vmw_bo); + tmp_bo = vmw_bo; + vmw_user_bo_unref(&tmp_bo); if (unlikely(ret != 0)) return ret; @@ -1206,7 +1207,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, SVGAGuestPtr *ptr, struct vmw_bo **vmw_bo_p) { - struct vmw_bo *vmw_bo; + struct vmw_bo *vmw_bo, *tmp_bo; uint32_t handle = ptr->gmrId; struct vmw_relocation *reloc; int ret; @@ -1220,7 +1221,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); - vmw_user_bo_unref(vmw_bo); + tmp_bo = vmw_bo; + vmw_user_bo_unref(&tmp_bo); if (unlikely(ret != 0)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index c0da89e16e6f..8b1eb0061610 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -111,6 +111,20 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .vm_ops = &vmw_vm_ops, }; +int vmw_gem_object_create(struct vmw_private *vmw, + struct vmw_bo_params *params, + struct vmw_bo **p_vbo) +{ + int ret = vmw_bo_create(vmw, params, p_vbo); + + if (ret != 0) + goto out_no_bo; + + (*p_vbo)->tbo.base.funcs = &vmw_gem_object_funcs; +out_no_bo: + return ret; +} + int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, struct drm_file *filp, uint32_t size, @@ -126,12 +140,10 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, .pin = false }; - ret = vmw_bo_create(dev_priv, ¶ms, p_vbo); + ret = vmw_gem_object_create(dev_priv, ¶ms, p_vbo); if (ret != 0) goto out_no_bo; - (*p_vbo)->tbo.base.funcs = &vmw_gem_object_funcs; - ret = drm_gem_handle_create(filp, &(*p_vbo)->tbo.base, handle); out_no_bo: return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 1489ad73c103..818b7f109f53 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1471,8 +1471,8 @@ static int vmw_create_bo_proxy(struct drm_device *dev, /* Reserve and switch the backing mob. */ mutex_lock(&res->dev_priv->cmdbuf_mutex); (void) vmw_resource_reserve(res, false, true); - vmw_bo_unreference(&res->guest_memory_bo); - res->guest_memory_bo = vmw_bo_reference(bo_mob); + vmw_user_bo_unref(&res->guest_memory_bo); + res->guest_memory_bo = vmw_user_bo_ref(bo_mob); res->guest_memory_offset = 0; vmw_resource_unreserve(res, false, false, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); @@ -1666,7 +1666,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, err_out: /* vmw_user_lookup_handle takes one ref so does new_fb */ if (bo) - vmw_user_bo_unref(bo); + vmw_user_bo_unref(&bo); if (surface) vmw_surface_unreference(&surface); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index fb85f244c3d0..c45b4724e414 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -451,7 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); - vmw_user_bo_unref(buf); + vmw_user_bo_unref(&buf); out_unlock: mutex_unlock(&overlay->mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 71eeabf001c8..ca300c7427d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -141,7 +141,7 @@ static void vmw_resource_release(struct kref *kref) if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); ttm_bo_unreserve(bo); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } if (likely(res->hw_destroy != NULL)) { @@ -338,7 +338,7 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res, return 0; } - ret = vmw_bo_create(res->dev_priv, &bo_params, &gbo); + ret = vmw_gem_object_create(res->dev_priv, &bo_params, &gbo); if (unlikely(ret != 0)) goto out_no_bo; @@ -457,11 +457,11 @@ void vmw_resource_unreserve(struct vmw_resource *res, vmw_resource_mob_detach(res); if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } if (new_guest_memory_bo) { - res->guest_memory_bo = vmw_bo_reference(new_guest_memory_bo); + res->guest_memory_bo = vmw_user_bo_ref(new_guest_memory_bo); /* * The validation code should already have added a @@ -551,7 +551,7 @@ out_no_reserve: ttm_bo_put(val_buf->bo); val_buf->bo = NULL; if (guest_memory_dirty) - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); return ret; } @@ -727,7 +727,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr, goto out_no_validate; else if (!res->func->needs_guest_memory && res->guest_memory_bo) { WARN_ON_ONCE(vmw_resource_mob_attached(res)); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 1e81ff2422cf..a01ca3226d0a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -180,7 +180,7 @@ static int vmw_gb_shader_init(struct vmw_private *dev_priv, res->guest_memory_size = size; if (byte_code) { - res->guest_memory_bo = vmw_bo_reference(byte_code); + res->guest_memory_bo = vmw_user_bo_ref(byte_code); res->guest_memory_offset = offset; } shader->size = size; @@ -809,7 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, shader_type, num_input_sig, num_output_sig, tfile, shader_handle); out_bad_arg: - vmw_user_bo_unref(buffer); + vmw_user_bo_unref(&buffer); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5db403ee8261..3829be282ff0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -686,9 +686,6 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; - if (res->guest_memory_bo) - drm_gem_object_put(&res->guest_memory_bo->tbo.base); - *p_base = NULL; vmw_resource_unreference(&res); } @@ -855,23 +852,21 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, * expect a backup buffer to be present. */ if (dev_priv->has_mob && req->shareable) { - uint32_t backup_handle; - - ret = vmw_gem_object_create_with_handle(dev_priv, - file_priv, - res->guest_memory_size, - &backup_handle, - &res->guest_memory_bo); + struct vmw_bo_params params = { + .domain = VMW_BO_DOMAIN_SYS, + .busy_domain = VMW_BO_DOMAIN_SYS, + .bo_type = ttm_bo_type_device, + .size = res->guest_memory_size, + .pin = false + }; + + ret = vmw_gem_object_create(dev_priv, + ¶ms, + &res->guest_memory_bo); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; } - vmw_bo_reference(res->guest_memory_bo); - /* - * We don't expose the handle to the userspace and surface - * already holds a gem reference - */ - drm_gem_handle_delete(file_priv, backup_handle); } tmp = vmw_resource_reference(&srf->res); @@ -1512,7 +1507,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, if (ret == 0) { if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; goto out_unlock; } else { @@ -1526,8 +1521,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, res->guest_memory_size, &backup_handle, &res->guest_memory_bo); - if (ret == 0) - vmw_bo_reference(res->guest_memory_bo); } if (unlikely(ret != 0)) { -- cgit From 829955981c557c7fc7416581c4cd68a8a0c28620 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 9 Oct 2023 11:14:13 -0500 Subject: bpf: Fix verifier log for async callback return values The verifier, as part of check_return_code(), verifies that async callbacks such as from e.g. timers, will return 0. It does this by correctly checking that R0->var_off is in tnum_const(0), which effectively checks that it's in a range of 0. If this condition fails, however, it prints an error message which says that the value should have been in (0x0; 0x1). This results in possibly confusing output such as the following in which an async callback returns 1: At async callback the register R0 has value (0x1; 0x0) should have been in (0x0; 0x1) The fix is easy -- we should just pass the tnum_const(0) as the correct range to verbose_invalid_scalar(), which will then print the following: At async callback the register R0 has value (0x1; 0x0) should have been in (0x0; 0x0) Fixes: bfc6bb74e4f1 ("bpf: Implement verifier support for validation of async callbacks.") Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231009161414.235829-1-void@manifault.com --- kernel/bpf/verifier.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c0c7d137066a..873ade146f3d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14479,7 +14479,7 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; struct bpf_reg_state *reg; - struct tnum range = tnum_range(0, 1); + struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; struct bpf_func_state *frame = env->cur_state->frame[0]; @@ -14527,8 +14527,8 @@ static int check_return_code(struct bpf_verifier_env *env) return -EINVAL; } - if (!tnum_in(tnum_const(0), reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "async callback", "R0"); + if (!tnum_in(const_0, reg->var_off)) { + verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0"); return -EINVAL; } return 0; -- cgit From 57ddeb86b311ff41925e0fac7b983c097336f1f3 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 9 Oct 2023 11:14:14 -0500 Subject: selftests/bpf: Add testcase for async callback return value failure A previous commit updated the verifier to print an accurate failure message for when someone specifies a nonzero return value from an async callback. This adds a testcase for validating that the verifier emits the correct message in such a case. Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231009161414.235829-2-void@manifault.com --- tools/testing/selftests/bpf/prog_tests/timer.c | 6 ++- tools/testing/selftests/bpf/progs/timer_failure.c | 47 +++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/timer_failure.c diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 290c21dbe65a..ce2c61d62fc6 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include #include "timer.skel.h" +#include "timer_failure.skel.h" static int timer(struct timer *timer_skel) { @@ -49,10 +50,11 @@ void serial_test_timer(void) timer_skel = timer__open_and_load(); if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) - goto cleanup; + return; err = timer(timer_skel); ASSERT_OK(err, "timer"); -cleanup: timer__destroy(timer_skel); + + RUN_TESTS(timer_failure); } diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c new file mode 100644 index 000000000000..226d33b5a05c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_misc.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer_map SEC(".maps"); + +static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +{ + if (bpf_get_smp_processor_id() % 2) + return 1; + else + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__failure __msg("should have been in (0x0; 0x0)") +int BPF_PROG2(test_ret_1, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + timer = bpf_map_lookup_elem(&timer_map, &key); + if (timer) { + bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_start(timer, 1000, 0); + } + + return 0; +} -- cgit From 23645bca98304a2772f0de96f97370dd567d0ae6 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Fri, 29 Sep 2023 13:04:33 -0400 Subject: drm/amd/display: Don't set dpms_off for seamless boot [Why] eDPs fail to light up with seamless boot enabled [How] When seamless boot is enabled don't configure dpms_off in disable_vbios_mode_if_required. Reviewed-by: Charlene Liu Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Tom Chung Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3a9077b60029..d08e60dff46d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1262,6 +1262,9 @@ static void disable_vbios_mode_if_required( if (stream == NULL) continue; + if (stream->apply_seamless_boot_optimization) + continue; + // only looking for first odm pipe if (pipe->prev_odm_pipe) continue; -- cgit From ff89f064dca38e2203790bf876cc7756b8ab2961 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 Oct 2023 14:04:04 +0200 Subject: drm/amdgpu: add missing NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo->tbo.resource can easily be NULL here. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2902 Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index f3ee83cdf97e..d28e21baef16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -252,7 +252,7 @@ static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_res_cursor cursor; - if (bo->tbo.resource->mem_type != TTM_PL_VRAM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) return false; amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); -- cgit From 3806a8c64794661b15ff5ed28180ff9a5f79fce8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 8 Oct 2023 14:46:49 +0800 Subject: drm/amdgpu: fix SI failure due to doorbells allocation SI hardware does not have doorbells at all, however currently the code will try to do the allocation and thus fail, makes SI AMDGPU not usable. Fix this failure by skipping doorbells allocation when doorbells count is zero. Fixes: 54c30d2a8def ("drm/amdgpu: create kernel doorbell pages") Reviewed-by: Shashank Sharma Signed-off-by: Icenowy Zheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index da4be0bbb446..8eee5d783a92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -142,6 +142,10 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev) int r; int size; + /* SI HW does not have doorbells, skip allocation */ + if (adev->doorbell.num_kernel_doorbells == 0) + return 0; + /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE); -- cgit From a20c4350c6a12405b7f732b3ee6801ffe2cc45ce Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 3 Oct 2023 10:20:02 +0800 Subject: scsi: ufs: core: Correct clear TM error log The clear TM function error log status was inverted. Fixes: 4693fad7d6d4 ("scsi: ufs: core: Log error handler activity") Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20231003022002.25578-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index c2df07545f96..8382e8cfa414 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6895,7 +6895,7 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) mask, 0, 1000, 1000); dev_err(hba->dev, "Clearing task management function with tag %d %s\n", - tag, err ? "succeeded" : "failed"); + tag, err < 0 ? "failed" : "succeeded"); out: return err; -- cgit From 26c29961b142444cd99361644c30fa1e9b3da6be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Oct 2023 17:33:54 +0000 Subject: net: refine debug info in skb_checksum_help() syzbot uses panic_on_warn. This means that the skb_dump() I added in the blamed commit are not even called. Rewrite this so that we get the needed skb dump before syzbot crashes. Fixes: eeee4b77dc52 ("net: add more debug info in skb_checksum_help()") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231006173355.2254983-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 85df22f05c38..5aaf5753d4e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3292,15 +3292,19 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb_checksum_start_offset(skb); ret = -EINVAL; - if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { + if (unlikely(offset >= skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + WARN_ONCE(true, "offset (%d) >= skb_headlen() (%u)\n", + offset, skb_headlen(skb)); goto out; } csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; - if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) { + if (unlikely(offset + sizeof(__sum16) > skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + WARN_ONCE(true, "offset+2 (%zu) > skb_headlen() (%u)\n", + offset + sizeof(__sum16), skb_headlen(skb)); goto out; } ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); -- cgit From 42999c90461293233de9bb6e6c7d8a2db7281c1e Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 9 Oct 2023 03:38:40 -0700 Subject: hv/hv_kvp_daemon:Support for keyfile based connection profile Ifcfg config file support in NetworkManger is deprecated. This patch provides support for the new keyfile config format for connection profiles in NetworkManager. The patch modifies the hv_kvp_daemon code to generate the new network configuration in keyfile format(.ini-style format) along with a ifcfg format configuration. The ifcfg format configuration is also retained to support easy backward compatibility for distro vendors. These configurations are stored in temp files which are further translated using the hv_set_ifconfig.sh script. This script is implemented by individual distros based on the network management commands supported. For example, RHEL's implementation could be found here: https://gitlab.com/redhat/centos-stream/src/hyperv-daemons/-/blob/c9s/hv_set_ifconfig.sh Debian's implementation could be found here: https://github.com/endlessm/linux/blob/master/debian/cloud-tools/hv_set_ifconfig The next part of this support is to let the Distro vendors consume these modified implementations to the new configuration format. Tested-on: Rhel9(Hyper-V, Azure)(nm and ifcfg files verified) Signed-off-by: Shradha Gupta Reviewed-by: Saurabh Sengar Reviewed-by: Ani Sinha Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/1696847920-31125-1-git-send-email-shradhagupta@linux.microsoft.com --- tools/hv/hv_kvp_daemon.c | 233 ++++++++++++++++++++++++++++++++++++++------ tools/hv/hv_set_ifconfig.sh | 39 ++++++-- 2 files changed, 235 insertions(+), 37 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 27f5e7dfc2f7..264eeb9c46a9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1171,12 +1171,79 @@ static int process_ip_string(FILE *f, char *ip_string, int type) return 0; } +/* + * Only IPv4 subnet strings needs to be converted to plen + * For IPv6 the subnet is already privided in plen format + */ +static int kvp_subnet_to_plen(char *subnet_addr_str) +{ + int plen = 0; + struct in_addr subnet_addr4; + + /* + * Convert subnet address to binary representation + */ + if (inet_pton(AF_INET, subnet_addr_str, &subnet_addr4) == 1) { + uint32_t subnet_mask = ntohl(subnet_addr4.s_addr); + + while (subnet_mask & 0x80000000) { + plen++; + subnet_mask <<= 1; + } + } else { + return -1; + } + + return plen; +} + +static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, + int is_ipv6) +{ + char addr[INET6_ADDRSTRLEN]; + char subnet_addr[INET6_ADDRSTRLEN]; + int error, i = 0; + int ip_offset = 0, subnet_offset = 0; + int plen; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + + while (parse_ip_val_buffer(ip_string, &ip_offset, addr, + (MAX_IP_ADDR_SIZE * 2)) && + parse_ip_val_buffer(subnet, + &subnet_offset, + subnet_addr, + (MAX_IP_ADDR_SIZE * + 2))) { + if (!is_ipv6) + plen = kvp_subnet_to_plen((char *)subnet_addr); + else + plen = atoi(subnet_addr); + + if (plen < 0) + return plen; + + error = fprintf(f, "address%d=%s/%d\n", ++i, (char *)addr, + plen); + if (error < 0) + return error; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + } + + return 0; +} + static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { int error = 0; - char if_file[PATH_MAX]; - FILE *file; + char if_filename[PATH_MAX]; + char nm_filename[PATH_MAX]; + FILE *ifcfg_file, *nmfile; char cmd[PATH_MAX]; + int is_ipv6 = 0; char *mac_addr; int str_len; @@ -1197,7 +1264,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * in a given distro to configure the interface and so are free * ignore information that may not be relevant. * - * Here is the format of the ip configuration file: + * Here is the ifcfg format of the ip configuration file: * * HWADDR=macaddr * DEVICE=interface name @@ -1220,6 +1287,32 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as * IPV6NETMASK. * + * Here is the keyfile format of the ip configuration file: + * + * [ethernet] + * mac-address=macaddr + * [connection] + * interface-name=interface name + * + * [ipv4] + * method= (where is "auto" if DHCP is configured + * or "manual" if no boot-time protocol should be used) + * + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * + * [ipv6] + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * * The host can specify multiple ipv4 and ipv6 addresses to be * configured for the interface. Furthermore, the configuration * needs to be persistent. A subsequent GET call on the interface @@ -1227,14 +1320,29 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * call. */ - snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC, - "/ifcfg-", if_name); + /* + * We are populating both ifcfg and nmconnection files + */ + snprintf(if_filename, sizeof(if_filename), "%s%s%s", KVP_CONFIG_LOC, + "/ifcfg-", if_name); - file = fopen(if_file, "w"); + ifcfg_file = fopen(if_filename, "w"); - if (file == NULL) { + if (!ifcfg_file) { syslog(LOG_ERR, "Failed to open config file; error: %d %s", - errno, strerror(errno)); + errno, strerror(errno)); + return HV_E_FAIL; + } + + snprintf(nm_filename, sizeof(nm_filename), "%s%s%s%s", KVP_CONFIG_LOC, + "/", if_name, ".nmconnection"); + + nmfile = fopen(nm_filename, "w"); + + if (!nmfile) { + syslog(LOG_ERR, "Failed to open config file; error: %d %s", + errno, strerror(errno)); + fclose(ifcfg_file); return HV_E_FAIL; } @@ -1248,14 +1356,31 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) goto setval_error; } - error = kvp_write_file(file, "HWADDR", "", mac_addr); - free(mac_addr); + error = kvp_write_file(ifcfg_file, "HWADDR", "", mac_addr); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(ifcfg_file, "DEVICE", "", if_name); + if (error < 0) + goto setmac_error; + + error = fprintf(nmfile, "\n[connection]\n"); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(nmfile, "interface-name", "", if_name); if (error) - goto setval_error; + goto setmac_error; - error = kvp_write_file(file, "DEVICE", "", if_name); + error = fprintf(nmfile, "\n[ethernet]\n"); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(nmfile, "mac-address", "", mac_addr); if (error) - goto setval_error; + goto setmac_error; + + free(mac_addr); /* * The dhcp_enabled flag is only for IPv4. In the case the host only @@ -1263,47 +1388,91 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * proceed to parse and pass the IPv6 information to the * disto-specific script hv_set_ifconfig. */ + + /* + * First populate the ifcfg file format + */ if (new_val->dhcp_enabled) { - error = kvp_write_file(file, "BOOTPROTO", "", "dhcp"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "dhcp"); if (error) goto setval_error; - } else { - error = kvp_write_file(file, "BOOTPROTO", "", "none"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "none"); if (error) goto setval_error; } - /* - * Write the configuration for ipaddress, netmask, gateway and - * name servers. - */ - - error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR); + error = process_ip_string(ifcfg_file, (char *)new_val->ip_addr, + IPADDR); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->sub_net, NETMASK); + error = process_ip_string(ifcfg_file, (char *)new_val->sub_net, + NETMASK); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY); + error = process_ip_string(ifcfg_file, (char *)new_val->gate_way, + GATEWAY); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->dns_addr, DNS); + error = process_ip_string(ifcfg_file, (char *)new_val->dns_addr, DNS); if (error) goto setval_error; - fclose(file); + if (new_val->addr_family == ADDR_FAMILY_IPV6) { + error = fprintf(nmfile, "\n[ipv6]\n"); + if (error < 0) + goto setval_error; + is_ipv6 = 1; + } else { + error = fprintf(nmfile, "\n[ipv4]\n"); + if (error < 0) + goto setval_error; + } + + /* + * Now we populate the keyfile format + */ + + if (new_val->dhcp_enabled) { + error = kvp_write_file(nmfile, "method", "", "auto"); + if (error < 0) + goto setval_error; + } else { + error = kvp_write_file(nmfile, "method", "", "manual"); + if (error < 0) + goto setval_error; + } + + /* + * Write the configuration for ipaddress, netmask, gateway and + * name services + */ + error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr, + (char *)new_val->sub_net, is_ipv6); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + + fclose(nmfile); + fclose(ifcfg_file); /* * Now that we have populated the configuration file, * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", - "hv_set_ifconfig", if_file); + str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", + "hv_set_ifconfig", if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. @@ -1316,14 +1485,16 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (system(cmd)) { syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s", - cmd, errno, strerror(errno)); + cmd, errno, strerror(errno)); return HV_E_FAIL; } return 0; - +setmac_error: + free(mac_addr); setval_error: syslog(LOG_ERR, "Failed to write config file"); - fclose(file); + fclose(ifcfg_file); + fclose(nmfile); return error; } diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index d10fe35b7f25..ae5a7a8249a2 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -18,12 +18,12 @@ # # This example script is based on a RHEL environment. # -# Here is the format of the ip configuration file: +# Here is the ifcfg format of the ip configuration file: # # HWADDR=macaddr # DEVICE=interface name # BOOTPROTO= (where is "dhcp" if DHCP is configured -# or "none" if no boot-time protocol should be used) +# or "none" if no boot-time protocol should be used) # # IPADDR0=ipaddr1 # IPADDR1=ipaddr2 @@ -41,6 +41,32 @@ # tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as # IPV6NETMASK. # +# Here is the keyfile format of the ip configuration file: +# +# [ethernet] +# mac-address=macaddr +# [connection] +# interface-name=interface name +# +# [ipv4] +# method= (where is "auto" if DHCP is configured +# or "manual" if no boot-time protocol should be used) +# +# address1=ipaddr1/plen +# address=ipaddr2/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1; +# +# [ipv6] +# address1=ipaddr1/plen +# address2=ipaddr1/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1;dns2 +# # The host can specify multiple ipv4 and ipv6 addresses to be # configured for the interface. Furthermore, the configuration # needs to be persistent. A subsequent GET call on the interface @@ -48,18 +74,19 @@ # call. # - - echo "IPV6INIT=yes" >> $1 echo "NM_CONTROLLED=no" >> $1 echo "PEERDNS=yes" >> $1 echo "ONBOOT=yes" >> $1 - cp $1 /etc/sysconfig/network-scripts/ +chmod 600 $2 +interface=$(echo $2 | awk -F - '{ print $2 }') +filename="${2##*/}" + +sed '/\[connection\]/a autoconnect=true' $2 > /etc/NetworkManager/system-connections/${filename} -interface=$(echo $1 | awk -F - '{ print $2 }') /sbin/ifdown $interface 2>/dev/null /sbin/ifup $interface 2>/dev/null -- cgit From b555aa66760f17df4a0a5e4b440816e390311a38 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 5 Oct 2023 22:55:56 +0200 Subject: ata: pata_parport: fix pata_parport_devchk There's a 'x' missing in 0x55 in pata_parport_devchk(), causing the detection to always fail. Fix it. Fixes: 246a1c4c6b7f ("ata: pata_parport: add driver (PARIDE replacement)") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Zary Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/pata_parport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index 1af64d435d3c..258d189f42e5 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -64,7 +64,7 @@ static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 0xaa); pi->proto->write_regr(pi, 0, ATA_REG_LBAL, 0x55); - pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 055); + pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 0x55); pi->proto->write_regr(pi, 0, ATA_REG_LBAL, 0xaa); nsect = pi->proto->read_regr(pi, 0, ATA_REG_NSECT); -- cgit From d2302427c12277929c9f390adeda19fbf403c0bb Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 5 Oct 2023 22:55:57 +0200 Subject: ata: pata_parport: implement set_devctl Add missing ops->sff_set_devctl implementation. Fixes: 246a1c4c6b7f ("ata: pata_parport: add driver (PARIDE replacement)") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Zary Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/pata_parport.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index 258d189f42e5..cf87bbb52f1f 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -51,6 +51,13 @@ static void pata_parport_dev_select(struct ata_port *ap, unsigned int device) ata_sff_pause(ap); } +static void pata_parport_set_devctl(struct ata_port *ap, u8 ctl) +{ + struct pi_adapter *pi = ap->host->private_data; + + pi->proto->write_regr(pi, 1, 6, ctl); +} + static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) { struct pi_adapter *pi = ap->host->private_data; @@ -252,6 +259,7 @@ static struct ata_port_operations pata_parport_port_ops = { .hardreset = NULL, .sff_dev_select = pata_parport_dev_select, + .sff_set_devctl = pata_parport_set_devctl, .sff_check_status = pata_parport_check_status, .sff_check_altstatus = pata_parport_check_altstatus, .sff_tf_load = pata_parport_tf_load, -- cgit From f343e578fef99a69b3322aca38b94a6d8ded2ce7 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 5 Oct 2023 22:55:58 +0200 Subject: ata: pata_parport: add custom version of wait_after_reset Some parallel adapters (e.g. EXP Computer MC-1285B EPP Cable) return bogus values when there's no master device present. This can cause reset to fail, preventing the lone slave device (such as EXP Computer CD-865) from working. Add custom version of wait_after_reset that ignores master failure when a slave device is present. The custom version is also needed because the generic ata_sff_wait_after_reset uses direct port I/O for slave device detection. Signed-off-by: Ondrej Zary Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/pata_parport.c | 68 ++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index cf87bbb52f1f..a7adfdcb5e27 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -80,6 +80,72 @@ static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) return (nsect == 0x55) && (lbal == 0xaa); } +static int pata_parport_wait_after_reset(struct ata_link *link, + unsigned int devmask, + unsigned long deadline) +{ + struct ata_port *ap = link->ap; + struct pi_adapter *pi = ap->host->private_data; + unsigned int dev0 = devmask & (1 << 0); + unsigned int dev1 = devmask & (1 << 1); + int rc, ret = 0; + + ata_msleep(ap, ATA_WAIT_AFTER_RESET); + + /* always check readiness of the master device */ + rc = ata_sff_wait_ready(link, deadline); + if (rc) { + /* + * some adapters return bogus values if master device is not + * present, so don't abort now if a slave device is present + */ + if (!dev1) + return rc; + ret = -ENODEV; + } + + /* + * if device 1 was found in ata_devchk, wait for register + * access briefly, then wait for BSY to clear. + */ + if (dev1) { + int i; + + pata_parport_dev_select(ap, 1); + + /* + * Wait for register access. Some ATAPI devices fail + * to set nsect/lbal after reset, so don't waste too + * much time on it. We're gonna wait for !BSY anyway. + */ + for (i = 0; i < 2; i++) { + u8 nsect, lbal; + + nsect = pi->proto->read_regr(pi, 0, ATA_REG_NSECT); + lbal = pi->proto->read_regr(pi, 0, ATA_REG_LBAL); + if (nsect == 1 && lbal == 1) + break; + /* give drive a breather */ + ata_msleep(ap, 50); + } + + rc = ata_sff_wait_ready(link, deadline); + if (rc) { + if (rc != -ENODEV) + return rc; + ret = rc; + } + } + + pata_parport_dev_select(ap, 0); + if (dev1) + pata_parport_dev_select(ap, 1); + if (dev0) + pata_parport_dev_select(ap, 0); + + return ret; +} + static int pata_parport_bus_softreset(struct ata_port *ap, unsigned int devmask, unsigned long deadline) { @@ -94,7 +160,7 @@ static int pata_parport_bus_softreset(struct ata_port *ap, unsigned int devmask, ap->last_ctl = ap->ctl; /* wait the port to become ready */ - return ata_sff_wait_after_reset(&ap->link, devmask, deadline); + return pata_parport_wait_after_reset(&ap->link, devmask, deadline); } static int pata_parport_softreset(struct ata_link *link, unsigned int *classes, -- cgit From 0c1e81d0b5ebd5813536dd5fcf5966ad043f37dc Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 5 Oct 2023 22:55:59 +0200 Subject: ata: pata_parport: fit3: implement IDE command set registers fit3 protocol driver does not support accessing IDE control registers (device control/altstatus). The DOS driver does not use these registers either (as observed from DOSEMU trace). But the HW seems to be capable of accessing these registers - I simply tried bit 3 and it works! The control register is required to properly reset ATAPI devices or they will be detected only once (after a power cycle). Tested with EXP Computer CD-865 with MC-1285B EPP cable and TransDisk 3000. Signed-off-by: Ondrej Zary Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/fit3.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/ata/pata_parport/fit3.c b/drivers/ata/pata_parport/fit3.c index bad7aa920cdc..d2b81cf2e16d 100644 --- a/drivers/ata/pata_parport/fit3.c +++ b/drivers/ata/pata_parport/fit3.c @@ -9,11 +9,6 @@ * * The TD-2000 and certain older devices use a different protocol. * Try the fit2 protocol module with them. - * - * NB: The FIT adapters do not appear to support the control - * registers. So, we map ALT_STATUS to STATUS and NO-OP writes - * to the device control register - this means that IDE reset - * will not work on these devices. */ #include @@ -37,8 +32,7 @@ static void fit3_write_regr(struct pi_adapter *pi, int cont, int regr, int val) { - if (cont == 1) - return; + regr += cont << 3; switch (pi->mode) { case 0: @@ -59,11 +53,7 @@ static int fit3_read_regr(struct pi_adapter *pi, int cont, int regr) { int a, b; - if (cont) { - if (regr != 6) - return 0xff; - regr = 7; - } + regr += cont << 3; switch (pi->mode) { case 0: -- cgit From 626b13f015e080e434b1dee9a0c116ddbf4fb695 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 4 Oct 2023 17:50:49 +0900 Subject: scsi: Do not rescan devices with a suspended queue Commit ff48b37802e5 ("scsi: Do not attempt to rescan suspended devices") modified scsi_rescan_device() to avoid attempting rescanning a suspended device. However, the modification added a check to verify that a SCSI device is in the running state without checking if the device request queue (in the case of block device) is also running, thus allowing the exectuion of internal requests. Without checking the device request queue, commit ff48b37802e5 fix is incomplete and deadlocks on resume can still happen. Use blk_queue_pm_only() to check if the device request queue allows executing commands in addition to checking the SCSI device state. Reported-by: Petr Tesarik Fixes: ff48b37802e5 ("scsi: Do not attempt to rescan suspended devices") Cc: stable@vger.kernel.org Tested-by: Petr Tesarik Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/scsi/scsi_scan.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 902655d75947..44680f65ea14 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1627,12 +1627,13 @@ int scsi_rescan_device(struct scsi_device *sdev) device_lock(dev); /* - * Bail out if the device is not running. Otherwise, the rescan may - * block waiting for commands to be executed, with us holding the - * device lock. This can result in a potential deadlock in the power - * management core code when system resume is on-going. + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. */ - if (sdev->sdev_state != SDEV_RUNNING) { + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } -- cgit From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sat, 7 Oct 2023 08:59:53 +0800 Subject: net: ipv6: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fddd0cbdede1..e023d29e919c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -770,7 +770,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; -- cgit From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 9 Oct 2023 09:13:37 +0800 Subject: net: ipv4: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2be2d4922557..d18f0f092fe7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -732,7 +732,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; -- cgit From 0412cc846a1ef38697c3f321f9b174da91ecd3b5 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Thu, 5 Oct 2023 21:06:33 +0300 Subject: net: macsec: indicate next pn update when offloading Indicate next PN update using update_pn flag in macsec_context. Offloaded MACsec implementations does not know whether or not the MACSEC_SA_ATTR_PN attribute was passed for an SA update and assume that next PN should always updated, but this is not always true. The PN can be reset to its initial value using the following command: $ ip macsec set macsec0 tx sa 0 off #octeontx2-pf case Or, the update PN command will succeed even if the driver does not support PN updates. $ ip macsec set macsec0 tx sa 0 pn 1 on #mscc phy driver case Comparing the initial PN with the new PN value is not a solution. When the user updates the PN using its initial value the command will succeed, even if the driver does not support it. Like this: $ ip macsec add macsec0 tx sa 0 pn 1 on key 00 \ ead3664f508eb06c40ac7104cdae4ce5 $ ip macsec set macsec0 tx sa 0 pn 1 on #mlx5 case Signed-off-by: Radu Pirea (NXP OSS) Reviewed-by: Sabrina Dubroca Signed-off-by: Paolo Abeni --- drivers/net/macsec.c | 2 ++ include/net/macsec.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index b7e151439c48..c5cd4551c67c 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2383,6 +2383,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; + ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_txsa, &ctx); @@ -2476,6 +2477,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; + ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsa, &ctx); diff --git a/include/net/macsec.h b/include/net/macsec.h index 75a6f4863c83..ebf9bc54036a 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -258,6 +258,7 @@ struct macsec_context { struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct { + bool update_pn; unsigned char assoc_num; u8 key[MACSEC_MAX_KEY_LEN]; union { -- cgit From 4dcf38ae3ca16b8872f151d46ba5ac28dd580b60 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Thu, 5 Oct 2023 21:06:34 +0300 Subject: octeontx2-pf: mcs: update PN only when update_pn is true When updating SA, update the PN only when the update_pn flag is true. Otherwise, the PN will be reset to its previous value using the following command and this should not happen: $ ip macsec set macsec0 tx sa 0 on Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Radu Pirea (NXP OSS) Reviewed-by: Sabrina Dubroca Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 59b138214af2..6cc7a78968fc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -1357,10 +1357,12 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) if (netif_running(secy->netdev)) { /* Keys cannot be changed after creation */ - err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn); - if (err) - return err; + if (ctx->sa.update_pn) { + err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, + sw_tx_sa->next_pn); + if (err) + return err; + } err = cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num, sw_tx_sa->active); @@ -1529,6 +1531,9 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) if (err) return err; + if (!ctx->sa.update_pn) + return 0; + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, rx_sa->next_pn); if (err) -- cgit From e0a8c918daa58700609ebd45e3fcd49965be8bbc Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Thu, 5 Oct 2023 21:06:35 +0300 Subject: net: phy: mscc: macsec: reject PN update requests Updating the PN is not supported. Return -EINVAL if update_pn is true. The following command succeeded, but it should fail because the driver does not update the PN: ip macsec set macsec0 tx sa 0 pn 232 on Fixes: 28c5107aa904 ("net: phy: mscc: macsec support") Signed-off-by: Radu Pirea (NXP OSS) Reviewed-by: Sabrina Dubroca Signed-off-by: Paolo Abeni --- drivers/net/phy/mscc/mscc_macsec.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index 018253a573b8..4f39ba63a9a9 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -849,6 +849,9 @@ static int vsc8584_macsec_upd_rxsa(struct macsec_context *ctx) struct macsec_flow *flow; int ret; + if (ctx->sa.update_pn) + return -EINVAL; + flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); if (IS_ERR(flow)) return PTR_ERR(flow); @@ -900,6 +903,9 @@ static int vsc8584_macsec_upd_txsa(struct macsec_context *ctx) struct macsec_flow *flow; int ret; + if (ctx->sa.update_pn) + return -EINVAL; + flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); if (IS_ERR(flow)) return PTR_ERR(flow); -- cgit From fde2f2d7f23d39f2fc699ba6d91ac3f4a2e637ca Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Thu, 5 Oct 2023 21:06:36 +0300 Subject: net/mlx5e: macsec: use update_pn flag instead of PN comparation When updating the SA, use the new update_pn flags instead of comparing the new PN with the initial one. Comparing the initial PN value with the new value will allow the user to update the SA using the initial PN value as a parameter like this: $ ip macsec add macsec0 tx sa 0 pn 1 on key 00 \ ead3664f508eb06c40ac7104cdae4ce5 $ ip macsec set macsec0 tx sa 0 pn 1 off Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Fixes: aae3454e4d4c ("net/mlx5e: Add MACsec offload Rx command support") Signed-off-by: Radu Pirea (NXP OSS) Reviewed-by: Sabrina Dubroca Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index c9c1db971652..d4ebd8743114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -580,7 +580,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - if (tx_sa->next_pn != ctx_tx_sa->next_pn_halves.lower) { + if (ctx->sa.update_pn) { netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n", assoc_num); err = -EINVAL; @@ -973,7 +973,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - if (rx_sa->next_pn != ctx_rx_sa->next_pn_halves.lower) { + if (ctx->sa.update_pn) { netdev_err(ctx->netdev, "MACsec offload update RX sa %d PN isn't supported\n", assoc_num); -- cgit From 258dd5e6e65995ee85a941eed9a06708a36b1bfe Mon Sep 17 00:00:00 2001 From: Ruihai Zhou Date: Sat, 7 Oct 2023 14:49:49 +0800 Subject: drm/panel: boe-tv101wum-nl6: Completely pull GPW to VGL before TP term The sta_himax83102 panel sometimes shows abnormally flickering horizontal lines. The front gate output will precharge the X point of the next pole circuit before TP(TouchPanel Enable) term starts, and wait until the end of the TP term to resume the CLK. For this reason, the X point must be maintained during the TP term. In abnormal case, we measured a slight leakage at point X. This because during the TP term, the GPW does not fully pull the VGL low, causing the TFT to not be closed tightly. To fix this, we completely pull GPW to VGL before entering the TP term. This will ensure that the TFT is closed tightly and prevent the abnormal display. Fixes: 1bc2ef065f13 ("drm/panel: Support for Starry-himax83102-j02 TDDI MIPI-DSI panel") Signed-off-by: Ruihai Zhou Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231007064949.22668-1-zhouruihai@huaqin.corp-partner.google.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231007064949.22668-1-zhouruihai@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 5ac926281d2c..c9087f474cbc 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1342,9 +1342,7 @@ static const struct panel_init_cmd starry_himax83102_j02_init_cmd[] = { _INIT_DCS_CMD(0xB1, 0x01, 0xBF, 0x11), _INIT_DCS_CMD(0xCB, 0x86), _INIT_DCS_CMD(0xD2, 0x3C, 0xFA), - _INIT_DCS_CMD(0xE9, 0xC5), - _INIT_DCS_CMD(0xD3, 0x00, 0x00, 0x00, 0x00, 0x80, 0x0C, 0x01), - _INIT_DCS_CMD(0xE9, 0x3F), + _INIT_DCS_CMD(0xD3, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x0C, 0x01), _INIT_DCS_CMD(0xE7, 0x02, 0x00, 0x28, 0x01, 0x7E, 0x0F, 0x7E, 0x10, 0xA0, 0x00, 0x00, 0x20, 0x40, 0x50, 0x40), _INIT_DCS_CMD(0xBD, 0x02), _INIT_DCS_CMD(0xD8, 0xFF, 0xFF, 0xBF, 0xFE, 0xAA, 0xA0, 0xFF, 0xFF, 0xBF, 0xFE, 0xAA, 0xA0), -- cgit From 7b5add9af567c44e12196107f0fe106e194034fd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Oct 2023 15:53:09 +0300 Subject: ixgbe: fix crash with empty VF macvlan list The adapter->vf_mvs.l list needs to be initialized even if the list is empty. Otherwise it will lead to crashes. Fixes: a1cbb15c1397 ("ixgbe: Add macvlan support for VF") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/ZSADNdIw8zFx1xw2@kadam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 29cc60988071..ea88ac04ab9a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -28,6 +28,9 @@ static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter, struct vf_macvlans *mv_list; int num_vf_macvlans, i; + /* Initialize list of VF macvlans */ + INIT_LIST_HEAD(&adapter->vf_mvs.l); + num_vf_macvlans = hw->mac.num_rar_entries - (IXGBE_MAX_PF_MACVLANS + 1 + num_vfs); if (!num_vf_macvlans) @@ -36,8 +39,6 @@ static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter, mv_list = kcalloc(num_vf_macvlans, sizeof(struct vf_macvlans), GFP_KERNEL); if (mv_list) { - /* Initialize list of VF macvlans */ - INIT_LIST_HEAD(&adapter->vf_mvs.l); for (i = 0; i < num_vf_macvlans; i++) { mv_list[i].vf = -1; mv_list[i].free = true; -- cgit From a72178cfe855c283224f393d94a1332b90d1483e Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Fri, 6 Oct 2023 14:58:47 +0200 Subject: net/smc: Fix dependency of SMC on ISM When the SMC protocol is built into the kernel proper while ISM is configured to be built as module, linking the kernel fails due to unresolved dependencies out of net/smc/smc_ism.o to ism_get_smcd_ops, ism_register_client, and ism_unregister_client as reported via the linux-next test automation (see link). This however is a bug introduced a while ago. Correct the dependency list in ISM's and SMC's Kconfig to reflect the dependencies that are actually inverted. With this you cannot build a kernel with CONFIG_SMC=y and CONFIG_ISM=m. Either ISM needs to be 'y', too - or a 'n'. That way, SMC can still be configured on non-s390 architectures that do not have (nor need) an ISM driver. Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/linux-next/d53b5b50-d894-4df8-8969-fd39e63440ae@infradead.org/ Co-developed-by: Wenjia Zhang Signed-off-by: Wenjia Zhang Signed-off-by: Gerd Bayer Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20231006125847.1517840-1-gbayer@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/s390/net/Kconfig | 2 +- net/smc/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index 74760c1a163b..4902d45e929c 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -102,7 +102,7 @@ config CCWGROUP config ISM tristate "Support for ISM vPCI Adapter" - depends on PCI && SMC + depends on PCI default n help Select this option if you want to use the Internal Shared Memory diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 1ab3c5a2c5ad..746be3996768 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -2,6 +2,7 @@ config SMC tristate "SMC socket protocol family" depends on INET && INFINIBAND + depends on m || ISM != m help SMC-R provides a "sockets over RDMA" solution making use of RDMA over Converged Ethernet (RoCE) technology to upgrade -- cgit From 8e8a12ecbc86700b5e1a3596ce2b3c43dafad336 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 17:55:51 +0200 Subject: powerpc/85xx: Fix math emulation exception Booting mpc85xx_defconfig kernel on QEMU leads to: Bad trap at PC: fe9bab0, SR: 2d000, vector=800 awk[82]: unhandled trap (5) at 0 nip fe9bab0 lr fe9e01c code 5 in libc-2.27.so[fe5a000+17a000] awk[82]: code: 3aa00000 3a800010 4bffe03c 9421fff0 7ca62b78 38a00000 93c10008 83c10008 awk[82]: code: 38210010 4bffdec8 9421ffc0 7c0802a6 d8010008 4815190d 93810030 Trace/breakpoint trap WARNING: no useful console This is because allthough CONFIG_MATH_EMULATION is selected, Exception 800 calls unknown_exception(). Call emulation_assist_interrupt() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/066caa6d9480365da9b8ed83692d7101e10ac5f8.1695657339.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_85xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 97e9ea0c7297..0f1641a31250 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -395,7 +395,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ -- cgit From 53ba32acb5ab137ba333c20e0c987bdd6273a366 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:24 +0100 Subject: ASoC: dt-bindings: cirrus,cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Acked-by: Krzysztof Kozlowski Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml index 7a6de938b11d..4118aa54bbd5 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml @@ -82,7 +82,7 @@ properties: description: Current at which the headset micbias sense clamp will engage, 0 to disable. - enum: [ 0, 14, 23, 41, 50, 60, 68, 86, 95 ] + enum: [ 0, 14, 24, 43, 52, 61, 71, 90, 99 ] default: 0 cirrus,bias-ramp-ms: -- cgit From 99d426c6dd2d6f9734617ec12def856ee35b9218 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:25 +0100 Subject: ASoC: cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 92e37bc1df9d..9f5f1a92561d 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -34,7 +34,7 @@ static const unsigned int cs42l43_accdet_db_ms[] = { static const unsigned int cs42l43_accdet_ramp_ms[] = { 10, 40, 90, 170 }; static const unsigned int cs42l43_accdet_bias_sense[] = { - 14, 23, 41, 50, 60, 68, 86, 95, 0, + 14, 24, 43, 52, 61, 71, 90, 99, 0, }; static int cs42l43_find_index(struct cs42l43_codec *priv, const char * const prop, -- cgit From 2b7947bd32e243c52870d54141d3b4ea6775e63d Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 5 Oct 2023 13:16:32 +0000 Subject: drm/atomic-helper: relax unregistered connector check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver might pull connectors which weren't submitted by user-space into the atomic state. For instance, intel_dp_mst_atomic_master_trans_check() pulls in connectors sharing the same DP-MST stream. However, if the connector is unregistered, this later fails with: [ 559.425658] i915 0000:00:02.0: [drm:drm_atomic_helper_check_modeset] [CONNECTOR:378:DP-7] is not registered Skip the unregistered connector check to allow user-space to turn off connectors one-by-one. See this wlroots issue: https://gitlab.freedesktop.org/wlroots/wlroots/-/issues/3407 Previous discussion: https://lore.kernel.org/intel-gfx/Y6GX7z17WmDSKwta@ideak-desk.fi.intel.com/ Signed-off-by: Simon Ser Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Reviewed-by: Lyude Paul Cc: Jani Nikula Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231005131623.114379-1-contact@emersion.fr --- drivers/gpu/drm/drm_atomic_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 292e38eb6218..60794fcde1d5 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -290,7 +290,8 @@ static int update_connector_routing(struct drm_atomic_state *state, struct drm_connector *connector, struct drm_connector_state *old_connector_state, - struct drm_connector_state *new_connector_state) + struct drm_connector_state *new_connector_state, + bool added_by_user) { const struct drm_connector_helper_funcs *funcs; struct drm_encoder *new_encoder; @@ -339,9 +340,13 @@ update_connector_routing(struct drm_atomic_state *state, * there's a chance the connector may have been destroyed during the * process, but it's better to ignore that then cause * drm_atomic_helper_resume() to fail. + * + * Last, we want to ignore connector registration when the connector + * was not pulled in the atomic state by user-space (ie, was pulled + * in by the driver, e.g. when updating a DP-MST stream). */ if (!state->duplicated && drm_connector_is_unregistered(connector) && - crtc_state->active) { + added_by_user && crtc_state->active) { drm_dbg_atomic(connector->dev, "[CONNECTOR:%d:%s] is not registered\n", connector->base.id, connector->name); @@ -620,7 +625,10 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, struct drm_connector *connector; struct drm_connector_state *old_connector_state, *new_connector_state; int i, ret; - unsigned int connectors_mask = 0; + unsigned int connectors_mask = 0, user_connectors_mask = 0; + + for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) + user_connectors_mask |= BIT(i); for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { bool has_connectors = @@ -685,7 +693,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, */ ret = update_connector_routing(state, connector, old_connector_state, - new_connector_state); + new_connector_state, + BIT(i) & user_connectors_mask); if (ret) return ret; if (old_connector_state->crtc) { -- cgit From da6192ca72d5ad913d109d43dc896290ad05d98f Mon Sep 17 00:00:00 2001 From: Will Mortensen Date: Thu, 5 Oct 2023 22:37:06 -0700 Subject: net/mlx5e: Again mutually exclude RX-FCS and RX-port-timestamp Commit 1e66220948df8 ("net/mlx5e: Update rx ring hw mtu upon each rx-fcs flag change") seems to have accidentally inverted the logic added in commit 0bc73ad46a76 ("net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp"). The impact of this is a little unclear since it seems the FCS scattered with RX-FCS is (usually?) correct regardless. Fixes: 1e66220948df8 ("net/mlx5e: Update rx ring hw mtu upon each rx-fcs flag change") Tested-by: Charlotte Tan Reviewed-by: Charlotte Tan Cc: Adham Faris Cc: Aya Levin Cc: Tariq Toukan Cc: Moshe Shemesh Cc: Saeed Mahameed Signed-off-by: Will Mortensen Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20231006053706.514618-1-will@extrahop.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a2ae791538ed..acb40770cf0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3952,13 +3952,14 @@ static int set_feature_rx_fcs(struct net_device *netdev, bool enable) struct mlx5e_channels *chs = &priv->channels; struct mlx5e_params new_params; int err; + bool rx_ts_over_crc = !enable; mutex_lock(&priv->state_lock); new_params = chs->params; new_params.scatter_fcs_en = enable; err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, - &new_params.scatter_fcs_en, true); + &rx_ts_over_crc, true); mutex_unlock(&priv->state_lock); return err; } -- cgit From 54f67decddeb47680f08c720c94b4d4f67181442 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 10 Oct 2023 15:27:56 +0200 Subject: Revert "btrfs: reject unknown mount options early" This reverts commit 5f521494cc73520ffac18ede0758883b9aedd018. The patch breaks mounts with security mount options like $ mount -o context=system_u:object_r:root_t:s0 /dev/sdX /mn mount: /mnt: wrong fs type, bad option, bad superblock on /dev/sdX, missing codepage or helper program, ... We cannot reject all unknown options in btrfs_parse_subvol_options() as intended, the security options can be present at this point and it's not possible to enumerate them in a future proof way. This means unknown mount options are silently accepted like before when the filesystem is mounted with either -o subvol=/path or as followup mounts of the same device. Reported-by: Shinichiro Kawasaki --- fs/btrfs/super.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f49e597e197f..1a093ec0f7e3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -954,10 +954,6 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name, *subvol_objectid = subvolid; break; - case Opt_err: - btrfs_err(NULL, "unrecognized mount option '%s'", p); - error = -EINVAL; - goto out; default: break; } -- cgit From d920abd1e7c4884f9ecd0749d1921b7ab19ddfbd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 2 Oct 2023 13:54:28 +0300 Subject: nvmet-tcp: Fix a possible UAF in queue intialization setup From Alon: "Due to a logical bug in the NVMe-oF/TCP subsystem in the Linux kernel, a malicious user can cause a UAF and a double free, which may lead to RCE (may also lead to an LPE in case the attacker already has local privileges)." Hence, when a queue initialization fails after the ahash requests are allocated, it is guaranteed that the queue removal async work will be called, hence leave the deallocation to the queue removal. Also, be extra careful not to continue processing the socket, so set queue rcv_state to NVMET_TCP_RECV_ERR upon a socket error. Cc: stable@vger.kernel.org Reported-by: Alon Zahavi Tested-by: Alon Zahavi Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cd92d7ddf5ed..197fc2ecb164 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -372,6 +372,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else @@ -910,15 +911,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) - goto free_crypto; + return ret; /* queue removal will cleanup */ queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, -- cgit From 4ae55a7dce04989f289d5c5c8c8e5c37adc36c71 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 4 Sep 2023 17:26:38 +0200 Subject: nvme-auth: use chap->s2 to indicate bidirectional authentication Commit 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") replaced the condition "if (ctrl->ctrl_key)" (indicating bidirectional auth) by "if (chap->ctrl_key)", because ctrl->ctrl_key is a resource shared with sysfs. But chap->ctrl_key is set in nvme_auth_process_dhchap_challenge() depending on the DHVLEN in the DH-HMAC-CHAP Challenge message received from the controller, and will thus be non-NULL for every DH-HMAC-CHAP exchange, even if unidirectional auth was requested. This will lead to a protocol violation by sending a Success2 message in the unidirectional case (per NVMe base spec 2.0, the authentication transaction ends after the Success1 message for unidirectional auth). Use chap->s2 instead, which is non-zero if and only if the host requested bi-directional authentication from the controller. Fixes: 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") Signed-off-by: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index daf5d144a8ea..064592a5d546 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -341,7 +341,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_success1_data *data = chap->buf; size_t size = sizeof(*data); - if (chap->ctrl_key) + if (chap->s2) size += chap->hash_len; if (size > CHAP_BUF_SIZE) { @@ -825,7 +825,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->ctrl_key) { + if (chap->s2) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); -- cgit From 01bbafc63b65689cb179ca537971286bc27f3b74 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 6 Oct 2023 10:48:01 +0530 Subject: KEYS: trusted: Remove redundant static calls usage Static calls invocations aren't well supported from module __init and __exit functions. Especially the static call from cleanup_trusted() led to a crash on x86 kernel with CONFIG_DEBUG_VIRTUAL=y. However, the usage of static call invocations for trusted_key_init() and trusted_key_exit() don't add any value from either a performance or security perspective. Hence switch to use indirect function calls instead. Note here that although it will fix the current crash report, ultimately the static call infrastructure should be fixed to either support its future usage from module __init and __exit functions or not. Reported-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Link: https://lore.kernel.org/lkml/ZRhKq6e5nF%2F4ZIV1@fedora/#t Fixes: 5d0682be3189 ("KEYS: trusted: Add generic trusted keys framework") Signed-off-by: Sumit Garg Signed-off-by: Linus Torvalds --- security/keys/trusted-keys/trusted_core.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index c6fc50d67214..85fb5c22529a 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -44,13 +44,12 @@ static const struct trusted_key_source trusted_key_sources[] = { #endif }; -DEFINE_STATIC_CALL_NULL(trusted_key_init, *trusted_key_sources[0].ops->init); DEFINE_STATIC_CALL_NULL(trusted_key_seal, *trusted_key_sources[0].ops->seal); DEFINE_STATIC_CALL_NULL(trusted_key_unseal, *trusted_key_sources[0].ops->unseal); DEFINE_STATIC_CALL_NULL(trusted_key_get_random, *trusted_key_sources[0].ops->get_random); -DEFINE_STATIC_CALL_NULL(trusted_key_exit, *trusted_key_sources[0].ops->exit); +static void (*trusted_key_exit)(void); static unsigned char migratable; enum { @@ -359,19 +358,16 @@ static int __init init_trusted(void) if (!get_random) get_random = kernel_get_random; - static_call_update(trusted_key_init, - trusted_key_sources[i].ops->init); static_call_update(trusted_key_seal, trusted_key_sources[i].ops->seal); static_call_update(trusted_key_unseal, trusted_key_sources[i].ops->unseal); static_call_update(trusted_key_get_random, get_random); - static_call_update(trusted_key_exit, - trusted_key_sources[i].ops->exit); + trusted_key_exit = trusted_key_sources[i].ops->exit; migratable = trusted_key_sources[i].ops->migratable; - ret = static_call(trusted_key_init)(); + ret = trusted_key_sources[i].ops->init(); if (!ret) break; } @@ -388,7 +384,8 @@ static int __init init_trusted(void) static void __exit cleanup_trusted(void) { - static_call_cond(trusted_key_exit)(); + if (trusted_key_exit) + (*trusted_key_exit)(); } late_initcall(init_trusted); -- cgit From ce10fc0604bc6a0d626ed8e5d69088057edc71ab Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 10 Oct 2023 22:20:09 +0200 Subject: s390/bpf: Fix clobbering the caller's backchain in the trampoline One of the first things that s390x kernel functions do is storing the the caller's frame address (backchain) on stack. This makes unwinding possible. The backchain is always stored at frame offset 152, which is inside the 160-byte stack area, that the functions allocate for their callees. The callees must preserve the backchain; the remaining 152 bytes they may use as they please. Currently the trampoline uses all 160 bytes, clobbering the backchain. This causes kernel panics when using __builtin_return_address() in functions called by the trampoline. Fix by reducing the usage of the caller-reserved stack area by 8 bytes in the trampoline. Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()") Reported-by: Song Liu Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231010203512.385819-2-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2861e3360aff..9a9733e4bc80 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2260,8 +2260,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); - /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */ - tjit->stack_size -= STACK_FRAME_OVERHEAD; + /* + * In accordance with the s390x ABI, the caller has allocated + * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's + * backchain, and the rest we can use. + */ + tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64); tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; /* aghi %r15,-stack_size */ -- cgit From 5356ba1ff4f2417e1aebcf99aab35c1ea94dd6d7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 10 Oct 2023 22:20:10 +0200 Subject: s390/bpf: Fix unwinding past the trampoline When functions called by the trampoline panic, the backtrace that is printed stops at the trampoline, because the trampoline does not store its caller's frame address (backchain) on stack; it also stores the return address at a wrong location. Store both the same way as is already done for the regular eBPF programs. Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231010203512.385819-3-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a9733e4bc80..e507692e51e7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2066,6 +2066,7 @@ struct bpf_tramp_jit { * func_addr's original caller */ int stack_size; /* Trampoline stack size */ + int backchain_off; /* Offset of backchain */ int stack_args_off; /* Offset of stack arguments for calling * func_addr, has to be at the top */ @@ -2086,9 +2087,10 @@ struct bpf_tramp_jit { * for __bpf_prog_enter() return value and * func_addr respectively */ - int r14_off; /* Offset of saved %r14 */ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */ int tccnt_off; /* Offset of saved tailcall counter */ + int r14_off; /* Offset of saved %r14, has to be at the + * bottom */ int do_fexit; /* do_fexit: label */ }; @@ -2247,8 +2249,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, * Calculate the stack layout. */ - /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */ + /* + * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x + * ABI requires, put our backchain at the end of the allocated memory. + */ tjit->stack_size = STACK_FRAME_OVERHEAD; + tjit->backchain_off = tjit->stack_size - sizeof(u64); tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64)); tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64)); tjit->ip_off = alloc_stack(tjit, sizeof(u64)); @@ -2256,10 +2262,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64)); tjit->retval_off = alloc_stack(tjit, sizeof(u64)); tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64)); - tjit->r14_off = alloc_stack(tjit, sizeof(u64)); tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); + tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2); /* * In accordance with the s390x ABI, the caller has allocated * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's @@ -2268,8 +2274,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64); tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; + /* lgr %r1,%r15 */ + EMIT4(0xb9040000, REG_1, REG_15); /* aghi %r15,-stack_size */ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); + /* stg %r1,backchain_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, + tjit->backchain_off); /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ _EMIT6(0xd203f000 | tjit->tccnt_off, 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); -- cgit From f0eee815babed70a749d2496a7678be5b45b4c14 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Oct 2023 22:47:50 +1100 Subject: powerpc/47x: Fix 47x syscall return crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eddie reported that newer kernels were crashing during boot on his 476 FSP2 system: kernel tried to execute user page (b7ee2000) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xb7ee2000 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K FSP-2 Modules linked in: CPU: 0 PID: 61 Comm: mount Not tainted 6.1.55-d23900f.ppcnf-fsp2 #1 Hardware name: ibm,fsp2 476fpe 0x7ff520c0 FSP-2 NIP:  b7ee2000 LR: 8c008000 CTR: 00000000 REGS: bffebd83 TRAP: 0400   Not tainted (6.1.55-d23900f.ppcnf-fs p2) MSR:  00000030   CR: 00001000  XER: 20000000 GPR00: c00110ac bffebe63 bffebe7e bffebe88 8c008000 00001000 00000d12 b7ee2000 GPR08: 00000033 00000000 00000000 c139df10 48224824 1016c314 10160000 00000000 GPR16: 10160000 10160000 00000008 00000000 10160000 00000000 10160000 1017f5b0 GPR24: 1017fa50 1017f4f0 1017fa50 1017f740 1017f630 00000000 00000000 1017f4f0 NIP [b7ee2000] 0xb7ee2000 LR [8c008000] 0x8c008000 Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX ---[ end trace 0000000000000000 ]--- The problem is in ret_from_syscall where the check for icache_44x_need_flush is done. When the flush is needed the code jumps out-of-line to do the flush, and then intends to jump back to continue the syscall return. However the branch back to label 1b doesn't return to the correct location, instead branching back just prior to the return to userspace, causing bogus register values to be used by the rfi. The breakage was introduced by commit 6f76a01173cc ("powerpc/syscall: implement system call entry/exit logic in C for PPC32") which inadvertently removed the "1" label and reused it elsewhere. Fix it by adding named local labels in the correct locations. Note that the return label needs to be outside the ifdef so that CONFIG_PPC_47x=n compiles. Fixes: 6f76a01173cc ("powerpc/syscall: implement system call entry/exit logic in C for PPC32") Cc: stable@vger.kernel.org # v5.12+ Reported-by: Eddie James Tested-by: Eddie James Link: https://lore.kernel.org/linuxppc-dev/fdaadc46-7476-9237-e104-1d2168526e72@linux.ibm.com/ Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://msgid.link/20231010114750.847794-1-mpe@ellerman.id.au --- arch/powerpc/kernel/entry_32.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9692acb0361f..7eda33a24bb4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -137,8 +137,9 @@ ret_from_syscall: lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -172,10 +173,11 @@ syscall_exit_finish: b 1b #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork -- cgit From 8527ca7735ef4cdad32c45853b0138f46ab2df58 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 14:41:21 -0700 Subject: net: skbuff: fix kernel-doc typos Correct punctuation and drop an extraneous word. Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231008214121.25940-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..97bfef071255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1309,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2016,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** -- cgit From 5093bbfc10ab6636b32728e35813cbd79feb063c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 9 Oct 2023 15:56:45 +0800 Subject: mctp: perform route lookups under a RCU read-side lock Our current route lookups (mctp_route_lookup and mctp_route_lookup_null) traverse the net's route list without the RCU read lock held. This means the route lookup is subject to preemption, resulting in an potential grace period expiry, and so an eventual kfree() while we still have the route pointer. Add the proper read-side critical section locks around the route lookups, preventing premption and a possible parallel kfree. The remaining net->mctp.routes accesses are already under a rcu_read_lock, or protected by the RTNL for updates. Based on an analysis from Sili Luo , where introducing a delay in the route lookup could cause a UAF on simultaneous sendmsg() and route deletion. Reported-by: Sili Luo Fixes: 889b7da23abf ("mctp: Add initial routing framework") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Kerr Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/29c4b0e67dc1bf3571df3982de87df90cae9b631.1696837310.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/route.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index ab62fe447038..7a47a58aa54b 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -737,6 +737,8 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, { struct mctp_route *tmp, *rt = NULL; + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { /* TODO: add metrics */ if (mctp_rt_match_eid(tmp, dnet, daddr)) { @@ -747,21 +749,29 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, } } + rcu_read_unlock(); + return rt; } static struct mctp_route *mctp_route_lookup_null(struct net *net, struct net_device *dev) { - struct mctp_route *rt; + struct mctp_route *tmp, *rt = NULL; - list_for_each_entry_rcu(rt, &net->mctp.routes, list) { - if (rt->dev->dev == dev && rt->type == RTN_LOCAL && - refcount_inc_not_zero(&rt->refs)) - return rt; + rcu_read_lock(); + + list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { + if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL && + refcount_inc_not_zero(&tmp->refs)) { + rt = tmp; + break; + } } - return NULL; + rcu_read_unlock(); + + return rt; } static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, -- cgit From 31c07dffafce914c1d1543c135382a11ff058d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Oct 2023 12:31:10 +0000 Subject: net: nfc: fix races in nfc_llcp_sock_get() and nfc_llcp_sock_get_sn() Sili Luo reported a race in nfc_llcp_sock_get(), leading to UAF. Getting a reference on the socket found in a lookup while holding a lock should happen before releasing the lock. nfc_llcp_sock_get_sn() has a similar problem. Finally nfc_llcp_recv_snl() needs to make sure the socket found by nfc_llcp_sock_from_sn() does not disappear. Fixes: 8f50020ed9b8 ("NFC: LLCP late binding") Reported-by: Sili Luo Signed-off-by: Eric Dumazet Cc: Willy Tarreau Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231009123110.3735515-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/nfc/llcp_core.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 6705bb895e23..1dac28136e6a 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -203,17 +203,13 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { llcp_sock = tmp_sock; + sock_hold(&llcp_sock->sk); break; } } read_unlock(&local->sockets.lock); - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - return llcp_sock; } @@ -346,7 +342,8 @@ static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - const u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len, + bool needref) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -382,6 +379,8 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) { llcp_sock = tmp_sock; + if (needref) + sock_hold(&llcp_sock->sk); break; } } @@ -423,7 +422,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, * to this service name. */ if (nfc_llcp_sock_from_sn(local, sock->service_name, - sock->service_name_len) != NULL) { + sock->service_name_len, + false) != NULL) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; @@ -824,16 +824,7 @@ out: static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, const u8 *sn, size_t sn_len) { - struct nfc_llcp_sock *llcp_sock; - - llcp_sock = nfc_llcp_sock_from_sn(local, sn, sn_len); - - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - - return llcp_sock; + return nfc_llcp_sock_from_sn(local, sn, sn_len, true); } static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) @@ -1298,7 +1289,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, - service_name_len); + service_name_len, + true); if (!llcp_sock) { sap = 0; goto add_snl; @@ -1318,6 +1310,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, if (sap == LLCP_SAP_MAX) { sap = 0; + nfc_llcp_sock_put(llcp_sock); goto add_snl; } @@ -1335,6 +1328,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, pr_debug("%p %d\n", llcp_sock, sap); + nfc_llcp_sock_put(llcp_sock); add_snl: sdp = nfc_llcp_build_sdres_tlv(tid, sap); if (sdp == NULL) -- cgit From 108a36d07c01edbc5942d27c92494d1c6e4d45a0 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 9 Oct 2023 15:36:45 +0200 Subject: ethtool: Fix mod state of verbose no_mask bitset A bitset without mask in a _SET request means we want exactly the bits in the bitset to be set. This works correctly for compact format but when verbose format is parsed, ethnl_update_bitset32_verbose() only sets the bits present in the request bitset but does not clear the rest. The commit 6699170376ab fixes this issue by clearing the whole target bitmap before we start iterating. The solution proposed brought an issue with the behavior of the mod variable. As the bitset is always cleared the old val will always differ to the new val. Fix it by adding a new temporary variable which save the state of the old bitmap. Fixes: 6699170376ab ("ethtool: fix application of verbose no_mask bitset") Signed-off-by: Kory Maincent Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231009133645.44503-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/bitset.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 0515d6604b3b..883ed9be81f9 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -431,8 +431,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { + u32 *orig_bitmap, *saved_bitmap = NULL; struct nlattr *bit_attr; bool no_mask; + bool dummy; int rem; int ret; @@ -448,8 +450,22 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, } no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; - if (no_mask) - ethnl_bitmap32_clear(bitmap, 0, nbits, mod); + if (no_mask) { + unsigned int nwords = DIV_ROUND_UP(nbits, 32); + unsigned int nbytes = nwords * sizeof(u32); + + /* The bitmap size is only the size of the map part without + * its mask part. + */ + saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL); + if (!saved_bitmap) + return -ENOMEM; + memcpy(saved_bitmap, bitmap, nbytes); + ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy); + orig_bitmap = saved_bitmap; + } else { + orig_bitmap = bitmap; + } nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; @@ -458,13 +474,14 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS"); - return -EINVAL; + ret = -EINVAL; + goto out; } ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) - return ret; - old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32)); + goto out; + old_val = orig_bitmap[idx / 32] & ((u32)1 << (idx % 32)); if (new_val != old_val) { if (new_val) bitmap[idx / 32] |= ((u32)1 << (idx % 32)); @@ -474,7 +491,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, } } - return 0; + ret = 0; +out: + kfree(saved_bitmap); + return ret; } static int ethnl_compact_sanity_checks(unsigned int nbits, -- cgit From 8f8abb863fa5a4cc18955c6a0e17af0ded3e4a76 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Tue, 10 Oct 2023 00:26:14 +0200 Subject: net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read syzbot has found an uninit-value bug triggered by the dm9601 driver [1]. This error happens because the variable res is not updated if the call to dm_read_shared_word returns an error. In this particular case -EPROTO was returned and res stayed uninitialized. This can be avoided by checking the return value of dm_read_shared_word and propagating the error if the read operation failed. [1] https://syzkaller.appspot.com/bug?extid=1f53a30781af65d2c955 Cc: stable@vger.kernel.org Signed-off-by: Javier Carrasco Reported-and-tested-by: syzbot+1f53a30781af65d2c955@syzkaller.appspotmail.com Acked-by: Peter Korsgaard Fixes: d0374f4f9c35cdfbee0 ("USB: Davicom DM9601 usbnet driver") Link: https://lore.kernel.org/r/20231009-topic-dm9601_uninit_mdio_read-v2-1-f2fe39739b6c@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/dm9601.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 48d7d278631e..99ec1d4a972d 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -222,13 +222,18 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; + int err; if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return 0; } - dm_read_shared_word(dev, 1, loc, &res); + err = dm_read_shared_word(dev, 1, loc, &res); + if (err < 0) { + netdev_err(dev->net, "MDIO read error: %d\n", err); + return err; + } netdev_dbg(dev->net, "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", -- cgit From f454b18e07f518bcd0c05af17a2239138bff52de Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 7 Oct 2023 12:57:02 +0200 Subject: x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix erratum #1485 on Zen4 parts where running with STIBP disabled can cause an #UD exception. The performance impact of the fix is negligible. Reported-by: René Rebe Signed-off-by: Borislav Petkov (AMD) Tested-by: René Rebe Cc: Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com --- arch/x86/include/asm/msr-index.h | 9 +++++++-- arch/x86/kernel/cpu/amd.c | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1d111350197f..b37abb55e948 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -637,12 +637,17 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e -/* Fam 17h MSRs */ -#define MSR_F17H_IRPERF 0xc00000e9 +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +/* Zen 2 */ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 03ef962a6992..ece2b5b7b0fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -80,6 +80,10 @@ static const int amd_div0[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1149,6 +1153,10 @@ static void init_amd(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); } + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } #ifdef CONFIG_X86_32 -- cgit From 4d73c6772ab771cbbe7e46a73e7c78ba490350fa Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 4 Oct 2023 11:19:15 -0700 Subject: platform/x86: intel-uncore-freq: Conditionally create attribute for read frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the current uncore frequency can't be read, don't create attribute "current_freq_khz" as any read will fail later. Some user space applications like turbostat fail to continue with the failure. So, check error during attribute creation. Fixes: 414eef27283a ("platform/x86/intel/uncore-freq: Display uncore current frequency") Signed-off-by: Srinivas Pandruvada Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231004181915.1887913-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../platform/x86/intel/uncore-frequency/uncore-frequency-common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 1152deaa0078..33ab207493e3 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -176,7 +176,7 @@ show_uncore_data(initial_max_freq_khz); static int create_attr_group(struct uncore_data *data, char *name) { - int ret, index = 0; + int ret, freq, index = 0; init_attribute_rw(max_freq_khz); init_attribute_rw(min_freq_khz); @@ -197,7 +197,11 @@ static int create_attr_group(struct uncore_data *data, char *name) data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + + ret = uncore_read_freq(data, &freq); + if (!ret) + data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index] = NULL; data->uncore_attr_group.name = name; -- cgit From 6284e67aa6cb3af870ed11dfcfafd80fd927777b Mon Sep 17 00:00:00 2001 From: Nikita Kravets Date: Fri, 6 Oct 2023 20:53:53 +0300 Subject: platform/x86: msi-ec: Fix the 3rd config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the charge control address of CONF3 and remove an incorrect firmware version which turned out to be a BIOS firmware and not an EC firmware. Fixes: 392cacf2aa10 ("platform/x86: Add new msi-ec driver") Cc: Aakash Singh Cc: Jose Angel Pastrana Signed-off-by: Nikita Kravets Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231006175352.1753017-5-teackot@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/msi-ec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/msi-ec.c b/drivers/platform/x86/msi-ec.c index f26a3121092f..492eb383ee7a 100644 --- a/drivers/platform/x86/msi-ec.c +++ b/drivers/platform/x86/msi-ec.c @@ -276,14 +276,13 @@ static struct msi_ec_conf CONF2 __initdata = { static const char * const ALLOWED_FW_3[] __initconst = { "1592EMS1.111", - "E1592IMS.10C", NULL }; static struct msi_ec_conf CONF3 __initdata = { .allowed_fw = ALLOWED_FW_3, .charge_control = { - .address = 0xef, + .address = 0xd7, .offset_start = 0x8a, .offset_end = 0x80, .range_min = 0x8a, -- cgit From 51064b7acf665bfa2c929d7cafb7ad494b7d2783 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 8 Oct 2023 01:39:28 +0200 Subject: platform/x86: wmi: Update MAINTAINERS entry Since 2011, the WMI subsystem is marked as orphaned, which means that a important part of platform support for modern notebooks and even desktops is receiving not enough maintenance. So i decided to take over the maintenance of the WMI subsystem. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231007233933.72121-2-W_Armin@gmx.de Acked-by: Hans de Goede Signed-off-by: Hans de Goede --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dbf1668dcd84..b38fcbaa24f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -378,8 +378,9 @@ F: drivers/acpi/viot.c F: include/linux/acpi_viot.h ACPI WMI DRIVER +M: Armin Wolf L: platform-driver-x86@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/driver-api/wmi.rst F: Documentation/wmi/ F: drivers/platform/x86/wmi.c -- cgit From 14690995c14109852c7ba6e316045c02e4254272 Mon Sep 17 00:00:00 2001 From: Yanguo Li Date: Mon, 9 Oct 2023 13:21:55 +0200 Subject: nfp: flower: avoid rmmod nfp crash issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are CT table entries, and you rmmod nfp, the following events can happen: task1: nfp_net_pci_remove ↓ nfp_flower_stop->(asynchronous)tcf_ct_flow_table_cleanup_work(3) ↓ nfp_zone_table_entry_destroy(1) task2: nfp_fl_ct_handle_nft_flow(2) When the execution order is (1)->(2)->(3), it will crash. Therefore, in the function nfp_fl_ct_del_flow, nf_flow_table_offload_del_cb needs to be executed synchronously. At the same time, in order to solve the deadlock problem and the problem of rtnl_lock sometimes failing, replace rtnl_lock with the private nfp_fl_lock. Fixes: 7cc93d888df7 ("nfp: flower-ct: remove callback delete deadlock") Cc: stable@vger.kernel.org Signed-off-by: Yanguo Li Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 10 +++++---- .../net/ethernet/netronome/nfp/flower/conntrack.c | 19 +++++++++++------ drivers/net/ethernet/netronome/nfp/flower/main.h | 2 ++ .../net/ethernet/netronome/nfp/flower/metadata.c | 2 ++ .../net/ethernet/netronome/nfp/flower/offload.c | 24 ++++++++++++++++------ .../net/ethernet/netronome/nfp/flower/qos_conf.c | 20 +++++++++++------- 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index f21cf1f40f98..153533cd8f08 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -210,6 +210,7 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); struct nfp_flower_cmsg_merge_hint *msg; struct nfp_fl_payload *sub_flows[2]; + struct nfp_flower_priv *priv; int err, i, flow_cnt; msg = nfp_flower_cmsg_get_data(skb); @@ -228,14 +229,15 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) return; } - rtnl_lock(); + priv = app->priv; + mutex_lock(&priv->nfp_fl_lock); for (i = 0; i < flow_cnt; i++) { u32 ctx = be32_to_cpu(msg->flow[i].host_ctx); sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx); if (!sub_flows[i]) { nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n"); - goto err_rtnl_unlock; + goto err_mutex_unlock; } } @@ -244,8 +246,8 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) if (err == -ENOMEM) nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n"); -err_rtnl_unlock: - rtnl_unlock(); +err_mutex_unlock: + mutex_unlock(&priv->nfp_fl_lock); } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2643c4b3ff1f..2967bab72505 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -2131,8 +2131,6 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl struct nfp_fl_ct_flow_entry *ct_entry; struct netlink_ext_ack *extack = NULL; - ASSERT_RTNL(); - extack = flow->common.extack; switch (flow->command) { case FLOW_CLS_REPLACE: @@ -2178,9 +2176,13 @@ int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb switch (type) { case TC_SETUP_CLSFLOWER: - rtnl_lock(); + while (!mutex_trylock(&zt->priv->nfp_fl_lock)) { + if (!zt->nft) /* avoid deadlock */ + return err; + msleep(20); + } err = nfp_fl_ct_offload_nft_flow(zt, flow); - rtnl_unlock(); + mutex_unlock(&zt->priv->nfp_fl_lock); break; default: return -EOPNOTSUPP; @@ -2208,6 +2210,7 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) struct nfp_fl_ct_flow_entry *ct_entry; struct nfp_fl_ct_zone_entry *zt; struct rhashtable *m_table; + struct nf_flowtable *nft; if (!ct_map_ent) return -ENOENT; @@ -2226,8 +2229,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) if (ct_map_ent->cookie > 0) kfree(ct_map_ent); - if (!zt->pre_ct_count) { - zt->nft = NULL; + if (!zt->pre_ct_count && zt->nft) { + nft = zt->nft; + zt->nft = NULL; /* avoid deadlock */ + nf_flow_table_offload_del_cb(nft, + nfp_fl_ct_handle_nft_flow, + zt); nfp_fl_ct_clean_nft_entries(zt); } break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 40372545148e..2b7c947ff4f2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -297,6 +297,7 @@ struct nfp_fl_internal_ports { * @predt_list: List to keep track of decap pretun flows * @neigh_table: Table to keep track of neighbor entries * @predt_lock: Lock to serialise predt/neigh table updates + * @nfp_fl_lock: Lock to protect the flow offload operation */ struct nfp_flower_priv { struct nfp_app *app; @@ -339,6 +340,7 @@ struct nfp_flower_priv { struct list_head predt_list; struct rhashtable neigh_table; spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ + struct mutex nfp_fl_lock; /* Protect the flow operation */ }; /** diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 0f06ef6e24bf..80e4675582bf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -528,6 +528,8 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_stats_ctx_table; + mutex_init(&priv->nfp_fl_lock); + err = rhashtable_init(&priv->ct_zone_table, &nfp_zone_table_params); if (err) goto err_free_merge_table; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index c153f0575b92..0aceef9fe582 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1009,8 +1009,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, u64 parent_ctx = 0; int err; - ASSERT_RTNL(); - if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -1727,19 +1725,30 @@ static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flower) { + struct nfp_flower_priv *priv = app->priv; + int ret; + if (!eth_proto_is_802_3(flower->common.protocol)) return -EOPNOTSUPP; + mutex_lock(&priv->nfp_fl_lock); switch (flower->command) { case FLOW_CLS_REPLACE: - return nfp_flower_add_offload(app, netdev, flower); + ret = nfp_flower_add_offload(app, netdev, flower); + break; case FLOW_CLS_DESTROY: - return nfp_flower_del_offload(app, netdev, flower); + ret = nfp_flower_del_offload(app, netdev, flower); + break; case FLOW_CLS_STATS: - return nfp_flower_get_stats(app, netdev, flower); + ret = nfp_flower_get_stats(app, netdev, flower); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + mutex_unlock(&priv->nfp_fl_lock); + + return ret; } static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, @@ -1778,6 +1787,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, repr_priv = repr->app_priv; repr_priv->block_shared = f->block_shared; f->driver_block_list = &nfp_block_cb_list; + f->unlocked_driver_cb = true; switch (f->command) { case FLOW_BLOCK_BIND: @@ -1876,6 +1886,8 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, str nfp_flower_internal_port_can_offload(app, netdev))) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; + switch (f->command) { case FLOW_BLOCK_BIND: cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 99052a925d9e..e7180b4793c7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -523,25 +523,31 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, { struct netlink_ext_ack *extack = flow->common.extack; struct nfp_flower_priv *fl_priv = app->priv; + int ret; if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support qos rate limit offload"); return -EOPNOTSUPP; } + mutex_lock(&fl_priv->nfp_fl_lock); switch (flow->command) { case TC_CLSMATCHALL_REPLACE: - return nfp_flower_install_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_install_rate_limiter(app, netdev, flow, extack); + break; case TC_CLSMATCHALL_DESTROY: - return nfp_flower_remove_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_remove_rate_limiter(app, netdev, flow, extack); + break; case TC_CLSMATCHALL_STATS: - return nfp_flower_stats_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_stats_rate_limiter(app, netdev, flow, extack); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + mutex_unlock(&fl_priv->nfp_fl_lock); + + return ret; } /* Offload tc action, currently only for tc police */ -- cgit From a950a5921db450c74212327f69950ff03419483a Mon Sep 17 00:00:00 2001 From: Nils Hoppmann Date: Mon, 9 Oct 2023 16:40:48 +0200 Subject: net/smc: Fix pos miscalculation in statistics SMC_STAT_PAYLOAD_SUB(_smc_stats, _tech, key, _len, _rc) will calculate wrong bucket positions for payloads of exactly 4096 bytes and (1 << (m + 12)) bytes, with m == SMC_BUF_MAX - 1. Intended bucket distribution: Assume l == size of payload, m == SMC_BUF_MAX - 1. Bucket 0 : 0 < l <= 2^13 Bucket n, 1 <= n <= m-1 : 2^(n+12) < l <= 2^(n+13) Bucket m : l > 2^(m+12) Current solution: _pos = fls64((l) >> 13) [...] _pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m For l == 4096, _pos == -1, but should be _pos == 0. For l == (1 << (m + 12)), _pos == m, but should be _pos == m - 1. In order to avoid special treatment of these corner cases, the calculation is adjusted. The new solution first subtracts the length by one, and then calculates the correct bucket by shifting accordingly, i.e. _pos = fls64((l - 1) >> 13), l > 0. This not only fixes the issues named above, but also makes the whole bucket assignment easier to follow. Same is done for SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len), where the calculation of the bucket position is similar to the one named above. Fixes: e0e4b8fa5338 ("net/smc: Add SMC statistics support") Suggested-by: Halil Pasic Signed-off-by: Nils Hoppmann Reviewed-by: Halil Pasic Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc_stats.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index aa8928975cc6..9d32058db2b5 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -92,13 +92,14 @@ do { \ typeof(_smc_stats) stats = (_smc_stats); \ typeof(_tech) t = (_tech); \ typeof(_len) l = (_len); \ - int _pos = fls64((l) >> 13); \ + int _pos; \ typeof(_rc) r = (_rc); \ int m = SMC_BUF_MAX - 1; \ this_cpu_inc((*stats).smc[t].key ## _cnt); \ - if (r <= 0) \ + if (r <= 0 || l <= 0) \ break; \ - _pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \ + _pos = fls64((l - 1) >> 13); \ + _pos = (_pos <= m) ? _pos : m; \ this_cpu_inc((*stats).smc[t].key ## _pd.buf[_pos]); \ this_cpu_add((*stats).smc[t].key ## _bytes, r); \ } \ @@ -138,9 +139,12 @@ while (0) do { \ typeof(_len) _l = (_len); \ typeof(_tech) t = (_tech); \ - int _pos = fls((_l) >> 13); \ + int _pos; \ int m = SMC_BUF_MAX - 1; \ - _pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \ + if (_l <= 0) \ + break; \ + _pos = fls((_l - 1) >> 13); \ + _pos = (_pos <= m) ? _pos : m; \ this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \ } \ while (0) -- cgit From 75f5f60bf7ee075ed4a29637ce390898b4c36811 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 9 Oct 2023 14:44:54 -0600 Subject: btrfs: add __counted_by for struct btrfs_delayed_item and use struct_size() Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). While there, use struct_size() helper, instead of the open-coded version, to calculate the size for the allocation of the whole flexible structure, including of course, the flexible-array member. This code was found with the help of Coccinelle, and audited and fixed manually. Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/delayed-inode.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index dfe29b29915f..7c2ee0795dc7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -313,7 +313,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, { struct btrfs_delayed_item *item; - item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); + item = kmalloc(struct_size(item, data, data_len), GFP_NOFS); if (item) { item->data_len = data_len; item->type = type; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index dc1085b2a397..1da213197f55 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -95,7 +95,7 @@ struct btrfs_delayed_item { bool logged; /* The maximum leaf size is 64K, so u16 is more than enough. */ u16 data_len; - char data[]; + char data[] __counted_by(data_len); }; static inline void btrfs_init_delayed_root( -- cgit From 89434b069e460967624903b049e5cf5c9e6b99b9 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Mon, 9 Oct 2023 21:00:58 +0000 Subject: usb: typec: altmodes/displayport: Signal hpd low when exiting mode Upon receiving an ACK for a sent EXIT_MODE message, the DisplayPort driver currently resets the status and configuration of the port partner. The hpd signal is not updated despite being part of the status, so the Display stack can still transmit video despite typec_altmode_exit placing the lanes in a Safe State. Set hpd to low when a sent EXIT_MODE message is ACK'ed. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231009210057.3773877-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 426c88a516e5..59e0218a8bc5 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -304,6 +304,11 @@ static int dp_altmode_vdm(struct typec_altmode *alt, typec_altmode_update_active(alt, false); dp->data.status = 0; dp->data.conf = 0; + if (dp->hpd) { + drm_connector_oob_hotplug_event(dp->connector_fwnode); + dp->hpd = false; + sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd"); + } break; case DP_CMD_STATUS_UPDATE: dp->data.status = *vdo; -- cgit From dddb91cde52b4a57fa06a332b230fca3b11b885f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 10 Oct 2023 17:17:49 +0300 Subject: usb: typec: ucsi: Fix missing link removal The link between the partner device and its USB Power Delivery instance was never removed which prevented the device from being released. Removing the link always when the partner is unregistered. Fixes: b04e1747fbcc ("usb: typec: ucsi: Register USB Power Delivery Capabilities") Cc: stable Reported-by: Douglas Gilbert Closes: https://lore.kernel.org/linux-usb/ZSUMXdw9nanHtnw2@kuha.fi.intel.com/ Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231010141749.3912016-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 509c67c94a70..61b64558f96c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -787,6 +787,7 @@ static void ucsi_unregister_partner(struct ucsi_connector *con) typec_set_mode(con->port, TYPEC_STATE_SAFE); + typec_partner_set_usb_power_delivery(con->partner, NULL); ucsi_unregister_partner_pdos(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_SOP); typec_unregister_partner(con->partner); -- cgit From c9ca8de2eb15f9da24113e652980c61f95a47530 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Oct 2023 13:46:43 -0500 Subject: usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs at a black screen on startup. This issue occurs only if `ucsi_acpi` has loaded before `amdgpu` has loaded. The reason for this failure is that `amdgpu` uses power_supply_is_system_supplied() to determine if running on AC or DC power at startup. If this value is reported incorrectly the dGPU will also be programmed incorrectly and trigger errors. power_supply_is_system_supplied() reports the wrong value because UCSI power supplies provided as part of the system don't properly report the scope as "DEVICE" scope (not powering the system). In order to fix this issue check the capabilities reported from the UCSI power supply to ensure that it supports charging a battery and that it can be powered by AC. Mark the scope accordingly. Cc: stable@vger.kernel.org Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope") Link: https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html p28 Reviewed-by: Sebastian Reichel Signed-off-by: Mario Limonciello Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231009184643.129986-1-mario.limonciello@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/psy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c index 384b42267f1f..b35c6e07911e 100644 --- a/drivers/usb/typec/ucsi/psy.c +++ b/drivers/usb/typec/ucsi/psy.c @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con, struct device *dev = con->ucsi->dev; device_property_read_u8(dev, "scope", &scope); + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) { + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY | + UCSI_CAP_ATTR_BATTERY_CHARGING; + + if (con->ucsi->cap.attributes & mask) + scope = POWER_SUPPLY_SCOPE_SYSTEM; + else + scope = POWER_SUPPLY_SCOPE_DEVICE; + } val->intval = scope; return 0; } -- cgit From c4dd854f740c21ae8dd9903fc67969c5497cb14b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 25 Sep 2023 17:28:39 +0100 Subject: cpu-hotplug: Provide prototypes for arch CPU registration Provide common prototypes for arch_register_cpu() and arch_unregister_cpu(). These are called by acpi_processor.c, with weak versions, so the prototype for this is already set. It is generally not necessary for function prototypes to be conditional on preprocessor macros. Some architectures (e.g. Loongarch) are missing the prototype for this, and rather than add it to Loongarch's asm/cpu.h, do the job once for everyone. Since this covers everyone, remove the now unnecessary prototypes in asm/cpu.h, and therefore remove the 'static' from one of ia64's arch_register_cpu() definitions. [ tglx: Bring back the ia64 part and remove the ACPI prototypes ] Signed-off-by: Russell King (Oracle) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1qkoRr-0088Q8-Da@rmk-PC.armlinux.org.uk --- arch/ia64/include/asm/cpu.h | 5 ----- arch/ia64/kernel/topology.c | 2 +- arch/x86/include/asm/cpu.h | 2 -- arch/x86/kernel/topology.c | 2 +- drivers/acpi/acpi_processor.c | 1 + include/acpi/processor.h | 5 ----- include/linux/cpu.h | 2 ++ 7 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index c711db8a9c33..0f5218e361df 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include +#include #include #include #include diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 94181fe9780a..3f34ebb27525 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -465,9 +465,4 @@ extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif -#ifdef CONFIG_ACPI_HOTPLUG_CPU -extern int arch_register_cpu(int cpu); -extern void arch_unregister_cpu(int cpu); -#endif - #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..eb768a866fe3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); -- cgit From 4800021c630210ea0b19434a1fb56ab16385f2b3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Oct 2023 12:24:58 +0200 Subject: media: subdev: Don't report V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled Since the stream API is still experimental it is currently locked away behind the internal, default disabled, v4l2_subdev_enable_streams_api flag. Advertising V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled confuses userspace. E.g. it causes the following libcamera error: ERROR SimplePipeline simple.cpp:1497 Failed to reset routes for /dev/v4l-subdev1: Inappropriate ioctl for device Don't report V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled to avoid problems like this. Reported-by: Dennis Bonke Fixes: 9a6b5bf4c1bb ("media: add V4L2_SUBDEV_CAP_STREAMS") Cc: stable@vger.kernel.org # for >= 6.3 Signed-off-by: Hans de Goede Acked-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index b92348ad61f6..31752c06d1f0 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -502,6 +502,13 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg, V4L2_SUBDEV_CLIENT_CAP_STREAMS; int rval; + /* + * If the streams API is not enabled, remove V4L2_SUBDEV_CAP_STREAMS. + * Remove this when the API is no longer experimental. + */ + if (!v4l2_subdev_enable_streams_api) + streams_subdev = false; + switch (cmd) { case VIDIOC_SUBDEV_QUERYCAP: { struct v4l2_subdev_capability *cap = arg; -- cgit From c15cdea517414e0b29a11e0a0e2443d127c9109b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 6 Oct 2023 17:39:34 +0100 Subject: mm: slab: Do not create kmalloc caches smaller than arch_slab_minalign() Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") allows architectures with non-coherent DMA to define a small ARCH_KMALLOC_MINALIGN (e.g. sizeof(unsigned long long)) and this has been enabled on arm64. With KASAN_HW_TAGS enabled, however, ARCH_SLAB_MINALIGN becomes 16 on arm64 (arch_slab_minalign() dynamically selects it since commit d949a8155d13 ("mm: make minimum slab alignment a runtime property")). This can lead to a situation where kmalloc-8 caches are attempted to be created with a kmem_caches.size aligned to 16. When the cache is mergeable, it can lead to kernel warnings like: sysfs: cannot create duplicate filename '/kernel/slab/:d-0000016' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc1-00001-gda98843cd306-dirty #5 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: dump_backtrace+0x90/0xe8 show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 sysfs_warn_dup+0x64/0x80 sysfs_create_dir_ns+0xe8/0x108 kobject_add_internal+0x98/0x264 kobject_init_and_add+0x8c/0xd8 sysfs_slab_add+0x12c/0x248 slab_sysfs_init+0x98/0x14c do_one_initcall+0x6c/0x1b0 kernel_init_freeable+0x1c0/0x288 kernel_init+0x24/0x1e0 ret_from_fork+0x10/0x20 kobject: kobject_add_internal failed for :d-0000016 with -EEXIST, don't try to register things with the same name in the same directory. SLUB: Unable to add boot slab dma-kmalloc-8 to sysfs Limit the __kmalloc_minalign() return value (used to create the kmalloc-* caches) to arch_slab_minalign() so that kmalloc-8 caches are skipped when KASAN_HW_TAGS is enabled (both config and runtime). Reported-by: Mark Rutland Fixes: b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 8fda308e400d..9bbffe82d65a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -895,10 +895,13 @@ void __init setup_kmalloc_cache_index_table(void) static unsigned int __kmalloc_minalign(void) { + unsigned int minalign = dma_get_cache_alignment(); + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && is_swiotlb_allocated()) - return ARCH_KMALLOC_MINALIGN; - return dma_get_cache_alignment(); + minalign = ARCH_KMALLOC_MINALIGN; + + return max(minalign, arch_slab_minalign()); } void __init -- cgit From 30c1886ab53e89be5a0aa16df47b367957fc4bc2 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 29 Sep 2023 09:31:08 +0300 Subject: media: xilinx-vipp: Look for entities also in waiting_list The big V4L2 async framework overhaul simplified linked lists used by the V4L2 async framework. This affected a few drivers and it turns out a few of those drivers rely on searching for entities in both async notifier's waiting and done lists. Do that by separately traversing both. Fixes: 9bf19fbf0c8b ("media: v4l: async: Rework internal lists") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/platform/xilinx/xilinx-vipp.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/xilinx/xilinx-vipp.c b/drivers/media/platform/xilinx/xilinx-vipp.c index 4285770fde18..996684a73038 100644 --- a/drivers/media/platform/xilinx/xilinx-vipp.c +++ b/drivers/media/platform/xilinx/xilinx-vipp.c @@ -55,11 +55,18 @@ xvip_graph_find_entity(struct xvip_composite_device *xdev, { struct xvip_graph_entity *entity; struct v4l2_async_connection *asd; - - list_for_each_entry(asd, &xdev->notifier.done_list, asc_entry) { - entity = to_xvip_entity(asd); - if (entity->asd.match.fwnode == fwnode) - return entity; + struct list_head *lists[] = { + &xdev->notifier.done_list, + &xdev->notifier.waiting_list + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(lists); i++) { + list_for_each_entry(asd, lists[i], asc_entry) { + entity = to_xvip_entity(asd); + if (entity->asd.match.fwnode == fwnode) + return entity; + } } return NULL; -- cgit From 1d4c25854aac872e6d3d1dad5dc5eb580d18ac6c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Sep 2023 11:31:33 +0200 Subject: media: ipu-bridge: Add missing acpi_dev_put() in ipu_bridge_get_ivsc_acpi_dev() In ipu_bridge_get_ivsc_acpi_dev(), the "ivsc_adev" acpi_device pointer from the outer loop is handed over to the caller, which takes proper care of its reference count. However, the "consumer" acpi_device pointer from the inner loop is lost, without decrementing its reference count. Fix this by adding the missing call to acpi_dev_put(). Fixes: c66821f381ae ("media: pci: intel: Add IVSC support for IPU bridge driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ipu-bridge.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/intel/ipu-bridge.c b/drivers/media/pci/intel/ipu-bridge.c index 1bde8b6e0b11..e38198e259c0 100644 --- a/drivers/media/pci/intel/ipu-bridge.c +++ b/drivers/media/pci/intel/ipu-bridge.c @@ -107,8 +107,10 @@ static struct acpi_device *ipu_bridge_get_ivsc_acpi_dev(struct acpi_device *adev for_each_acpi_dev_match(ivsc_adev, acpi_id->id, NULL, -1) /* camera sensor depends on IVSC in DSDT if exist */ for_each_acpi_consumer_dev(ivsc_adev, consumer) - if (consumer->handle == handle) + if (consumer->handle == handle) { + acpi_dev_put(consumer); return ivsc_adev; + } } return NULL; -- cgit From c46f16f156ac58afcf4addc850bb5dfbca77b9fc Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Tue, 10 Oct 2023 09:07:44 +0200 Subject: media: i2c: ov8858: Don't set fwnode in the driver This makes the driver work with the new check in v4l2_async_register_subdev() that was introduced recently in 6.6-rc1. Without this change, probe fails with: ov8858 1-0036: Detected OV8858 sensor, revision 0xb2 ov8858 1-0036: sub-device fwnode is an endpoint! ov8858 1-0036: v4l2 async register subdev failed ov8858: probe of 1-0036 failed with error -22 This also simplifies the driver a bit. Signed-off-by: Ondrej Jirman Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/i2c/ov8858.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/media/i2c/ov8858.c b/drivers/media/i2c/ov8858.c index 3af6125a2eee..4d9fd76e2f60 100644 --- a/drivers/media/i2c/ov8858.c +++ b/drivers/media/i2c/ov8858.c @@ -1850,9 +1850,9 @@ static int ov8858_parse_of(struct ov8858 *ov8858) } ret = v4l2_fwnode_endpoint_parse(endpoint, &vep); + fwnode_handle_put(endpoint); if (ret) { dev_err(dev, "Failed to parse endpoint: %d\n", ret); - fwnode_handle_put(endpoint); return ret; } @@ -1864,12 +1864,9 @@ static int ov8858_parse_of(struct ov8858 *ov8858) default: dev_err(dev, "Unsupported number of data lanes %u\n", ov8858->num_lanes); - fwnode_handle_put(endpoint); return -EINVAL; } - ov8858->subdev.fwnode = endpoint; - return 0; } @@ -1913,7 +1910,7 @@ static int ov8858_probe(struct i2c_client *client) ret = ov8858_init_ctrls(ov8858); if (ret) - goto err_put_fwnode; + return ret; sd = &ov8858->subdev; sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS; @@ -1964,8 +1961,6 @@ err_clean_entity: media_entity_cleanup(&sd->entity); err_free_handler: v4l2_ctrl_handler_free(&ov8858->ctrl_handler); -err_put_fwnode: - fwnode_handle_put(ov8858->subdev.fwnode); return ret; } @@ -1978,7 +1973,6 @@ static void ov8858_remove(struct i2c_client *client) v4l2_async_unregister_subdev(sd); media_entity_cleanup(&sd->entity); v4l2_ctrl_handler_free(&ov8858->ctrl_handler); - fwnode_handle_put(ov8858->subdev.fwnode); pm_runtime_disable(&client->dev); if (!pm_runtime_status_suspended(&client->dev)) -- cgit From f588d72bd95f748849685412b1f0c7959ca228cf Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 18 Sep 2023 23:30:20 -0700 Subject: nfs42: client needs to strip file mode's suid/sgid bit after ALLOCATE op The Linux NFS server strips the SUID and SGID from the file mode on ALLOCATE op. Modify _nfs42_proc_fallocate to add NFS_INO_REVAL_FORCED to nfs_set_cache_invalid's argument to force update of the file mode suid/sgid bit. Suggested-by: Trond Myklebust Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 063e00aff87e..28704f924612 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -81,7 +81,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, if (status == 0) { if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, + NFS_INO_REVAL_FORCED | NFS_INO_INVALID_MODE); spin_unlock(&inode->i_lock); } status = nfs_post_op_update_inode_force_wcc(inode, -- cgit From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Oct 2023 10:18:01 +0200 Subject: wifi: cfg80211: use system_unbound_wq for wiphy work Since wiphy work items can run pretty much arbitrary code in the stack/driver, it can take longer to run all of this, so we shouldn't be using system_wq via schedule_work(). Also, we lock the wiphy (which is the reason this exists), so use system_unbound_wq. Reported-and-tested-by: Kalle Valo Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 64e861617110..acec41c1809a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1622,7 +1622,7 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) list_add_tail(&work->entry, &rdev->wiphy_work_list); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); } EXPORT_SYMBOL_GPL(wiphy_work_queue); -- cgit From 02e0e426a2fb1446ebd7bc0eccfb48aedefb966b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Oct 2023 23:09:18 +0200 Subject: wifi: mac80211: fix error path key leak In the previous key leak fix for the other error paths, I meant to unify all of them to the same place, but used the wrong label, which I noticed when doing the merge into wireless-next. Fix it. Fixes: d097ae01ebd4 ("wifi: mac80211: fix potential key leak") Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0665ff5e456e..a2db0585dce0 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -912,7 +912,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ret = -EALREADY; - goto unlock; + goto out; } key->local = sdata->local; @@ -940,7 +940,6 @@ int ieee80211_key_link(struct ieee80211_key *key, out: ieee80211_key_free_unused(key); - unlock: mutex_unlock(&sdata->local->key_mtx); return ret; -- cgit From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Wed, 4 Oct 2023 18:39:28 +0200 Subject: net: rfkill: gpio: prevent value glitch during probe When either reset- or shutdown-gpio have are initially deasserted, e.g. after a reboot - or when the hardware does not include pull-down, there will be a short toggle of both IOs to logical 0 and back to 1. It seems that the rfkill default is unblocked, so the driver should not glitch to output low during probe. It can lead e.g. to unexpected lte modem reconnect: [1] root@localhost:~# dmesg | grep "usb 2-1" [ 2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd [ 21.215278] usb 2-1: USB disconnect, device number 2 [ 28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd The glitch has been discovered on an arm64 board, now that device-tree support for the rfkill-gpio driver has finally appeared :). Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to GPIOD_ASIS to avoid any glitches. The rfkill driver will set the intended value during rfkill_sync_work. Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver") Signed-off-by: Josua Mayer Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index e9d1b2f2ff0a..5a81505fba9a 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -108,13 +108,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->clk = devm_clk_get(&pdev->dev, NULL); - gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); rfkill->reset_gpio = gpio; - gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); -- cgit From f2ac54ebf85615a6d78f5eb213a8bbeeb17ebe5d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Oct 2023 16:55:10 +0200 Subject: net: rfkill: reduce data->mtx scope in rfkill_fop_open In syzbot runs, lockdep reports that there's a (potential) deadlock here of data->mtx being locked recursively. This isn't really a deadlock since they are different instances, but lockdep cannot know, and teaching it would be far more difficult than other fixes. At the same time we don't even really _need_ the mutex to be locked in rfkill_fop_open(), since we're modifying only a completely fresh instance of 'data' (struct rfkill_data) that's not yet added to the global list. However, to avoid any reordering etc. within the globally locked section, and to make the code look more symmetric, we should still lock the data->events list manipulation, but also need to lock _only_ that. So do that. Reported-by: syzbot+509238e523e032442b80@syzkaller.appspotmail.com Fixes: 2c3dfba4cf84 ("rfkill: sync before userspace visibility/changes") Signed-off-by: Johannes Berg --- net/rfkill/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 08630896b6c8..14cc8fe8584b 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1180,7 +1180,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) init_waitqueue_head(&data->read_wait); mutex_lock(&rfkill_global_mutex); - mutex_lock(&data->mtx); /* * start getting events from elsewhere but hold mtx to get * startup events added first @@ -1192,10 +1191,11 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) goto free; rfkill_sync(rfkill); rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); + mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); + mutex_unlock(&data->mtx); } list_add(&data->list, &rfkill_fds); - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); file->private_data = data; @@ -1203,7 +1203,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) return stream_open(inode, file); free: - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); mutex_destroy(&data->mtx); list_for_each_entry_safe(ev, tmp, &data->events, list) -- cgit From 6a6d4644ce935ddec4f76223ac0ca68da56bd2d3 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 11 Oct 2023 10:43:26 -0400 Subject: NFS: Fix potential oops in nfs_inode_remove_request() Once a folio's private data has been cleared, it's possible for another process to clear the folio->mapping (e.g. via invalidate_complete_folio2 or evict_mapping_folio), so it wouldn't be safe to call nfs_page_to_inode() after that. Fixes: 0c493b5cf16e ("NFS: Convert buffered writes to use folios") Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7720b5e43014..9d82d50ce0b1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -788,6 +788,8 @@ static void nfs_inode_add_request(struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio_file_mapping(folio); @@ -802,7 +804,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { - atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests); + atomic_long_dec(&nfsi->nrequests); nfs_release_request(req); } } -- cgit From d6cbc6a3a856a7d8047316d81e2e039e44432acb Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 11 Oct 2023 14:48:53 +0100 Subject: ASoC: cs42l42: Fix missing include of gpio/consumer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to gpiod_set_value_cansleep() in cs42l42_sdw_update_status() needs the header file gpio/consumer.h to be included. This was introduced by commit 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") and caused error: sound/soc/codecs/cs42l42-sdw.c:374:4: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean gpio_set_value_cansleep’? Signed-off-by: Richard Fitzgerald Fixes: 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") Link: https://lore.kernel.org/r/20231011134853.20059-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42-sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs42l42-sdw.c b/sound/soc/codecs/cs42l42-sdw.c index 974bae4abfad..94a66a325303 100644 --- a/sound/soc/codecs/cs42l42-sdw.c +++ b/sound/soc/codecs/cs42l42-sdw.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include -- cgit From 92d4abd66f7080075793970fc8f241239e58a9e7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Bokowy Date: Wed, 20 Sep 2023 17:30:07 +0200 Subject: Bluetooth: vhci: Fix race when opening vhci device When the vhci device is opened in the two-step way, i.e.: open device then write a vendor packet with requested controller type, the device shall respond with a vendor packet which includes HCI index of created interface. When the virtual HCI is created, the host sends a reset request to the controller. This request is processed by the vhci_send_frame() function. However, this request is send by a different thread, so it might happen that this HCI request will be received before the vendor response is queued in the read queue. This results in the HCI vendor response and HCI reset request inversion in the read queue which leads to improper behavior of btvirt: > dmesg [1754256.640122] Bluetooth: MGMT ver 1.22 [1754263.023806] Bluetooth: MGMT ver 1.22 [1754265.043775] Bluetooth: hci1: Opcode 0x c03 failed: -110 In order to synchronize vhci two-step open/setup process with virtual HCI initialization, this patch adds internal lock when queuing data in the vhci_send_frame() function. Signed-off-by: Arkadiusz Bokowy Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 40e2b9fa11a2..f3892e9ce800 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -74,7 +74,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) struct vhci_data *data = hci_get_drvdata(hdev); memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); + + mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); + mutex_unlock(&data->open_mutex); wake_up_interruptible(&data->read_wait); return 0; -- cgit From acab8ff29a2a226409cfe04e6d2e0896928c1b3a Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Thu, 28 Sep 2023 10:52:57 +0300 Subject: Bluetooth: ISO: Fix invalid context error This moves the hci_le_terminate_big_sync call from rx_work to cmd_sync_work, to avoid calling sleeping function from an invalid context. Reported-by: syzbot+c715e1bd8dfbcb1ab176@syzkaller.appspotmail.com Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 31d02b54eea1..e6cfc65abcb8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7021,6 +7021,14 @@ unlock: hci_dev_unlock(hdev); } +static int hci_iso_term_big_sync(struct hci_dev *hdev, void *data) +{ + u8 handle = PTR_UINT(data); + + return hci_le_terminate_big_sync(hdev, handle, + HCI_ERROR_LOCAL_HOST_TERM); +} + static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -7065,16 +7073,17 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, rcu_read_lock(); } + rcu_read_unlock(); + if (!ev->status && !i) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation * has completed. */ - hci_le_terminate_big_sync(hdev, ev->handle, - HCI_ERROR_LOCAL_HOST_TERM); + hci_cmd_sync_queue(hdev, hci_iso_term_big_sync, + UINT_PTR(ev->handle), NULL); - rcu_read_unlock(); hci_dev_unlock(hdev); } -- cgit From 33155c4aae5260475def6f7438e4e35564f4f3ba Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:31 +0800 Subject: Bluetooth: hci_event: Ignore NULL link key This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. Thus, we can ignore null link key in the handler of "Link Key Notification event" to relieve the attack. A similar implementation also shows in btstack project. [3] v3: Drop the connection when null link key be detected. v2: - Used Link: tag instead of Closes: - Used bt_dev_dbg instead of BT_DBG - Added Fixes: tag Cc: stable@vger.kernel.org Fixes: 55ed8ca10f35 ("Bluetooth: Implement link key handling for the management interface") Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3722 [3] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e6cfc65abcb8..742dcfd136bc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4742,6 +4742,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, if (!conn) goto unlock; + /* Ignore NULL link key against CVE-2020-26555 */ + if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", + &ev->bdaddr); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); + goto unlock; + } + hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(conn); -- cgit From 1ffc6f8cc33268731fcf9629fc4438f6db1191fc Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:58 +0800 Subject: Bluetooth: Reject connection with the device which has same BD_ADDR This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. A condition of this attack is that attacker should change the BR_ADDR of his hacking device (Host B) to equal to the BR_ADDR with the target device being attacked (Host A). Thus, we reject the connection with device which has same BD_ADDR both on HCI_Create_Connection and HCI_Connection_Request to prevent the attack. A similar implementation also shows in btstack project. [3][4] Cc: stable@vger.kernel.org Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3523 [3] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L7297 [4] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 9 +++++++++ net/bluetooth/hci_event.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a6f20338db8..73470cc3518a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1627,6 +1627,15 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Reject outgoing connection to device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, dst)) { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + dst); + return ERR_PTR(-ECONNREFUSED); + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 742dcfd136bc..61b1b244595e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3268,6 +3268,17 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + /* Reject incoming connection from device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + &ev->bdaddr); + hci_reject_conn(hdev, &ev->bdaddr); + return; + } + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); -- cgit From a239110ee8e0b0aafa265f0d54f7a16744855e70 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 30 Sep 2023 15:53:32 +0300 Subject: Bluetooth: hci_sync: always check if connection is alive before deleting In hci_abort_conn_sync it is possible that conn is deleted concurrently by something else, also e.g. when waiting for hdev->lock. This causes double deletion of the conn, so UAF or conn_hash.list corruption. Fix by having all code paths check that the connection is still in conn_hash before deleting it, while holding hdev->lock which prevents any races. Log (when powering off while BAP streaming, occurs rarely): ======================================================================= kernel BUG at lib/list_debug.c:56! ... ? __list_del_entry_valid (lib/list_debug.c:56) hci_conn_del (net/bluetooth/hci_conn.c:154) bluetooth hci_abort_conn_sync (net/bluetooth/hci_sync.c:5415) bluetooth ? __pfx_hci_abort_conn_sync+0x10/0x10 [bluetooth] ? lock_release+0x1d5/0x3c0 ? hci_disconnect_all_sync.constprop.0+0xb2/0x230 [bluetooth] ? __pfx_lock_release+0x10/0x10 ? __kmem_cache_free+0x14d/0x2e0 hci_disconnect_all_sync.constprop.0+0xda/0x230 [bluetooth] ? __pfx_hci_disconnect_all_sync.constprop.0+0x10/0x10 [bluetooth] ? hci_clear_adv_sync+0x14f/0x170 [bluetooth] ? __pfx_set_powered_sync+0x10/0x10 [bluetooth] hci_set_powered_sync+0x293/0x450 [bluetooth] ======================================================================= Fixes: 94d9ba9f9888 ("Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d06e07a0ea5a..a15ab0b874a9 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5369,6 +5369,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err = 0; u16 handle = conn->handle; + bool disconnect = false; struct hci_conn *c; switch (conn->state) { @@ -5399,24 +5400,15 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) hci_dev_unlock(hdev); return 0; case BT_BOUND: - hci_dev_lock(hdev); - hci_conn_failed(conn, reason); - hci_dev_unlock(hdev); - return 0; + break; default: - hci_dev_lock(hdev); - conn->state = BT_CLOSED; - hci_disconn_cfm(conn, reason); - hci_conn_del(conn); - hci_dev_unlock(hdev); - return 0; + disconnect = true; + break; } hci_dev_lock(hdev); - /* Check if the connection hasn't been cleanup while waiting - * commands to complete. - */ + /* Check if the connection has been cleaned up concurrently */ c = hci_conn_hash_lookup_handle(hdev, handle); if (!c || c != conn) { err = 0; @@ -5428,7 +5420,13 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) * or in case of LE it was still scanning so it can be cleanup * safely. */ - hci_conn_failed(conn, reason); + if (disconnect) { + conn->state = BT_CLOSED; + hci_disconn_cfm(conn, reason); + hci_conn_del(conn); + } else { + hci_conn_failed(conn, reason); + } unlock: hci_dev_unlock(hdev); -- cgit From c7f59461f5a78994613afc112cdd73688aef9076 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 4 Oct 2023 20:42:24 +0800 Subject: Bluetooth: Fix a refcnt underflow problem for hci_conn Syzbot reports a warning as follows: WARNING: CPU: 1 PID: 26946 at net/bluetooth/hci_conn.c:619 hci_conn_timeout+0x122/0x210 net/bluetooth/hci_conn.c:619 ... Call Trace: process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 process_scheduled_works kernel/workqueue.c:2703 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2784 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 It is because the HCI_EV_SIMPLE_PAIR_COMPLETE event handler drops hci_conn directly without check Simple Pairing whether be enabled. But the Simple Pairing process can only be used if both sides have the support enabled in the host stack. Add hci_conn_ssp_enabled() for hci_conn in HCI_EV_IO_CAPA_REQUEST and HCI_EV_SIMPLE_PAIR_COMPLETE event handlers to fix the problem. Fixes: 0493684ed239 ("[Bluetooth] Disable disconnect timer during Simple Pairing") Signed-off-by: Ziyang Xuan Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 61b1b244595e..7e1c875e0e88 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5323,7 +5323,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; hci_conn_hold(conn); @@ -5570,7 +5570,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; /* Reset the authentication requirement to unknown */ -- cgit From 6868b8505c807ad9397d78cc4e07cb1cb3582152 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 11 Oct 2023 12:35:20 -0700 Subject: xfs: adjust the incore perag block_count when shrinking If we reduce the number of blocks in an AG, we must update the incore geometry values as well. Fixes: 0800169e3e2c9 ("xfs: Pre-calculate per-AG agbno geometry") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_ag.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e9cc481b4ddf..f9f4d694640d 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1001,6 +1001,12 @@ xfs_ag_shrink_space( error = -ENOSPC; goto resv_init_out; } + + /* Update perag geometry */ + pag->block_count -= delta; + __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, + &pag->agino_max); + xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); return 0; -- cgit From 442177be8c3b8edfc29e14837e59771181c590b3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 11 Oct 2023 12:35:21 -0700 Subject: xfs: process free extents to busy list in FIFO order When we're adding extents to the busy discard list, add them to the tail of the list so that we get FIFO order. For FITRIM commands, this means that we send discard bios sorted in order from longest to shortest, like we did before commit 89cfa899608fc. For transactions that are freeing extents, this puts them in the transaction's busy list in FIFO order as well, which shouldn't make any noticeable difference. Fixes: 89cfa899608fc ("xfs: reduce AGF hold times during fstrim operations") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_extent_busy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 746814815b1d..9ecfdcdc752f 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -62,7 +62,8 @@ xfs_extent_busy_insert_list( rb_link_node(&new->rb_node, parent, rbp); rb_insert_color(&new->rb_node, &pag->pagb_tree); - list_add(&new->list, busy_list); + /* always process discard lists in fifo order */ + list_add_tail(&new->list, busy_list); spin_unlock(&pag->pagb_lock); } -- cgit From 1364a3c391aedfeb32aa025303ead3d7c91cdf9d Mon Sep 17 00:00:00 2001 From: Sarthak Kukreti Date: Wed, 11 Oct 2023 13:12:30 -0700 Subject: block: Don't invalidate pagecache for invalid falloc modes Only call truncate_bdev_range() if the fallocate mode is supported. This fixes a bug where data in the pagecache could be invalidated if the fallocate() was called on the block device with an invalid mode. Fixes: 25f4c41415e5 ("block: implement (some of) fallocate for block devices") Cc: stable@vger.kernel.org Reported-by: "Darrick J. Wong" Signed-off-by: Sarthak Kukreti Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Mike Snitzer Fixes: line? I've never seen those wrapped. Link: https://lore.kernel.org/r/20231011201230.750105-1-sarthakkukreti@chromium.org Signed-off-by: Jens Axboe --- block/fops.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/block/fops.c b/block/fops.c index acff3d5d22d4..73e42742543f 100644 --- a/block/fops.c +++ b/block/fops.c @@ -772,24 +772,35 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, filemap_invalidate_lock(inode->i_mapping); - /* Invalidate the page cache, including dirty pages. */ - error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); - if (error) - goto fail; - + /* + * Invalidate the page cache, including dirty pages, for valid + * de-allocate mode calls to fallocate(). + */ switch (mode) { case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL); break; -- cgit From 71c299c711d1f44f0bf04f1fea66baad565240f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2023 10:36:51 -0700 Subject: net: tcp: fix crashes trying to free half-baked MTU probes tcp_stream_alloc_skb() initializes the skb to use tcp_tsorted_anchor which is a union with the destructor. We need to clean that TCP-iness up before freeing. Fixes: 736013292e3c ("tcp: let tcp_mtu_probe() build headless packets") Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231010173651.3990234-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index aa0fc8c766e5..9c8c42c280b7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2456,6 +2456,7 @@ static int tcp_mtu_probe(struct sock *sk) /* build the payload, and be prepared to abort if this fails. */ if (tcp_clone_payload(sk, nskb, probe_size)) { + tcp_skb_tsorted_anchor_cleanup(nskb); consume_skb(nskb); return -1; } -- cgit From 3c90c01e49342b166e5c90ec2c85b220be15a20e Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Wed, 13 Sep 2023 18:29:42 +0800 Subject: xfs: correct calculation for agend and blockcount The agend should be "start + length - 1", then, blockcount should be "end + 1 - start". Correct 2 calculation mistakes. Also, rename "agend" to "range_agend" because it's not the end of the AG per se; it's the end of the dead region within an AG's agblock space. Fixes: 5cf32f63b0f4 ("xfs: fix the calculation for "end" and "length"") Signed-off-by: Shiyang Ruan Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_notify_failure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 4a9bbd3fe120..a7daa522e00f 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -126,8 +126,8 @@ xfs_dax_notify_ddev_failure( struct xfs_rmap_irec ri_low = { }; struct xfs_rmap_irec ri_high; struct xfs_agf *agf; - xfs_agblock_t agend; struct xfs_perag *pag; + xfs_agblock_t range_agend; pag = xfs_perag_get(mp, agno); error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); @@ -148,10 +148,10 @@ xfs_dax_notify_ddev_failure( ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno); agf = agf_bp->b_addr; - agend = min(be32_to_cpu(agf->agf_length), + range_agend = min(be32_to_cpu(agf->agf_length) - 1, ri_high.rm_startblock); notify.startblock = ri_low.rm_startblock; - notify.blockcount = agend - ri_low.rm_startblock; + notify.blockcount = range_agend + 1 - ri_low.rm_startblock; error = xfs_rmap_query_range(cur, &ri_low, &ri_high, xfs_dax_failure_fn, ¬ify); -- cgit From f93b9300301d30f275f9bd7165cbb53d5094bd8d Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 24 Aug 2023 15:47:23 +0800 Subject: xfs: Remove duplicate include ./fs/xfs/scrub/xfile.c: xfs_format.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6209 Signed-off-by: Jiapeng Chong Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/scrub/xfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index d98e8e77c684..090c3ead43fd 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -10,7 +10,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_format.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/scrub.h" -- cgit From cbc06310c36f73a5f3b0c6f0d974d60cf66d816b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 29 Sep 2023 14:43:18 -0400 Subject: xfs: reinstate the old i_version counter as STATX_CHANGE_COOKIE The handling of STATX_CHANGE_COOKIE was moved into generic_fillattr in commit 0d72b92883c6 (fs: pass the request_mask to generic_fillattr), but we didn't account for the fact that xfs doesn't call generic_fillattr at all. Make XFS report its i_version as the STATX_CHANGE_COOKIE. Fixes: 0d72b92883c6 (fs: pass the request_mask to generic_fillattr) Signed-off-by: Jeff Layton Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_iops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1c1e6171209d..2b3b05c28e9e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -584,6 +584,11 @@ xfs_vn_getattr( } } + if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { + stat->change_cookie = inode_query_iversion(inode); + stat->result_mask |= STATX_CHANGE_COOKIE; + } + /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. -- cgit From f055ff23c331f28aa4ace4b72dc56f63b9a726c8 Mon Sep 17 00:00:00 2001 From: Ralph Siemsen Date: Wed, 4 Oct 2023 16:00:08 -0400 Subject: pinctrl: renesas: rzn1: Enable missing PINMUX Enable pin muxing (eg. programmable function), so that the RZ/N1 GPIO pins will be configured as specified by the pinmux in the DTS. This used to be enabled implicitly via CONFIG_GENERIC_PINMUX_FUNCTIONS, however that was removed, since the RZ/N1 driver does not call any of the generic pinmux functions. Fixes: 1308fb4e4eae14e6 ("pinctrl: rzn1: Do not select GENERIC_PIN{CTRL_GROUPS,MUX_FUNCTIONS}") Signed-off-by: Ralph Siemsen Reviewed-by: Miquel Raynal Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231004200008.1306798-1-ralph.siemsen@linaro.org Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- drivers/pinctrl/renesas/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig index 77730dc548ed..c8d519ca53eb 100644 --- a/drivers/pinctrl/renesas/Kconfig +++ b/drivers/pinctrl/renesas/Kconfig @@ -235,6 +235,7 @@ config PINCTRL_RZN1 depends on OF depends on ARCH_RZN1 || COMPILE_TEST select GENERIC_PINCONF + select PINMUX help This selects pinctrl driver for Renesas RZ/N1 devices. -- cgit From e2bca4870fdaf855651ee80b083d892599c5d982 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 9 Oct 2023 08:31:52 -0700 Subject: af_packet: Fix fortified memcpy() without flex array. Sergei Trofimovich reported a regression [0] caused by commit a0ade8404c3b ("af_packet: Fix warning of fortified memcpy() in packet_getname()."). It introduced a flex array sll_addr_flex in struct sockaddr_ll as a union-ed member with sll_addr to work around the fortified memcpy() check. However, a userspace program uses a struct that has struct sockaddr_ll in the middle, where a flex array is illegal to exist. include/linux/if_packet.h:24:17: error: flexible array member 'sockaddr_ll::::::sll_addr_flex' not at end of 'struct packet_info_t' 24 | __DECLARE_FLEX_ARRAY(unsigned char, sll_addr_flex); | ^~~~~~~~~~~~~~~~~~~~ To fix the regression, let's go back to the first attempt [1] telling memcpy() the actual size of the array. Reported-by: Sergei Trofimovich Closes: https://github.com/NixOS/nixpkgs/pull/252587#issuecomment-1741733002 [0] Link: https://lore.kernel.org/netdev/20230720004410.87588-3-kuniyu@amazon.com/ [1] Fixes: a0ade8404c3b ("af_packet: Fix warning of fortified memcpy() in packet_getname().") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20231009153151.75688-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_packet.h | 6 +----- net/packet/af_packet.c | 7 ++++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 4d0ad22f83b5..9efc42382fdb 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -18,11 +18,7 @@ struct sockaddr_ll { unsigned short sll_hatype; unsigned char sll_pkttype; unsigned char sll_halen; - union { - unsigned char sll_addr[8]; - /* Actual length is in sll_halen. */ - __DECLARE_FLEX_ARRAY(unsigned char, sll_addr_flex); - }; + unsigned char sll_addr[8]; }; /* Packet types */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8f97648d652f..a84e00b5904b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3607,7 +3607,12 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; - memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len); + + /* Let __fortify_memcpy_chk() know the actual buffer size. */ + memcpy(((struct sockaddr_storage *)sll)->__data + + offsetof(struct sockaddr_ll, sll_addr) - + offsetofend(struct sockaddr_ll, sll_family), + dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; -- cgit From 354a6e707e29cb0c007176ee5b8db8be7bd2dee0 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 9 Oct 2023 16:00:54 -0400 Subject: nfc: nci: assert requested protocol is valid The protocol is used in a bit mask to determine if the protocol is supported. Assert the provided protocol is less than the maximum defined so it doesn't potentially perform a shift-out-of-bounds and provide a clearer error for undefined protocols vs unsupported ones. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reported-and-tested-by: syzbot+0839b78e119aae1fec78@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0839b78e119aae1fec78 Signed-off-by: Jeremy Cline Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231009200054.82557-1-jeremy@jcline.org Signed-off-by: Paolo Abeni --- net/nfc/nci/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index fff755dde30d..6c9592d05120 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -909,6 +909,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, return -EINVAL; } + if (protocol >= NFC_PROTO_MAX) { + pr_err("the requested nfc protocol is invalid\n"); + return -EINVAL; + } + if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); -- cgit From 50e492143374c17ad89c865a1a44837b3f5c8226 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 10 Oct 2023 09:18:42 +0530 Subject: octeontx2-pf: Fix page pool frag allocation warning Since page pool param's "order" is set to 0, will result in below warn message if interface is configured with higher rx buffer size. Steps to reproduce the issue. 1. devlink dev param set pci/0002:04:00.0 name receive_buffer_size \ value 8196 cmode runtime 2. ifconfig eth0 up [ 19.901356] ------------[ cut here ]------------ [ 19.901361] WARNING: CPU: 11 PID: 12331 at net/core/page_pool.c:567 page_pool_alloc_frag+0x3c/0x230 [ 19.901449] pstate: 82401009 (Nzcv daif +PAN -UAO +TCO -DIT +SSBS BTYPE=--) [ 19.901451] pc : page_pool_alloc_frag+0x3c/0x230 [ 19.901453] lr : __otx2_alloc_rbuf+0x60/0xbc [rvu_nicpf] [ 19.901460] sp : ffff80000f66b970 [ 19.901461] x29: ffff80000f66b970 x28: 0000000000000000 x27: 0000000000000000 [ 19.901464] x26: ffff800000d15b68 x25: ffff000195b5c080 x24: ffff0002a5a32dc0 [ 19.901467] x23: ffff0001063c0878 x22: 0000000000000100 x21: 0000000000000000 [ 19.901469] x20: 0000000000000000 x19: ffff00016f781000 x18: 0000000000000000 [ 19.901472] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 19.901474] x14: 0000000000000000 x13: ffff0005ffdc9c80 x12: 0000000000000000 [ 19.901477] x11: ffff800009119a38 x10: 4c6ef2e3ba300519 x9 : ffff800000d13844 [ 19.901479] x8 : ffff0002a5a33cc8 x7 : 0000000000000030 x6 : 0000000000000030 [ 19.901482] x5 : 0000000000000005 x4 : 0000000000000000 x3 : 0000000000000a20 [ 19.901484] x2 : 0000000000001080 x1 : ffff80000f66b9d4 x0 : 0000000000001000 [ 19.901487] Call trace: [ 19.901488] page_pool_alloc_frag+0x3c/0x230 [ 19.901490] __otx2_alloc_rbuf+0x60/0xbc [rvu_nicpf] [ 19.901494] otx2_rq_aura_pool_init+0x1c4/0x240 [rvu_nicpf] [ 19.901498] otx2_open+0x228/0xa70 [rvu_nicpf] [ 19.901501] otx2vf_open+0x20/0xd0 [rvu_nicvf] [ 19.901504] __dev_open+0x114/0x1d0 [ 19.901507] __dev_change_flags+0x194/0x210 [ 19.901510] dev_change_flags+0x2c/0x70 [ 19.901512] devinet_ioctl+0x3a4/0x6c4 [ 19.901515] inet_ioctl+0x228/0x240 [ 19.901518] sock_ioctl+0x2ac/0x480 [ 19.901522] __arm64_sys_ioctl+0x564/0xe50 [ 19.901525] invoke_syscall.constprop.0+0x58/0xf0 [ 19.901529] do_el0_svc+0x58/0x150 [ 19.901531] el0_svc+0x30/0x140 [ 19.901533] el0t_64_sync_handler+0xe8/0x114 [ 19.901535] el0t_64_sync+0x1a0/0x1a4 [ 19.901537] ---[ end trace 678c0bf660ad8116 ]--- Fixes: b2e3406a38f0 ("octeontx2-pf: Add support for page pool") Signed-off-by: Ratheesh Kannoth Reviewed-by: Yunsheng Lin Link: https://lore.kernel.org/r/20231010034842.3807816-1-rkannoth@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 997fedac3a98..818ce76185b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1403,6 +1403,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, return 0; } + pp_params.order = get_order(buf_size); pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); pp_params.nid = NUMA_NO_NODE; -- cgit From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Oct 2023 13:12:58 +0200 Subject: netfilter: nf_tables: do not remove elements if set backend implements .abort pipapo set backend maintains two copies of the datastructure, removing the elements from the copy that is going to be discarded slows down the abort path significantly, from several minutes to few seconds after this patch. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a72b6aeefb1b..c3de3791cabd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10347,7 +10347,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, &te->elem); + if (!te->set->ops->abort || + nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_remove(net, te->set, &te->elem); + if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); -- cgit From 2e1d175410972285333193837a4250a74cd472e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Oct 2023 10:53:08 +0200 Subject: netfilter: nfnetlink_log: silence bogus compiler warning net/netfilter/nfnetlink_log.c:800:18: warning: variable 'ctinfo' is uninitialized The warning is bogus, the variable is only used if ct is non-NULL and always initialised in that case. Init to 0 too to silence this. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309100514.ndBFebXN-lkp@intel.com/ Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 53c9e76473ba..f03f4d4d7d88 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -698,8 +698,8 @@ nfulnl_log_packet(struct net *net, unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; + enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; -- cgit From d51c42cdef5f961f63e39e172e4dfdcac54acd5e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:17:51 -0700 Subject: netfilter: nf_tables: Annotate struct nft_pipapo_match with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct nft_pipapo_match. Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583e..2e164a319945 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -147,7 +147,7 @@ struct nft_pipapo_match { unsigned long * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; - struct nft_pipapo_field f[]; + struct nft_pipapo_field f[] __counted_by(field_count); }; /** -- cgit From 4c90bba60c26db7dc7df450f748e86440149786e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Oct 2023 11:57:42 +0200 Subject: netfilter: nf_tables: do not refresh timeout when resetting element The dump and reset command should not refresh the timeout, this command is intended to allow users to list existing stateful objects and reset them, element expiration should be refresh via transaction instead with a specific command to achieve this, otherwise this is entering combo semantics that will be hard to be undone later (eg. a user asking to retrieve counters but _not_ requiring to refresh expiration). Fixes: 079cd633219d ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c3de3791cabd..aae6ffebb413 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5556,7 +5556,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - u64 timeout = 0; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) @@ -5592,15 +5591,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { - timeout = *nft_set_ext_timeout(ext); - if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(timeout), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; - } else if (set->flags & NFT_SET_TIMEOUT) { - timeout = READ_ONCE(set->timeout); - } + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { u64 expires, now = get_jiffies_64(); @@ -5615,9 +5610,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; - - if (reset) - *nft_set_ext_expiration(ext) = now + timeout; } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { -- cgit From 52177bbf19e6e9398375a148d2e13ed492b40b80 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:14 +0800 Subject: nf_tables: fix NULL pointer dereference in nft_inner_init() We should check whether the NFTA_INNER_NUM netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load4+0x84/0xa0 nft_inner_init+0x128/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nft_inner.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 28e2873ba24e..928312d01eb1 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -298,6 +298,7 @@ static int nft_inner_init(const struct nft_ctx *ctx, int err; if (!tb[NFTA_INNER_FLAGS] || + !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) -- cgit From 505ce0630ad5d31185695f8a29dde8d29f28faa7 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:15 +0800 Subject: nf_tables: fix NULL pointer dereference in nft_expr_inner_parse() We should check whether the NFTA_EXPR_NAME netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load2+0x7d/0xa0 nla_strcmp+0x2f/0x90 __nft_expr_type_get+0x41/0xb0 nft_expr_inner_parse+0xe3/0x200 nft_inner_init+0x1be/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index aae6ffebb413..a623d31b6518 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3166,7 +3166,7 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, if (err < 0) return err; - if (!tb[NFTA_EXPR_DATA]) + if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME]) return -EINVAL; type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); -- cgit From d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 8 Oct 2023 19:36:53 +0200 Subject: netfilter: nft_payload: fix wrong mac header matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mcast packets get looped back to the local machine. Such packets have a 0-length mac header, we should treat this like "mac header not set" and abort rule evaluation. As-is, we just copy data from the network header instead. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Blažej Krajňák Signed-off-by: Florian Westphal --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 120f6d395b98..0a689c8e0295 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -179,7 +179,7 @@ void nft_payload_eval(const struct nft_expr *expr, switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: - if (!skb_mac_header_was_set(skb)) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; if (skb_vlan_tag_present(skb) && -- cgit From b7fd68ab1538e3adb665670414bea440f399fda9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 5 Oct 2023 14:56:47 +0100 Subject: drm: Do not overrun array in drm_gem_get_pages() If the shared memory object is larger than the DRM object that it backs, we can overrun the page array. Limit the number of pages we install from each folio to prevent this. Signed-off-by: "Matthew Wilcox (Oracle)" Reported-by: Oleksandr Natalenko Tested-by: Oleksandr Natalenko Link: https://lore.kernel.org/lkml/13360591.uLZWGnKmhe@natalenko.name/ Fixes: 3291e09a4638 ("drm: convert drm_gem_put_pages() to use a folio_batch") Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231005135648.2317298-1-willy@infradead.org --- drivers/gpu/drm/drm_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6129b89bb366..44a948b80ee1 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -540,7 +540,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) struct page **pages; struct folio *folio; struct folio_batch fbatch; - int i, j, npages; + long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); @@ -564,11 +564,13 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) i = 0; while (i < npages) { + long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) goto fail; - for (j = 0; j < folio_nr_pages(folio); j++, i++) + nr = min(npages - i, folio_nr_pages(folio)); + for (j = 0; j < nr; j++, i++) pages[i] = folio_file_page(folio, i); /* Make sure shmem keeps __GFP_DMA32 allocated pages in the -- cgit From c1165df2be2fffe3adeeaa68f4ee4325108c5e4e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 10 Oct 2023 18:46:52 +0100 Subject: drm/tiny: correctly print `struct resource *` on error The `res` variable is already a `struct resource *`, don't take the address of it. Fixes incorrect output: simple-framebuffer 9e20dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff4be88a387d00-0xfffffefffde0a240 flags 0x0]: -16 To be correct: simple-framebuffer 9e20dc000.framebuffer: [drm] *ERROR* could not acquire memory range [mem 0x9e20dc000-0x9e307bfff flags 0x200]: -16 Signed-off-by: Joey Gouly Fixes: 9a10c7e6519b ("drm/simpledrm: Add support for system memory framebuffers") Cc: Thomas Zimmermann Cc: Thierry Reding Cc: Javier Martinez Canillas Cc: dri-devel@lists.freedesktop.org Cc: # v6.3+ Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20231010174652.2439513-1-joey.gouly@arm.com --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index ff86ba1ae1b8..8ea120eb8674 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -745,7 +745,7 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res)); if (ret) { - drm_err(dev, "could not acquire memory range %pr: %d\n", &res, ret); + drm_err(dev, "could not acquire memory range %pr: %d\n", res, ret); return ERR_PTR(ret); } -- cgit From 510b18cf23b9bd8a982ef7f1fb19f3968a2bf787 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 10 Oct 2023 21:48:57 +0900 Subject: rswitch: Fix renesas_eth_sw_remove() implementation Fix functions calling order and a condition in renesas_eth_sw_remove(). Otherwise, kernel NULL pointer dereference happens from phy_stop() if a net device opens. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/rswitch.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index fc01ad3f340d..4d7c48288047 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1964,15 +1964,17 @@ static void rswitch_deinit(struct rswitch_private *priv) rswitch_gwca_hw_deinit(priv); rcar_gen4_ptp_unregister(priv->ptp_priv); - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { struct rswitch_device *rdev = priv->rdev[i]; - phy_exit(priv->rdev[i]->serdes); - rswitch_ether_port_deinit_one(rdev); unregister_netdev(rdev->ndev); - rswitch_device_free(priv, i); + rswitch_ether_port_deinit_one(rdev); + phy_exit(priv->rdev[i]->serdes); } + for (i = 0; i < RSWITCH_NUM_PORTS; i++) + rswitch_device_free(priv, i); + rswitch_gwca_ts_queue_free(priv); rswitch_gwca_linkfix_free(priv); -- cgit From 053f13f67be6d02781730c9ac71abde6e9140610 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 10 Oct 2023 21:48:58 +0900 Subject: rswitch: Fix imbalance phy_power_off() calling The phy_power_off() should not be called if phy_power_on() failed. So, add a condition .power_count before calls phy_power_off(). Fixes: 5cb630925b49 ("net: renesas: rswitch: Add phy_power_{on,off}() calling") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 4d7c48288047..0fc0b6bea753 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1254,7 +1254,7 @@ static void rswitch_adjust_link(struct net_device *ndev) phy_print_status(phydev); if (phydev->link) phy_power_on(rdev->serdes); - else + else if (rdev->serdes->power_count) phy_power_off(rdev->serdes); rdev->etha->link = phydev->link; -- cgit From 26de14831cf6e6a6ff96609a0623e1266a636467 Mon Sep 17 00:00:00 2001 From: Krzysztof Hałasa Date: Wed, 11 Oct 2023 07:06:29 +0200 Subject: IXP4xx MAINTAINERS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update MAINTAINERS entries for Intel IXP4xx SoCs. Linus has been handling all IXP4xx stuff since 2019 or so. Signed-off-by: Krzysztof Hałasa Acked-by: Linus Walleij Acked-by: Deepak Saxena Link: https://lore.kernel.org/r/m3ttqxu4ru.fsf@t19.piap.pl Signed-off-by: Arnd Bergmann --- MAINTAINERS | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b6a1b7a86875..0e99887487f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2211,21 +2211,28 @@ F: arch/arm/boot/dts/ti/omap/omap3-igep* ARM/INTEL IXP4XX ARM ARCHITECTURE M: Linus Walleij M: Imre Kaloz -M: Krzysztof Halasa L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml F: Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt F: Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml F: Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion* +F: Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml F: Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml F: arch/arm/boot/dts/intel/ixp/ F: arch/arm/mach-ixp4xx/ F: drivers/bus/intel-ixp4xx-eb.c +F: drivers/char/hw_random/ixp4xx-rng.c F: drivers/clocksource/timer-ixp4xx.c F: drivers/crypto/intel/ixp4xx/ixp4xx_crypto.c F: drivers/gpio/gpio-ixp4xx.c F: drivers/irqchip/irq-ixp4xx.c +F: drivers/net/ethernet/xscale/ixp4xx_eth.c +F: drivers/net/wan/ixp4xx_hss.c +F: drivers/soc/ixp4xx/ixp4xx-npe.c +F: drivers/soc/ixp4xx/ixp4xx-qmgr.c +F: include/linux/soc/ixp4xx/npe.h +F: include/linux/soc/ixp4xx/qmgr.h ARM/INTEL KEEMBAY ARCHITECTURE M: Paul J. Murphy @@ -10621,22 +10628,6 @@ L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/intel/ixp4xx/ixp4xx_crypto.c -INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT -M: Krzysztof Halasa -S: Maintained -F: drivers/net/ethernet/xscale/ixp4xx_eth.c -F: drivers/net/wan/ixp4xx_hss.c -F: drivers/soc/ixp4xx/ixp4xx-npe.c -F: drivers/soc/ixp4xx/ixp4xx-qmgr.c -F: include/linux/soc/ixp4xx/npe.h -F: include/linux/soc/ixp4xx/qmgr.h - -INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT -M: Deepak Saxena -S: Maintained -F: Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml -F: drivers/char/hw_random/ixp4xx-rng.c - INTEL KEEM BAY DRM DRIVER M: Anitha Chrisanthus M: Edmund Dea -- cgit From 14a270bfab7ab1c4b605c01eeca5557447ad5a2b Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 22 Aug 2023 16:49:03 +0000 Subject: riscv: signal: fix sigaltstack frame size checking The alternative stack checking in get_sigframe introduced by the Vector support is not needed and has a problem. It is not needed as we have already validate it at the beginning of the function if we are already on an altstack. If not, the size of an altstack is always validated at its allocation stage with sigaltstack_size_valid(). Besides, we must only regard the size of an altstack if the handler of a signal is registered with SA_ONSTACK. So, blindly checking overflow of an altstack if sas_ss_size not equals to zero will check against wrong signal handlers if only a subset of signals are registered with SA_ONSTACK. Fixes: 8ee0b41898fa ("riscv: signal: Add sigcontext save/restore for vector") Reported-by: Prashanth Swaminathan Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20230822164904.21660-1-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 180d951d3624..21a4d0e111bc 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -311,13 +311,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig, /* Align the stack frame. */ sp &= ~0xfUL; - /* - * Fail if the size of the altstack is not large enough for the - * sigframe construction. - */ - if (current->sas_ss_size && sp < current->sas_ss_sp) - return (void __user __force *)-1UL; - return (void __user *)sp; } -- cgit From e95f3f74465072c2545d8e65a3c3a96e37129cf8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Oct 2023 16:04:25 -0300 Subject: smb: client: make laundromat a delayed worker By having laundromat kthread processing cached directories on every second turned out to be overkill, especially when having multiple SMB mounts. Relax it by using a delayed worker instead that gets scheduled on every @dir_cache_timeout (default=30) seconds per tcon. This also fixes the 1s delay when tearing down tcon. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 89 ++++++++++++++++++---------------------------- fs/smb/client/cached_dir.h | 2 +- 2 files changed, 36 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index e2be8aedb26e..a9e5d3b7e9a0 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -15,6 +15,7 @@ static struct cached_fid *init_cached_dir(const char *path); static void free_cached_dir(struct cached_fid *cfid); static void smb2_close_cached_fid(struct kref *ref); +static void cfids_laundromat_worker(struct work_struct *work); static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, const char *path, @@ -572,53 +573,46 @@ static void free_cached_dir(struct cached_fid *cfid) kfree(cfid); } -static int -cifs_cfids_laundromat_thread(void *p) +static void cfids_laundromat_worker(struct work_struct *work) { - struct cached_fids *cfids = p; + struct cached_fids *cfids; struct cached_fid *cfid, *q; - struct list_head entry; + LIST_HEAD(entry); - while (!kthread_should_stop()) { - ssleep(1); - INIT_LIST_HEAD(&entry); - if (kthread_should_stop()) - return 0; - spin_lock(&cfids->cfid_list_lock); - list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { - list_del(&cfid->entry); - list_add(&cfid->entry, &entry); - cfids->num_entries--; - } + cfids = container_of(work, struct cached_fids, laundromat_work.work); + + spin_lock(&cfids->cfid_list_lock); + list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { + if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + list_move(&cfid->entry, &entry); + cfids->num_entries--; } - spin_unlock(&cfids->cfid_list_lock); + } + spin_unlock(&cfids->cfid_list_lock); - list_for_each_entry_safe(cfid, q, &entry, entry) { - cfid->on_list = false; - list_del(&cfid->entry); + list_for_each_entry_safe(cfid, q, &entry, entry) { + cfid->on_list = false; + list_del(&cfid->entry); + /* + * Cancel and wait for the work to finish in case we are racing + * with it. + */ + cancel_work_sync(&cfid->lease_break); + if (cfid->has_lease) { /* - * Cancel, and wait for the work to finish in - * case we are racing with it. + * Our lease has not yet been cancelled from the server + * so we need to drop the reference. */ - cancel_work_sync(&cfid->lease_break); - if (cfid->has_lease) { - /* - * We lease has not yet been cancelled from - * the server so we need to drop the reference. - */ - spin_lock(&cfids->cfid_list_lock); - cfid->has_lease = false; - spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); - } + spin_lock(&cfids->cfid_list_lock); + cfid->has_lease = false; + spin_unlock(&cfids->cfid_list_lock); + kref_put(&cfid->refcount, smb2_close_cached_fid); } } - - return 0; + queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + dir_cache_timeout * HZ); } - struct cached_fids *init_cached_dirs(void) { struct cached_fids *cfids; @@ -629,19 +623,10 @@ struct cached_fids *init_cached_dirs(void) spin_lock_init(&cfids->cfid_list_lock); INIT_LIST_HEAD(&cfids->entries); - /* - * since we're in a cifs function already, we know that - * this will succeed. No need for try_module_get(). - */ - __module_get(THIS_MODULE); - cfids->laundromat = kthread_run(cifs_cfids_laundromat_thread, - cfids, "cifsd-cfid-laundromat"); - if (IS_ERR(cfids->laundromat)) { - cifs_dbg(VFS, "Failed to start cfids laundromat thread.\n"); - kfree(cfids); - module_put(THIS_MODULE); - return NULL; - } + INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker); + queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + dir_cache_timeout * HZ); + return cfids; } @@ -657,11 +642,7 @@ void free_cached_dirs(struct cached_fids *cfids) if (cfids == NULL) return; - if (cfids->laundromat) { - kthread_stop(cfids->laundromat); - cfids->laundromat = NULL; - module_put(THIS_MODULE); - } + cancel_delayed_work_sync(&cfids->laundromat_work); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index a82ff2cea789..81ba0fd5cc16 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -57,7 +57,7 @@ struct cached_fids { spinlock_t cfid_list_lock; int num_entries; struct list_head entries; - struct task_struct *laundromat; + struct delayed_work laundromat_work; }; extern struct cached_fids *init_cached_dirs(void); -- cgit From 81ba10959970d15c388bf29866b01b62f387e6a3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 9 Oct 2023 17:37:40 -0300 Subject: smb: client: prevent new fids from being removed by laundromat Check if @cfid->time is set in laundromat so we guarantee that only fully cached fids will be selected for removal. While we're at it, add missing locks to protect access of @cfid fields in order to avoid races with open_cached_dir() and cfids_laundromat_worker(), respectively. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 56 +++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index a9e5d3b7e9a0..fe1bf5b6e0cb 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -170,15 +170,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -ENOENT; } /* - * At this point we either have a lease already and we can just - * return it. If not we are guaranteed to be the only thread accessing - * this cfid. + * Return cached fid if it has a lease. Otherwise, it is either a new + * entry or laundromat worker removed it from @cfids->entries. Caller + * will put last reference if the latter. */ + spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease) { + spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; kfree(utf16_path); return 0; } + spin_unlock(&cfids->cfid_list_lock); /* * Skip any prefix paths in @path as lookup_positive_unlocked() ends up @@ -295,9 +298,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, goto oshr_free; } } + spin_lock(&cfids->cfid_list_lock); cfid->dentry = dentry; cfid->time = jiffies; cfid->has_lease = true; + spin_unlock(&cfids->cfid_list_lock); oshr_free: kfree(utf16_path); @@ -306,24 +311,28 @@ oshr_free: free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); spin_lock(&cfids->cfid_list_lock); - if (rc && !cfid->has_lease) { - if (cfid->on_list) { - list_del(&cfid->entry); - cfid->on_list = false; - cfids->num_entries--; + if (!cfid->has_lease) { + if (rc) { + if (cfid->on_list) { + list_del(&cfid->entry); + cfid->on_list = false; + cfids->num_entries--; + } + rc = -ENOENT; + } else { + /* + * We are guaranteed to have two references at this + * point. One for the caller and one for a potential + * lease. Release the Lease-ref so that the directory + * will be closed when the caller closes the cached + * handle. + */ + spin_unlock(&cfids->cfid_list_lock); + kref_put(&cfid->refcount, smb2_close_cached_fid); + goto out; } - rc = -ENOENT; } spin_unlock(&cfids->cfid_list_lock); - if (!rc && !cfid->has_lease) { - /* - * We are guaranteed to have two references at this point. - * One for the caller and one for a potential lease. - * Release the Lease-ref so that the directory will be closed - * when the caller closes the cached handle. - */ - kref_put(&cfid->refcount, smb2_close_cached_fid); - } if (rc) { if (cfid->is_open) SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, @@ -331,7 +340,7 @@ oshr_free: free_cached_dir(cfid); cfid = NULL; } - +out: if (rc == 0) { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -583,15 +592,18 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + if (cfid->time && + time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + cfid->on_list = false; list_move(&cfid->entry, &entry); cfids->num_entries--; + /* To prevent race with smb2_cached_lease_break() */ + kref_get(&cfid->refcount); } } spin_unlock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &entry, entry) { - cfid->on_list = false; list_del(&cfid->entry); /* * Cancel and wait for the work to finish in case we are racing @@ -608,6 +620,8 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_unlock(&cfids->cfid_list_lock); kref_put(&cfid->refcount, smb2_close_cached_fid); } + /* Drop the extra reference opened above */ + kref_put(&cfid->refcount, smb2_close_cached_fid); } queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); -- cgit From 18164f66e6c59fda15c198b371fa008431efdb22 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:52 -0700 Subject: x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can constrain which xfeatures are saved into the userspace buffer without having to modify the user_xfeatures field in KVM's guest_fpu state. KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to guest must not show up in the effective xstate_bv field of the buffer. Saving only the guest-supported xfeatures allows userspace to load the saved state on a different host with a fewer xfeatures, so long as the target host supports the xfeatures that are exposed to the guest. KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to the set of guest-supported xfeatures, but doing so broke KVM's historical ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that are supported by the *host*. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/api.h | 3 ++- arch/x86/kernel/fpu/core.c | 5 +++-- arch/x86/kernel/fpu/xstate.c | 7 +++++-- arch/x86/kernel/fpu/xstate.h | 3 ++- arch/x86/kvm/x86.c | 21 +++++++++------------ 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 31089b851c4f..a2be3aefff9f 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { static inline void fpu_sync_guest_vmexit_xfd_state(void) { } #endif -extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); static inline void fpstate_set_confidential(struct fpu_guest *gfpu) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a86d37052a64..a21a4d0ecc34 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, - unsigned int size, u32 pkru) + unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { - __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); + __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, + XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cadf68737e6b..76408313ed7f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy + * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * @@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode) + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; @@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= fpstate->user_xfeatures; + header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } @@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, + tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index a4ecb04d8d64..3518fb26d06b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -43,7 +43,8 @@ enum xstate_copy_mode { struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode); + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..41d8e6c8570c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5382,26 +5382,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) + +static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - guest_xsave->region, - sizeof(guest_xsave->region), + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, + vcpu->arch.guest_fpu.fpstate->user_xfeatures, vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; - - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - state, size, vcpu->arch.pkru); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, -- cgit From 8647c52e9504c99752a39f1d44f6268f82c40a5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:53 -0700 Subject: KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2} Mask off xfeatures that aren't exposed to the guest only when saving guest state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly. Preserving the maximal set of xfeatures in user_xfeatures restores KVM's ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") allowed userspace to load xfeatures that are supported by the host, irrespective of what xfeatures are exposed to the guest. There is no known use case where userspace *intentionally* loads xfeatures that aren't exposed to the guest, but the bug fixed by commit ad856280ddea was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't exposed to the guest, e.g. would lead to userspace unintentionally loading guest-unsupported xfeatures when live migrating a VM. Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially problematic for QEMU-based setups, as QEMU has a bug where instead of terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops loading guest state, i.e. resumes the guest after live migration with incomplete guest state, and ultimately results in guest data corruption. Note, letting userspace restore all host-supported xfeatures does not fix setups where a VM is migrated from a host *without* commit ad856280ddea, to a target with a subset of host-supported xfeatures. However there is no way to safely address that scenario, e.g. KVM could silently drop the unsupported features, but that would be a clear violation of KVM's ABI and so would require userspace to opt-in, at which point userspace could simply be updated to sanitize the to-be-loaded XSAVE state. Reported-by: Tyler Stachecki Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") Cc: stable@vger.kernel.org Cc: Leonardo Bras Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Message-Id: <20230928001956.924301-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/fpu/xstate.c | 5 +---- arch/x86/kvm/cpuid.c | 8 -------- arch/x86/kvm/x86.c | 18 ++++++++++++++++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 76408313ed7f..ef6906107c54 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1539,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - - if (!guest_fpu) - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; - + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0544e30b4946..773132c3bf5a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - /* - * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if - * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't - * supported by the host. - */ - vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | - XFEATURE_MASK_FPSSE; - kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41d8e6c8570c..1e645f5b1e2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5386,12 +5386,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, u8 *state, unsigned int size) { + /* + * Only copy state for features that are enabled for the guest. The + * state itself isn't problematic, but setting bits in the header for + * features that are supported in *this* host but not exposed to the + * guest can result in KVM_SET_XSAVE failing when live migrating to a + * compatible host without the features that are NOT exposed to the + * guest. + * + * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if + * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't + * supported by the host. + */ + u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | + XFEATURE_MASK_FPSSE; + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, - vcpu->arch.guest_fpu.fpstate->user_xfeatures, - vcpu->arch.pkru); + supported_xcr0, vcpu->arch.pkru); } static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, -- cgit From 60d351f18f7a8acd08964ef1d5c948354a7907be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:54 -0700 Subject: KVM: selftests: Touch relevant XSAVE state in guest for state test Modify support XSAVE state in the "state test's" guest code so that saving and loading state via KVM_{G,S}ET_XSAVE actually does something useful, i.e. so that xstate_bv in XSAVE state isn't empty. Punt on BNDCSR for now, it's easier to just stuff that xfeature from the host side. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 14 ++++ tools/testing/selftests/kvm/x86_64/state_test.c | 77 ++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4fd042112526..6f66861175ad 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -68,6 +68,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4c4925a8ab45..df3e93df4343 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + xsetbv(0, xgetbv(0) | supported_xcr0); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. + */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { -- cgit From 77709820787355f45755fe454e26fca8584ecf40 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:55 -0700 Subject: KVM: selftests: Load XSAVE state into untouched vCPU during state test Expand x86's state test to load XSAVE state into a "dummy" vCPU prior to KVM_SET_CPUID2, and again with an empty guest CPUID model. Except for off-by-default features, i.e. AMX, KVM's ABI for KVM_SET_XSAVE is that userspace is allowed to load xfeatures so long as they are supported by the host. This is a regression test for a combination of KVM bugs where the state saved by KVM_GET_XSAVE{2} could not be loaded via KVM_SET_XSAVE if the saved xstate_bv would load guest-unsupported xfeatures. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/state_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index df3e93df4343..115b2cdf9279 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -231,9 +231,9 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -286,6 +286,21 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + */ + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); -- cgit From 87e3ca055cdc6c63fb82b9bec7c370405d03f6ef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:56 -0700 Subject: KVM: selftests: Force load all supported XSAVE state in state test Extend x86's state to forcefully load *all* host-supported xfeatures by modifying xstate_bv in the saved state. Stuffing xstate_bv ensures that the selftest is verifying KVM's full ABI regardless of whether or not the guest code is successful in getting various xfeatures out of their INIT state, e.g. see the disaster that is/was MPX. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 9 +++++++++ tools/testing/selftests/kvm/x86_64/state_test.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6f66861175ad..25bc61dac5fb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -922,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) !kvm_cpu_has(feature.anti_feature); } +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 115b2cdf9279..88b58aab7207 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -230,6 +230,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; @@ -294,12 +295,25 @@ int main(int argc, char *argv[]) * allow KVM_SET_XSAVE regardless of guest CPUID. Manually * load only XSAVE state, MSRs in particular have a much more * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); vcpu_init_cpuid(vcpuN, &empty_cpuid); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); kvm_x86_state_cleanup(state); -- cgit From b65235f6e102354ccafda601eaa1c5bef5284d21 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:51 +0300 Subject: x86: KVM: SVM: always update the x2avic msr interception The following problem exists since x2avic was enabled in the KVM: svm_set_x2apic_msr_interception is called to enable the interception of the x2apic msrs. In particular it is called at the moment the guest resets its apic. Assuming that the guest's apic was in x2apic mode, the reset will bring it back to the xapic mode. The svm_set_x2apic_msr_interception however has an erroneous check for '!apic_x2apic_mode()' which prevents it from doing anything in this case. As a result of this, all x2apic msrs are left unintercepted, and that exposes the bare metal x2apic (if enabled) to the guest. Oops. Remove the erroneous '!apic_x2apic_mode()' check to fix that. This fixes CVE-2023-5090 Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Suravee Suthikulpanit Tested-by: Suravee Suthikulpanit Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410..acdd0b89e471 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) if (intercept == svm->x2avic_msrs_intercepted) return; - if (!x2avic_enabled || - !apic_x2apic_mode(svm->vcpu.arch.apic)) + if (!x2avic_enabled) return; for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) { -- cgit From 2dcf37abf9d3aab7f975002d29fc7c17272def38 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:52 +0300 Subject: x86: KVM: SVM: add support for Invalid IPI Vector interception In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code: "Invalid IPI Vector - The vector for the specified IPI was set to an illegal value (VEC < 16)" Note that tests on Zen2 machine show that this VM exit doesn't happen and instead AVIC just does nothing. Add support for this exit code by doing nothing, instead of filling the kernel log with errors. Also replace an unthrottled 'pr_err()' if another unknown incomplete IPI exit happens with vcpu_unimpl() (e.g in case AMD adds yet another 'Invalid IPI' exit reason) Cc: Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/avic.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 19bf955b67e0..3ac0ffc4f3e2 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -268,6 +268,7 @@ enum avic_ipi_failure_cause { AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, AVIC_IPI_FAILURE_INVALID_TARGET, AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, }; #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 2092db892d7d..4b74ea91f4e6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); break; + case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR: + /* Invalid IPI with vector < 16 */ + break; default: - pr_err("Unknown IPI interception\n"); + vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n"); } return 1; -- cgit From 3fdc6087df3be73a212a81ce5dd6516638568806 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:53 +0300 Subject: x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested() svm_leave_nested() similar to a nested VM exit, get the vCPU out of nested mode and thus should end the local inhibition of AVIC on this vCPU. Failure to do so, can lead to hangs on guest reboot. Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the current vCPU in this case. Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index dd496c9e5f91..3fea8c47679e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu) nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); + + if (kvm_apicv_activated(vcpu->kvm)) + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); -- cgit From 3e9346734661cc20bd77aeb43dbf4e5f46a2c16e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Oct 2023 13:28:54 -0500 Subject: KVM: SVM: Fix build error when using -Werror=unused-but-set-variable Commit 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") introduced a local variable used for the rdmsr() function for the high 32-bits of the MSR value. This variable is not used after being set and triggers a warning or error, when treating warnings as errors, when the unused-but-set-variable flag is set. Mark this variable as __maybe_unused to fix this. Fixes: 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") Signed-off-by: Tom Lendacky Reviewed-by: Sean Christopherson Message-Id: <0da9874b6e9fcbaaa5edeb345d7e2a7c859fc818.1696271334.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index acdd0b89e471..beea99c8e8e0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -691,7 +691,7 @@ static int svm_hardware_enable(void) */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { struct sev_es_save_area *hostsa; - u32 msr_hi; + u32 __maybe_unused msr_hi; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); -- cgit From 60197a4631b907b30343e25af702257d92f359cf Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Oct 2023 12:16:17 +0530 Subject: KVM: arm64: pmu: Drop redundant check for non-NULL kvm_pmu_events There is an allocated and valid struct kvm_pmu_events for each cpu on the system via DEFINE_PER_CPU(). Hence there cannot be a NULL pointer accessed via this_cpu_ptr() in the helper kvm_get_pmu_events(). Hence non-NULL check for pmu in such places are redundant and can be dropped. Cc: Marc Zyngier Cc: Oliver Upton Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012064617.897346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 0eea225fd09a..a243934c5568 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu) + if (!kvm_arm_support_pmu_v3()) return; pmu->events_host &= ~clr; -- cgit From e2145c99b53ec88105c69bbbb577265660d08c69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Oct 2023 11:25:40 -0400 Subject: KVM: MIPS: fix -Wunused-but-set-variable warning The variable is completely unused, remove it. Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 7b2ac1319d70..467ee6b95ae1 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, gfn_t gfn = gpa >> PAGE_SHIFT; int srcu_idx, err; kvm_pfn_t pfn; - pte_t *ptep, entry, old_pte; + pte_t *ptep, entry; bool writeable; unsigned long prot_bits; unsigned long mmu_seq; @@ -664,7 +664,6 @@ retry: entry = pfn_pte(pfn, __pgprot(prot_bits)); /* Write the PTE */ - old_pte = *ptep; set_pte(ptep, entry); err = 0; -- cgit From 0fd76865006dfdc3cdc6dade538ca2257a847364 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:58 +0100 Subject: KVM: arm64: Add nPIR{E0}_EL1 to HFG traps nPIR_EL1 and nPIREO_EL1 are part of the 'reverse polarity' set of bits, set them so that we disable the traps for a guest. Unfortunately, these bits are not yet described in the ARM ARM, but only live in the XML description. Also add them to the NV FGT forwarding infrastructure. Signed-off-by: Joey Gouly Fixes: e930694e6145 ("KVM: arm64: Restructure FGT register switching") Cc: Oliver Upton [maz: add entries to the NV FGT array, commit message update] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-2-joey.gouly@arm.com --- arch/arm64/include/asm/kvm_arm.h | 4 ++-- arch/arm64/kvm/emulate-nested.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5882b2415596..1095c6647e96 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,14 +344,14 @@ */ #define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) #define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGITR_EL2_RES0 GENMASK(63, 57) #define __HFGITR_EL2_MASK GENMASK(54, 0) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 9ced1bf0c2b7..ee902ff2a50f 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -977,6 +977,8 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), + SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), -- cgit From 839d90357b7ce6b129045d28ac22bd3a247e2350 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:59 +0100 Subject: KVM: arm64: POR{E0}_EL1 do not need trap handlers These will not be trapped by KVM, so don't need a handler. Signed-off-by: Joey Gouly Cc: Marc Zyngier Cc: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-3-joey.gouly@arm.com --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e92ec810d449..0afd6136e275 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, - { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, - { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, -- cgit From e001d1447cd4585d7f23a44ff668ba2bc624badb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 15:24:17 +0300 Subject: fs: factor out vfs_parse_monolithic_sep() helper Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), so filesystems could use it with a custom option separator callback. Acked-by: Christian Brauner Signed-off-by: Amir Goldstein --- fs/fs_context.c | 34 +++++++++++++++++++++++++++++----- include/linux/fs_context.h | 2 ++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index a0ad7a0c4680..98589aae5208 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, EXPORT_SYMBOL(vfs_parse_fs_string); /** - * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. * @data: The data to parse + * @sep: callback for separating next option * - * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be - * called from the ->monolithic_mount_data() fs_context operation. + * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom + * option separator callback. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ -int generic_parse_monolithic(struct fs_context *fc, void *data) +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)) { char *options = data, *key; int ret = 0; @@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) if (ret) return ret; - while ((key = strsep(&options, ",")) != NULL) { + while ((key = sep(&options)) != NULL) { if (*key) { size_t v_len = 0; char *value = strchr(key, '='); @@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) return ret; } +EXPORT_SYMBOL(vfs_parse_monolithic_sep); + +static char *vfs_parse_comma_sep(char **s) +{ + return strsep(s, ","); +} + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @fc: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +int generic_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); +} EXPORT_SYMBOL(generic_parse_monolithic); /** diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 96332db693d5..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -136,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit From c34706acf40b43dd31f67c92c5a95d39666a1eb3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 16:08:28 +0300 Subject: ovl: fix regression in parsing of mount options with escaped comma Ever since commit 91c77947133f ("ovl: allow filenames with comma"), the following example was legit overlayfs mount options: mount -t overlay overlay -o 'lowerdir=/tmp/a\,b/lower' /mnt The conversion to new mount api moved to using the common helper generic_parse_monolithic() and discarded the specialized ovl_next_opt() option separator. Bring back ovl_next_opt() and use vfs_parse_monolithic_sep() to fix the regression. Reported-by: Ryan Hendrickson Closes: https://lore.kernel.org/r/8da307fb-9318-cf78-8a27-ba5c5a0aef6d@alum.mit.edu/ Fixes: 1784fbc2ed9c ("ovl: port to new mount api") Signed-off-by: Amir Goldstein --- fs/overlayfs/params.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 95b751507ac8..17c74ef4f089 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -157,6 +157,34 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { {} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + +static int ovl_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, ovl_next_opt); +} + static ssize_t ovl_parse_param_split_lowerdirs(char *str) { ssize_t nr_layers = 1, nr_colons = 0; @@ -682,6 +710,7 @@ static int ovl_reconfigure(struct fs_context *fc) } static const struct fs_context_operations ovl_context_ops = { + .parse_monolithic = ovl_parse_monolithic, .parse_param = ovl_parse_param, .get_tree = ovl_get_tree, .reconfigure = ovl_reconfigure, -- cgit From 9404673293b065cbb16b8915530147cac7e80b4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Aug 2023 13:18:10 +0100 Subject: KVM: arm64: timers: Correctly handle TGE flip with CNTPOFF_EL2 Contrary to common belief, HCR_EL2.TGE has a direct and immediate effect on the way the EL0 physical counter is offset. Flipping TGE from 1 to 0 while at EL2 immediately changes the way the counter compared to the CVAL limit. This means that we cannot directly save/restore the guest's view of CVAL, but that we instead must treat it as if CNTPOFF didn't exist. Only in the world switch, once we figure out that we do have CNTPOFF, can we must the offset back and forth depending on the polarity of TGE. Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer") Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 13 +++--------- arch/arm64/kvm/hyp/vhe/switch.c | 44 +++++++++++++++++++++++++++++++++++++++++ include/kvm/arm_arch_timer.h | 7 +++++++ 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6dcdae4d38cb..a1e24228aaaa 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = { .get_input_level = kvm_arch_timer_get_input_level, }; -static bool has_cntpoff(void) -{ - return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); -} - static int nr_timers(struct kvm_vcpu *vcpu) { if (!vcpu_has_nv(vcpu)) @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { if (vcpu_has_nv(vcpu)) { if (is_hyp_ctxt(vcpu)) { @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx) timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); - if (!has_cntpoff()) - cval -= timer_get_offset(ctx); + cval -= timer_get_offset(ctx); timer_set_cval(ctx, cval); @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); - if (!has_cntpoff()) - cval += offset; + cval += offset; write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8c..448b17080d36 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); + if (has_cntpoff()) { + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * We're entrering the guest. Reload the correct + * values from memory now that TGE is clear. + */ + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); + + if (map.direct_ptimer) { + write_sysreg_el0(val, SYS_CNTP_CVAL); + isb(); + } + } + val = read_sysreg(cpacr_el1); val |= CPACR_ELx_TTA; val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | @@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (has_cntpoff()) { + struct timer_map map; + u64 val, offset; + + get_timer_map(vcpu, &map); + + /* + * We're exiting the guest. Save the latest CVAL value + * to memory and apply the offset now that TGE is set. + */ + val = read_sysreg_el0(SYS_CNTP_CVAL); + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + + offset = read_sysreg_s(SYS_CNTPOFF_EL2); + + if (map.direct_ptimer && offset) { + write_sysreg_el0(val + offset, SYS_CNTP_CVAL); + isb(); + } + } + /* * ARM errata 1165522 and 1530923 require the actual execution of the * above before we can switch to the EL2/EL0 translation regime used by diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index bb3cb005873e..e748bc957d83 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -82,6 +82,8 @@ struct timer_map { struct arch_timer_context *emul_ptimer; }; +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map); + struct arch_timer_cpu { struct arch_timer_context timers[NR_KVM_TIMERS]; @@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt); void kvm_timer_cpu_up(void); void kvm_timer_cpu_down(void); +static inline bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + #endif -- cgit From 7b821db95140e2c118567aee22a78bf85f3617e0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 2 Oct 2023 16:54:06 -0700 Subject: drm/bridge: ti-sn65dsi86: Associate DSI device lifetime with auxiliary device The kernel produces a warning splat and the DSI device fails to register in this driver if the i2c driver probes, populates child auxiliary devices, and then somewhere in ti_sn_bridge_probe() a function call returns -EPROBE_DEFER. When the auxiliary driver probe defers, the dsi device created by devm_mipi_dsi_device_register_full() is left registered because the devm managed device used to manage the lifetime of the DSI device is the parent i2c device, not the auxiliary device that is being probed. Associate the DSI device created and managed by this driver to the lifetime of the auxiliary device, not the i2c device, so that the DSI device is removed when the auxiliary driver unbinds. Similarly change the device pointer used for dev_err_probe() so the deferred probe errors are associated with the auxiliary device instead of the parent i2c device so we can narrow down future problems faster. Cc: Douglas Anderson Cc: Maxime Ripard Fixes: c3b75d4734cb ("drm/bridge: sn65dsi86: Register and attach our DSI device at probe") Signed-off-by: Stephen Boyd Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231002235407.769399-1-swboyd@chromium.org --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f448b903e190..84148a79414b 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -692,7 +692,7 @@ static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) return container_of(bridge, struct ti_sn65dsi86, bridge); } -static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) +static int ti_sn_attach_host(struct auxiliary_device *adev, struct ti_sn65dsi86 *pdata) { int val; struct mipi_dsi_host *host; @@ -707,7 +707,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) if (!host) return -EPROBE_DEFER; - dsi = devm_mipi_dsi_device_register_full(dev, host, &info); + dsi = devm_mipi_dsi_device_register_full(&adev->dev, host, &info); if (IS_ERR(dsi)) return PTR_ERR(dsi); @@ -725,7 +725,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) pdata->dsi = dsi; - return devm_mipi_dsi_attach(dev, dsi); + return devm_mipi_dsi_attach(&adev->dev, dsi); } static int ti_sn_bridge_attach(struct drm_bridge *bridge, @@ -1298,9 +1298,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, struct device_node *np = pdata->dev->of_node; int ret; - pdata->next_bridge = devm_drm_of_get_bridge(pdata->dev, np, 1, 0); + pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); if (IS_ERR(pdata->next_bridge)) - return dev_err_probe(pdata->dev, PTR_ERR(pdata->next_bridge), + return dev_err_probe(&adev->dev, PTR_ERR(pdata->next_bridge), "failed to create panel bridge\n"); ti_sn_bridge_parse_lanes(pdata, np); @@ -1319,9 +1319,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, drm_bridge_add(&pdata->bridge); - ret = ti_sn_attach_host(pdata); + ret = ti_sn_attach_host(adev, pdata); if (ret) { - dev_err_probe(pdata->dev, ret, "failed to attach dsi host\n"); + dev_err_probe(&adev->dev, ret, "failed to attach dsi host\n"); goto err_remove_bridge; } -- cgit From ad3e33fe071dffea07279f96dab4f3773c430fe2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 25 Sep 2023 15:00:11 -0700 Subject: drm/panel: Move AUX B116XW03 out of panel-edp back to panel-simple In commit 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") I moved a pile of panels out of panel-simple driver into the newly created panel-edp driver. One of those panels, however, shouldn't have been moved. As is clear from commit e35e305eff0f ("drm/panel: simple: Add AUO B116XW03 panel support"), AUX B116XW03 is an LVDS panel. It's used in exynos5250-snow and exynos5420-peach-pit where it's clear that the panel is hooked up with LVDS. Furthermore, searching for datasheets I found one that makes it clear that this panel is LVDS. As far as I can tell, I got confused because in commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Jitao Shi added "DRM_MODE_CONNECTOR_eDP". That seems wrong. Looking at the downstream ChromeOS trees, it seems like some Mediatek boards are using a panel that they call "auo,b116xw03" that's an eDP panel. The best I can guess is that they actually have a different panel that has similar timing. If so then the proper panel should be used or they should switch to the generic "edp-panel" compatible. When moving this back to panel-edp, I wasn't sure what to use for .bus_flags and .bus_format and whether to add the extra "enable" delay from commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on"). I've added formats/flags/delays based on my (inexpert) analysis of the datasheet. These are untested. NOTE: if/when this is backported to stable, we might run into some trouble. Specifically, before 474c162878ba ("arm64: dts: mt8183: jacuzzi: Move panel under aux-bus") this panel was used by "mt8183-kukui-jacuzzi", which assumed it was an eDP panel. I don't know what to suggest for that other than someone making up a bogus panel for jacuzzi that's just for the stable channel. Fixes: 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Fixes: 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") Tested-by: Anton Bambura Acked-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230925150010.1.Iff672233861bcc4cf25a7ad0a81308adc3bda8a4@changeid --- drivers/gpu/drm/panel/panel-edp.c | 29 ----------------------------- drivers/gpu/drm/panel/panel-simple.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index feb665df35a1..95c8472d878a 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -976,32 +976,6 @@ static const struct panel_desc auo_b116xak01 = { }, }; -static const struct drm_display_mode auo_b116xw03_mode = { - .clock = 70589, - .hdisplay = 1366, - .hsync_start = 1366 + 40, - .hsync_end = 1366 + 40 + 40, - .htotal = 1366 + 40 + 40 + 32, - .vdisplay = 768, - .vsync_start = 768 + 10, - .vsync_end = 768 + 10 + 12, - .vtotal = 768 + 10 + 12 + 6, - .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, -}; - -static const struct panel_desc auo_b116xw03 = { - .modes = &auo_b116xw03_mode, - .num_modes = 1, - .bpc = 6, - .size = { - .width = 256, - .height = 144, - }, - .delay = { - .enable = 400, - }, -}; - static const struct drm_display_mode auo_b133han05_mode = { .clock = 142600, .hdisplay = 1920, @@ -1725,9 +1699,6 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b116xa01", .data = &auo_b116xak01, - }, { - .compatible = "auo,b116xw03", - .data = &auo_b116xw03, }, { .compatible = "auo,b133han05", .data = &auo_b133han05, diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 95959dcc6e0e..dd7928d9570f 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -919,6 +919,38 @@ static const struct panel_desc auo_b101xtn01 = { }, }; +static const struct drm_display_mode auo_b116xw03_mode = { + .clock = 70589, + .hdisplay = 1366, + .hsync_start = 1366 + 40, + .hsync_end = 1366 + 40 + 40, + .htotal = 1366 + 40 + 40 + 32, + .vdisplay = 768, + .vsync_start = 768 + 10, + .vsync_end = 768 + 10 + 12, + .vtotal = 768 + 10 + 12 + 6, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc auo_b116xw03 = { + .modes = &auo_b116xw03_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 256, + .height = 144, + }, + .delay = { + .prepare = 1, + .enable = 200, + .disable = 200, + .unprepare = 500, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing auo_g070vvn01_timings = { .pixelclock = { 33300000, 34209000, 45000000 }, .hactive = { 800, 800, 800 }, @@ -4102,6 +4134,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b101xtn01", .data = &auo_b101xtn01, + }, { + .compatible = "auo,b116xw03", + .data = &auo_b116xw03, }, { .compatible = "auo,g070vvn01", .data = &auo_g070vvn01, -- cgit From 13cc9ee8f8ed58e563294d87d74a62006be40f21 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Oct 2023 13:09:02 -0400 Subject: cgroup: Fix incorrect css_set_rwsem reference in comment Since commit f0d9a5f17575 ("cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock"), css_set_rwsem has been replaced by css_set_lock. That commit, however, missed the css_set_rwsem reference in include/linux/cgroup-defs.h. Fix that by changing it to css_set_lock as well. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f1b3151ac30b..265da00a1a8b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -238,7 +238,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; -- cgit From bd9e7326b8d512ee724006d4ec06dfbf3096ae9e Mon Sep 17 00:00:00 2001 From: WangJinchao Date: Thu, 12 Oct 2023 15:17:38 +0800 Subject: workqueue: doc: Fix function and sysfs path errors alloc_ordered_queue -> alloc_ordered_workqueue /sys/devices/virtual/WQ_NAME/ -> /sys/devices/virtual/workqueue/WQ_NAME/ Signed-off-by: WangJinchao Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 4 ++-- Documentation/translations/zh_CN/core-api/workqueue.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index 5d7b01aed1fe..0046af06531a 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -244,7 +244,7 @@ unbound worker-pools and only one work item could be active at any given time thus achieving the same ordering property as ST wq. In the current implementation the above configuration only guarantees -ST behavior within a given NUMA node. Instead ``alloc_ordered_queue()`` should +ST behavior within a given NUMA node. Instead ``alloc_ordered_workqueue()`` should be used to achieve system-wide ST behavior. @@ -390,7 +390,7 @@ The default affinity scope can be changed with the module parameter scope can be changed using ``apply_workqueue_attrs()``. If ``WQ_SYSFS`` is set, the workqueue will have the following affinity scope -related interface files under its ``/sys/devices/virtual/WQ_NAME/`` +related interface files under its ``/sys/devices/virtual/workqueue/WQ_NAME/`` directory. ``affinity_scope`` diff --git a/Documentation/translations/zh_CN/core-api/workqueue.rst b/Documentation/translations/zh_CN/core-api/workqueue.rst index 6c1b5ec31d75..7fac6f75d078 100644 --- a/Documentation/translations/zh_CN/core-api/workqueue.rst +++ b/Documentation/translations/zh_CN/core-api/workqueue.rst @@ -202,7 +202,7 @@ workqueue将自动创建与属性相匹配的后备工作者池。调节并发 同的排序属性。 在目前的实现中,上述配置只保证了特定NUMA节点内的ST行为。相反, -``alloc_ordered_queue()`` 应该被用来实现全系统的ST行为。 +``alloc_ordered_workqueue()`` 应该被用来实现全系统的ST行为。 执行场景示例 -- cgit From 7b42f401fc6571b6604441789d892d440829e33c Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 11 Oct 2023 16:27:59 +0800 Subject: workqueue: Use the kmem_cache_free() instead of kfree() to release pwq Currently, the kfree() be used for pwq objects allocated with kmem_cache_alloc() in alloc_and_link_pwqs(), this isn't wrong. but usually, use "trace_kmem_cache_alloc/trace_kmem_cache_free" to track memory allocation and free. this commit therefore use kmem_cache_free() instead of kfree() in alloc_and_link_pwqs() and also consistent with release of the pwq in rcu_free_pwq(). Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ebe24a5e1435..6f74cab2bd5a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4610,8 +4610,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) enomem: if (wq->cpu_pwq) { - for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(wq->cpu_pwq, cpu)); + for_each_possible_cpu(cpu) { + struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); + + if (pwq) + kmem_cache_free(pwq_cache, pwq); + } free_percpu(wq->cpu_pwq); wq->cpu_pwq = NULL; } -- cgit From f2cab4b318ee8023f4ad640b906ae268942a7db4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 07:02:31 -0700 Subject: drm/nouveau: exec: fix ioctl kernel-doc warning kernel-doc emits a warning: include/uapi/drm/nouveau_drm.h:49: warning: Cannot understand * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX on line 49 - I thought it was a doc line We don't have a way to document a macro value via kernel-doc, so change the "/**" kernel-doc marker to a C comment and format the comment more like a kernel-doc comment for consistency. Fixes: d59e75eef52d ("drm/nouveau: exec: report max pushs through getparam") Signed-off-by: Randy Dunlap Cc: Dave Airlie Cc: Danilo Krummrich Cc: Karol Herbst Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: Bragatheswaran Manickavel Reviewed-by: Lyude Paul Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231008140231.17921-1-rdunlap@infradead.org --- include/uapi/drm/nouveau_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index eaf9f248619f..0bade1592f34 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -45,8 +45,8 @@ extern "C" { #define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 #define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 -/** - * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX +/* + * NOUVEAU_GETPARAM_EXEC_PUSH_MAX - query max pushes through getparam * * Query the maximum amount of IBs that can be pushed through a single * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC -- cgit From 8698cb92eeece1e326a4d6a051bcf143037c4d31 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 8 Aug 2023 14:21:30 +0300 Subject: net/mlx5: Perform DMA operations in the right locations The cited patch change mlx5 driver so that during probe DMA operations were performed before pci_enable_device(), and during teardown DMA operations were performed after pci_disable_device(). DMA operations require PCI to be enabled. Hence, The above leads to the following oops in PPC systems[1]. On s390x systems, as reported by Niklas Schnelle, this is a problem because mlx5_pci_init() is where the DMA and coherent mask is set but mlx5_cmd_init() already does a dma_alloc_coherent(). Thus a DMA allocation is done during probe before the correct mask is set. This causes probe to fail initialization of the cmdif SW structs on s390x after that is converted to the common dma-iommu code. This is because on s390x DMA addresses below 4 GiB are reserved on current machines and unlike the old s390x specific DMA API implementation common code enforces DMA masks. Fix it by performing the DMA operations during probe after pci_enable_device() and after the dma mask is set, and during teardown before pci_disable_device(). [1] Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 netconsole rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm ib_uverbs ib_core mlx5_core(-) ptp pps_core fuse vmx_crypto crc32c_vpmsum [last unloaded: mlx5_ib] CPU: 1 PID: 8937 Comm: modprobe Not tainted 6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02 #1 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c000000000423388 LR: c0000000001e733c CTR: c0000000001e4720 REGS: c0000000055636d0 TRAP: 0380 Not tainted (6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02) MSR: 8000000000009033 CR: 24008884 XER: 20040000 CFAR: c0000000001e7338 IRQMASK: 0 NIP [c000000000423388] __free_pages+0x28/0x160 LR [c0000000001e733c] dma_direct_free+0xac/0x190 Call Trace: [c000000005563970] [5deadbeef0000100] 0x5deadbeef0000100 (unreliable) [c0000000055639b0] [c0000000003d46cc] kfree+0x7c/0x150 [c000000005563a40] [c0000000001e47c8] dma_free_attrs+0xa8/0x1a0 [c000000005563aa0] [c008000000d0064c] mlx5_cmd_cleanup+0xa4/0x100 [mlx5_core] [c000000005563ad0] [c008000000cf629c] mlx5_mdev_uninit+0xf4/0x140 [mlx5_core] [c000000005563b00] [c008000000cf6448] remove_one+0x160/0x1d0 [mlx5_core] [c000000005563b40] [c000000000958540] pci_device_remove+0x60/0x110 [c000000005563b80] [c000000000a35e80] device_remove+0x70/0xd0 [c000000005563bb0] [c000000000a37a38] device_release_driver_internal+0x2a8/0x330 [c000000005563c00] [c000000000a37b8c] driver_detach+0x8c/0x160 [c000000005563c40] [c000000000a35350] bus_remove_driver+0x90/0x110 [c000000005563c80] [c000000000a38948] driver_unregister+0x48/0x90 [c000000005563cf0] [c000000000957e38] pci_unregister_driver+0x38/0x150 [c000000005563d40] [c008000000eb6140] mlx5_cleanup+0x38/0x90 [mlx5_core] Fixes: 06cd555f73ca ("net/mlx5: split mlx5_cmd_init() to probe and reload routines") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Leon Romanovsky Reviewed-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 64 ++++++++++++--------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index afb348579577..c22b0ad0c870 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2186,52 +2186,23 @@ static u16 cmdif_rev(struct mlx5_core_dev *dev) int mlx5_cmd_init(struct mlx5_core_dev *dev) { - int size = sizeof(struct mlx5_cmd_prot_block); - int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; - u32 cmd_l; - int err; - - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; - err = alloc_cmd_page(dev, cmd); - if (err) - goto err_free_pool; - - cmd_l = (u32)(cmd->dma); - if (cmd_l & 0xfff) { - mlx5_core_err(dev, "invalid command queue address\n"); - err = -ENOMEM; - goto err_cmd_page; - } cmd->checksum_disabled = 1; spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); - create_msg_cache(dev); - set_wqname(dev); cmd->wq = create_singlethread_workqueue(cmd->wq_name); if (!cmd->wq) { mlx5_core_err(dev, "failed to create command workqueue\n"); - err = -ENOMEM; - goto err_cache; + return -ENOMEM; } mlx5_cmdif_debugfs_init(dev); return 0; - -err_cache: - destroy_msg_cache(dev); -err_cmd_page: - free_cmd_page(dev, cmd); -err_free_pool: - dma_pool_destroy(cmd->pool); - return err; } void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) @@ -2240,15 +2211,15 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_cleanup(dev); destroy_workqueue(cmd->wq); - destroy_msg_cache(dev); - free_cmd_page(dev, cmd); - dma_pool_destroy(cmd->pool); } int mlx5_cmd_enable(struct mlx5_core_dev *dev) { + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; u32 cmd_h, cmd_l; + int err; memset(&cmd->vars, 0, sizeof(cmd->vars)); cmd->vars.cmdif_rev = cmdif_rev(dev); @@ -2281,10 +2252,21 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); - if (WARN_ON(cmd_l & 0xfff)) - return -EINVAL; + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_cmd_page; + } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); @@ -2297,17 +2279,27 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) cmd->mode = CMD_MODE_POLLING; cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + create_msg_cache(dev); create_debugfs_files(dev); return 0; + +err_cmd_page: + free_cmd_page(dev, cmd); +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; } void mlx5_cmd_disable(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; - clean_debug_files(dev); flush_workqueue(cmd->wq); + clean_debug_files(dev); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, -- cgit From 7624e58a8b3a251e3e5108b32f2183b34453db32 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 27 Aug 2023 13:31:53 +0300 Subject: net/mlx5: E-switch, register event handler before arming the event Currently, mlx5 is registering event handler for vport context change event some time after arming the event. this can lead to missing an event, which will result in wrong rules in the FDB. Hence, register the event handler before arming the event. This solution is valid since FW is sending vport context change event only on vports which SW armed, and SW arming the vport when enabling it, which is done after the FDB has been created. Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d4cde6555063..8d0b915a3121 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1038,11 +1038,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } -static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, ESW_FUNCTIONS_CHANGED); @@ -1050,13 +1047,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) } } -static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - flush_workqueue(esw->work_queue); } @@ -1483,6 +1478,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1495,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; - mlx5_eswitch_event_handlers_register(esw); + mlx5_eswitch_event_handler_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1622,7 +1620,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) */ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); - mlx5_eswitch_event_handlers_unregister(esw); + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", -- cgit From 7a3ce8074878a68a75ceacec93d9ae05906eec86 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 9 Aug 2023 11:10:57 +0200 Subject: net/mlx5: Bridge, fix peer entry ageing in LAG mode With current implementation in single FDB LAG mode all packets are processed by eswitch 0 rules. As such, 'peer' FDB entries receive the packets for rules of other eswitches and are responsible for updating the main entry by sending SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their background update wq task. However, this introduces a race condition when non-zero eswitch instance decides to delete a FDB entry, sends SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task refreshes the same entry concurrently while its async delete work is still pending on the workque. In such case another SWITCHDEV_FDB_ADD_TO_BRIDGE event may be generated and entry will remain stuck in FDB marked as 'offloaded' since no more SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are sent for deleting the peer entries. Fix the issue by synchronously marking deleted entries with MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in background update job. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 11 ++++++++++ .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 25 +++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 3 +++ .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0fef853eab62..5d128c5b4529 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -467,6 +467,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, /* only handle the event on peers */ if (mlx5_esw_bridge_is_local(dev, rep, esw)) break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + /* Mark for deletion to prevent the update wq task from + * spuriously refreshing the entry which would mark it again as + * offloaded in SW bridge. After this fallthrough to regular + * async delete code. + */ + mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); fallthrough; case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index e36294b7ade2..1b9bc32efd6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 entry->lastuse = jiffies; } +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED; +} + void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) @@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) unsigned long lastuse = (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); - if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | + MLX5_ESW_BRIDGE_FLAG_DELETED)) continue; if (time_after(lastuse, entry->lastuse)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index c2c7c70d99eb..d6f539161993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 4911cc32161b..7c251af566c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), + MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2), }; enum { -- cgit From 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 6 Sep 2023 21:48:30 +0300 Subject: net/mlx5: Handle fw tracer change ownership event based on MTRC Currently, whenever fw issues a change ownership event, the PF that owns the fw tracer drops its ownership directly and the other PFs try to pick up the ownership via what MTRC register suggests. In some cases, driver releases the ownership of the tracer and reacquires it later on. Whenever the driver releases ownership of the tracer, fw issues a change ownership event. This event can be delayed and come after driver has reacquired ownership of the tracer. Thus the late event will trigger the tracer owner PF to release the ownership again and lead to a scenario where no PF is owning the tracer. To prevent the scenario described above, when handling a change ownership event, do not drop ownership of the tracer directly, instead read the fw MTRC register to retrieve the up-to-date owner of the tracer and set it accordingly in driver level. Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 7c0f2adbea00..ad789349c06e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -848,7 +848,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { - tracer->owner = false; + mlx5_fw_tracer_ownership_acquire(tracer); return; } -- cgit From be43b7489a3c4702799e50179da69c3df7d6899b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 2 Oct 2023 14:05:29 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq When a page allocation fails during refill in mlx5e_post_rx_mpwqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 2436.447717] WARNING: CPU: 1 PID: 2419 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 2436.447895] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.447991] Call Trace: [ 2436.447975] mlx5e_post_rx_mpwqes+0x1d5/0xcf0 [mlx5_core] [ 2436.447994] [ 2436.447996] ? __warn+0x7d/0x120 [ 2436.448009] ? mlx5e_handle_rx_cqe_mpwrq+0x109/0x1d0 [mlx5_core] [ 2436.448002] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448044] ? mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core] [ 2436.448061] ? report_bug+0x155/0x180 [ 2436.448065] ? handle_bug+0x36/0x70 [ 2436.448067] ? exc_invalid_op+0x13/0x60 [ 2436.448070] ? asm_exc_invalid_op+0x16/0x20 [ 2436.448079] mlx5e_napi_poll+0x122/0x6b0 [mlx5_core] [ 2436.448077] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448113] ? generic_exec_single+0x35/0x100 [ 2436.448117] __napi_poll+0x25/0x1a0 [ 2436.448120] net_rx_action+0x28a/0x300 [ 2436.448122] __do_softirq+0xcd/0x279 [ 2436.448126] irq_exit_rcu+0x6a/0x90 [ 2436.448128] sysvec_apic_timer_interrupt+0x6e/0x90 [ 2436.448130] This patch fixes the striding rq case by setting the skip flag on all the wqe pages that were expected to have new pages allocated. Fixes: 4c2a13236807 ("net/mlx5e: RX, Defer page release in striding rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3fd11b0761e0..7988b3a9598c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -816,6 +816,8 @@ err_unmap: mlx5e_page_release_fragmented(rq, frag_page); } + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + err: rq->stats->buff_alloc_err++; -- cgit From ef9369e9c30846f5e052a11ccc70e1f6b8dc557a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 29 Sep 2023 17:31:49 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for legacy rq When a page allocation fails during refill in mlx5e_refill_rx_wqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 338.326070] WARNING: CPU: 4 PID: 0 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 338.328993] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329094] Call Trace: [ 338.329097] [ 338.329100] ? __warn+0x7d/0x120 [ 338.329105] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329173] ? report_bug+0x155/0x180 [ 338.329179] ? handle_bug+0x3c/0x60 [ 338.329183] ? exc_invalid_op+0x13/0x60 [ 338.329187] ? asm_exc_invalid_op+0x16/0x20 [ 338.329192] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329259] mlx5e_post_rx_wqes+0x210/0x5a0 [mlx5_core] [ 338.329327] ? mlx5e_poll_rx_cq+0x88/0x6f0 [mlx5_core] [ 338.329394] mlx5e_napi_poll+0x127/0x6b0 [mlx5_core] [ 338.329461] __napi_poll+0x25/0x1a0 [ 338.329465] net_rx_action+0x28a/0x300 [ 338.329468] __do_softirq+0xcd/0x279 [ 338.329473] irq_exit_rcu+0x6a/0x90 [ 338.329477] common_interrupt+0x82/0xa0 [ 338.329482] This patch fixes the legacy rq case by releasing all allocated fragments and then setting the skip flag on all released fragments. It is important to note that the number of released fragments will be higher than the number of allocated fragments when an allocation error occurs. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7988b3a9598c..8d9743a5e42c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -457,26 +457,41 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { int remaining = wqe_bulk; - int i = 0; + int total_alloc = 0; + int refill_alloc; + int refill; /* The WQE bulk is split into smaller bulks that are sized * according to the page pool cache refill size to avoid overflowing * the page pool cache due to too many page releases at once. */ do { - int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - int alloc_count; + refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - mlx5e_free_rx_wqes(rq, ix + i, refill); - alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); - i += alloc_count; - if (unlikely(alloc_count != refill)) - break; + mlx5e_free_rx_wqes(rq, ix + total_alloc, refill); + refill_alloc = mlx5e_alloc_rx_wqes(rq, ix + total_alloc, refill); + if (unlikely(refill_alloc != refill)) + goto err_free; + total_alloc += refill_alloc; remaining -= refill; } while (remaining); - return i; + return total_alloc; + +err_free: + mlx5e_free_rx_wqes(rq, ix, total_alloc + refill_alloc); + + for (int i = 0; i < total_alloc + refill; i++) { + int j = mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + + frag = get_frag(rq, j); + for (int k = 0; k < rq->wqe.info.num_frags; k++, frag++) + frag->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return 0; } static void -- cgit From aaab619ccd07a32e5b29aa7e59b20de1dcc7a29e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 25 Sep 2023 17:50:18 +0300 Subject: net/mlx5e: XDP, Fix XDP_REDIRECT mpwqe page fragment leaks on shutdown When mlx5e_xdp_xmit is called without the XDP_XMIT_FLUSH set it is possible that it leaves a mpwqe session open. That is ok during runtime: the session will be closed on the next call to mlx5e_xdp_xmit. But having a mpwqe session still open at XDP sq close time is problematic: the pc counter is not updated before flushing the contents of the xdpi_fifo. This results in leaking page fragments. The fix is to always close the mpwqe session at the end of mlx5e_xdp_xmit, regardless of the XDP_XMIT_FLUSH flag being set or not. Fixes: 5e0d2eef771e ("net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 12f56d0db0af..8bed17d8fe56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -874,11 +874,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } out: - if (flags & XDP_XMIT_FLUSH) { - if (sq->mpwqe.wqe) - mlx5e_xdp_mpwqe_complete(sq); + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + if (flags & XDP_XMIT_FLUSH) mlx5e_xmit_xdp_doorbell(sq); - } return nxmit; } -- cgit From c51c673462a266fb813cf189f8190798a12d3124 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 12 Sep 2023 10:06:24 +0300 Subject: net/mlx5e: Take RTNL lock before triggering netdev notifiers Hold RTNL lock when calling xdp_set_features() with a registered netdev, as the call triggers the netdev notifiers. This could happen when switching from nic profile to uplink representor for example. Similar logic which fixed a similar scenario was previously introduced in the following commit: commit 72cc65497065 net/mlx5e: Take RTNL lock when needed before calling xdp_set_features(). This fixes the following assertion and warning call trace: RTNL: assertion failed at net/core/dev.c (1961) WARNING: CPU: 13 PID: 2529 at net/core/dev.c:1961 call_netdevice_notifiers_info+0x7c/0x80 Modules linked in: rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 13 PID: 2529 Comm: devlink Not tainted 6.5.0_for_upstream_min_debug_2023_09_07_20_04 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:call_netdevice_notifiers_info+0x7c/0x80 Code: 8f ff 80 3d 77 0d 16 01 00 75 c5 ba a9 07 00 00 48 c7 c6 c4 bb 0d 82 48 c7 c7 18 c8 06 82 c6 05 5b 0d 16 01 01 e8 44 f6 8c ff <0f> 0b eb a2 0f 1f 44 00 00 55 48 89 e5 41 54 48 83 e4 f0 48 83 ec RSP: 0018:ffff88819930f7f0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffff8309f740 RCX: 0000000000000027 RDX: ffff88885fb5b5c8 RSI: 0000000000000001 RDI: ffff88885fb5b5c0 RBP: 0000000000000028 R08: ffff88887ffabaa8 R09: 0000000000000003 R10: ffff88887fecbac0 R11: ffff88887ff7bac0 R12: ffff88819930f810 R13: ffff88810b7fea40 R14: ffff8881154e8fd8 R15: ffff888107e881a0 FS: 00007f3ad248f800(0000) GS:ffff88885fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563b85f164e0 CR3: 0000000113b5c006 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x79/0x120 ? call_netdevice_notifiers_info+0x7c/0x80 ? report_bug+0x17c/0x190 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? call_netdevice_notifiers_info+0x7c/0x80 call_netdevice_notifiers+0x2e/0x50 mlx5e_set_xdp_feature+0x21/0x50 [mlx5_core] mlx5e_build_rep_params+0x97/0x130 [mlx5_core] mlx5e_init_ul_rep+0x9f/0x100 [mlx5_core] mlx5e_netdev_init_profile+0x76/0x110 [mlx5_core] mlx5e_netdev_attach_profile+0x1f/0x90 [mlx5_core] mlx5e_netdev_change_profile+0x92/0x160 [mlx5_core] mlx5e_vport_rep_load+0x329/0x4a0 [mlx5_core] mlx5_esw_offloads_rep_load+0x9e/0xf0 [mlx5_core] esw_offloads_enable+0x4bc/0xe90 [mlx5_core] mlx5_eswitch_enable_locked+0x3c8/0x570 [mlx5_core] ? kmalloc_trace+0x25/0x80 mlx5_devlink_eswitch_mode_set+0x224/0x680 [mlx5_core] ? devlink_get_from_attrs_lock+0x9e/0x110 devlink_nl_cmd_eswitch_set_doit+0x60/0xe0 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? devlink_get_from_attrs_lock+0x110/0x110 ? devlink_nl_cmd_eswitch_get_doit+0x290/0x290 ? devlink_pernet_pre_exit+0xf0/0xf0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2c0 netlink_sendmsg+0x232/0x4a0 sock_sendmsg+0x38/0x60 ? _copy_from_user+0x2a/0x60 __sys_sendto+0x110/0x160 ? handle_mm_fault+0x161/0x260 ? do_user_addr_fault+0x276/0x620 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3ad231340a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffd70aad4b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000c36b00 RCX:00007f3ad231340a RDX: 0000000000000038 RSI: 0000000000c36b00 RDI: 0000000000000003 RBP: 0000000000c36910 R08: 00007f3ad2625200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2fdb8895aecd..5ca9bc337dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -769,6 +769,7 @@ static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) static void mlx5e_build_rep_params(struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; @@ -794,8 +795,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + /* If netdev is already registered (e.g. move from nic profile to uplink, + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); -- cgit From 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 12 Sep 2023 02:28:47 +0000 Subject: net/mlx5e: Don't offload internal port if filter device is out device In the cited commit, if the routing device is ovs internal port, the out device is set to uplink, and packets go out after encapsulation. If filter device is uplink, it can trigger the following syndrome: mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22) Fix this issue by not offloading internal port if filter device is out device. In this case, packets are not forwarded to the root table to be processed, the termination table is used instead to forward them from uplink to uplink. Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 1730f6a716ee..b10e40e1a9c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -24,7 +24,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev)) + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) goto out; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, -- cgit From 80f1241484dd1b1d4eab1a0211d52ec2bd83e2f1 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Mon, 4 Sep 2023 18:26:47 +0300 Subject: net/mlx5e: Fix VF representors reporting zero counters to "ip -s" command Although vf_vport entry of struct mlx5e_stats is never updated, its values are mistakenly copied to the caller structure in the VF representor .ndo_get_stat_64 callback mlx5e_rep_get_stats(). Remove redundant entry and use the updated one, rep_stats, instead. Fixes: 64b68e369649 ("net/mlx5: Refactor and expand rep vport stat group") Reviewed-by: Patrisious Haddad Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ca9bc337dc6..fd1cce542b68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -701,7 +701,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) /* update HW stats in background for next time */ mlx5e_queue_update_stats(priv); - memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); + mlx5e_stats_copy_rep_stats(stats, &priv->stats.rep_stats); } static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 176fa5976259..477c547dcc04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -484,11 +484,20 @@ struct mlx5e_stats { struct mlx5e_vnic_env_stats vnic; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct rtnl_link_stats64 vf_vport; struct mlx5e_pcie_stats pcie; struct mlx5e_rep_stats rep_stats; }; +static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport, + struct mlx5e_rep_stats *rep_stats) +{ + memset(vf_vport, 0, sizeof(*vf_vport)); + vf_vport->rx_packets = rep_stats->vport_rx_packets; + vf_vport->tx_packets = rep_stats->vport_tx_packets; + vf_vport->rx_bytes = rep_stats->vport_rx_bytes; + vf_vport->tx_bytes = rep_stats->vport_tx_bytes; +} + extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c24828b688ac..c8590483ddc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4972,7 +4972,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, + &priv->stats.rep_stats); break; default: NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); @@ -5012,7 +5013,7 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, u64 dbytes; u64 dpkts; - cur_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; -- cgit From d35652a5fc9944784f6f50a5c979518ff8dacf61 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Oct 2023 13:04:24 +0300 Subject: x86/alternatives: Disable KASAN in apply_alternatives() Fei has reported that KASAN triggers during apply_alternatives() on a 5-level paging machine: BUG: KASAN: out-of-bounds in rcu_is_watching() Read of size 4 at addr ff110003ee6419a0 by task swapper/0/0 ... __asan_load4() rcu_is_watching() trace_hardirqs_on() text_poke_early() apply_alternatives() ... On machines with 5-level paging, cpu_feature_enabled(X86_FEATURE_LA57) gets patched. It includes KASAN code, where KASAN_SHADOW_START depends on __VIRTUAL_MASK_SHIFT, which is defined with cpu_feature_enabled(). KASAN gets confused when apply_alternatives() patches the KASAN_SHADOW_START users. A test patch that makes KASAN_SHADOW_START static, by replacing __VIRTUAL_MASK_SHIFT with 56, works around the issue. Fix it for real by disabling KASAN while the kernel is patching alternatives. [ mingo: updated the changelog ] Fixes: 6657fca06e3f ("x86/mm: Allow to boot without LA57 if CONFIG_X86_5LEVEL=y") Reported-by: Fei Yang Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231012100424.1456-1-kirill.shutemov@linux.intel.com --- arch/x86/kernel/alternative.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 517ee01503be..73be3931e4f0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 insn_buff[MAX_PATCH_LEN]; DPRINTK(ALT, "alt table %px, -> %px", start, end); + + /* + * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using + * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. + * During the process, KASAN becomes confused seeing partial LA57 + * conversion and triggers a false-positive out-of-bound report. + * + * Disable KASAN until the patching is complete. + */ + kasan_disable_current(); + /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, text_poke_early(instr, insn_buff, insn_buff_sz); } + + kasan_enable_current(); } static inline bool is_jcc32(struct insn *insn) -- cgit From 505b02957e74f0c5c4655647ccb04bdc945d18f6 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Thu, 14 Sep 2023 17:13:34 +0800 Subject: riscv: Remove duplicate objcopy flag There are two duplicate `-O binary` flags when objcopying from vmlinux to Image/xipImage. RISC-V set `-O binary` flag in both OBJCOPYFLAGS in the top-level riscv Makefile and OBJCOPYFLAGS_* in the boot/Makefile, and the objcopy cmd in Kbuild would join them together. The `-O binary` flag is only needed for objcopying Image, so remove the OBJCOPYFLAGS in the top-level riscv Makefile. Fixes: c0fbcd991860 ("RISC-V: Build flat and compressed kernel images") Signed-off-by: Song Shuai Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230914091334.1458542-1-songshuaishuai@tinylab.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1329e060c548..b43a6bb7e4dc 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -6,7 +6,6 @@ # for more details. # -OBJCOPYFLAGS := -O binary LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs -- cgit From 1d6cd2146c2b58bc91266db1d5d6a5f9632e14c0 Mon Sep 17 00:00:00 2001 From: Chen Jiahao Date: Mon, 25 Sep 2023 10:43:33 +0800 Subject: riscv: kdump: fix crashkernel reserving problem on RISC-V When testing on risc-v QEMU environment with "crashkernel=" parameter enabled, a problem occurred with the following message: [ 0.000000] crashkernel low memory reserved: 0xf8000000 - 0x100000000 (128 MB) [ 0.000000] crashkernel reserved: 0x0000000177e00000 - 0x0000000277e00000 (4096 MB) [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:779 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc2-next-20230920 #1 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] epc : __insert_resource+0x8e/0xd0 [ 0.000000] ra : insert_resource+0x28/0x4e [ 0.000000] epc : ffffffff80017344 ra : ffffffff8001742e sp : ffffffff81203db0 [ 0.000000] gp : ffffffff812ece98 tp : ffffffff8120dac0 t0 : ff600001f7ff2b00 [ 0.000000] t1 : 0000000000000000 t2 : 3428203030303030 s0 : ffffffff81203dc0 [ 0.000000] s1 : ffffffff81211e18 a0 : ffffffff81211e18 a1 : ffffffff81289380 [ 0.000000] a2 : 0000000277dfffff a3 : 0000000177e00000 a4 : 0000000177e00000 [ 0.000000] a5 : ffffffff81289380 a6 : 0000000277dfffff a7 : 0000000000000078 [ 0.000000] s2 : ffffffff81289380 s3 : ffffffff80a0bac8 s4 : ff600001f7ff2880 [ 0.000000] s5 : 0000000000000280 s6 : 8000000a00006800 s7 : 000000000000007f [ 0.000000] s8 : 0000000080017038 s9 : 0000000080038ea0 s10: 0000000000000000 [ 0.000000] s11: 0000000000000000 t3 : ffffffff80a0bc00 t4 : ffffffff80a0bc00 [ 0.000000] t5 : ffffffff80a0bbd0 t6 : ffffffff80a0bc00 [ 0.000000] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.000000] [] __insert_resource+0x8e/0xd0 [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Failed to add a Crash kernel resource at 177e00000 The crashkernel memory has been allocated successfully, whereas it failed to insert into iomem_resource. This is due to the unique reserving logic in risc-v arch specific code, i.e. crashk_res/crashk_low_res will be added into iomem_resource later in init_resources(), which is not aligned with current unified reserving logic in reserve_crashkernel_{generic,low}() and therefore leads to the failure of crashkernel reservation. Removing the arch specific code within #ifdef CONFIG_KEXEC_CORE in init_resources() to fix above problem. Fixes: 31549153088e ("riscv: kdump: use generic interface to simplify crashkernel reservation") Signed-off-by: Chen Jiahao Link: https://lore.kernel.org/r/20230925024333.730964-1-chenjiahao16@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e600aab116a4..aac853ae4eb7 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -173,19 +173,6 @@ static void __init init_resources(void) if (ret < 0) goto error; -#ifdef CONFIG_KEXEC_CORE - if (crashk_res.start != crashk_res.end) { - ret = add_resource(&iomem_resource, &crashk_res); - if (ret < 0) - goto error; - } - if (crashk_low_res.start != crashk_low_res.end) { - ret = add_resource(&iomem_resource, &crashk_low_res); - if (ret < 0) - goto error; - } -#endif - #ifdef CONFIG_CRASH_DUMP if (elfcorehdr_size > 0) { elfcorehdr_res.start = elfcorehdr_addr; -- cgit From 07a27665754bf649b5de8e55c655e4d6837406be Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Wed, 13 Sep 2023 13:29:40 +0800 Subject: RISC-V: Fix wrong use of CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK If configuration options SOFTIRQ_ON_OWN_STACK and PREEMPT_RT are enabled simultaneously under RISC-V architecture, it will result in a compilation failure: arch/riscv/kernel/irq.c:64:6: error: redefinition of 'do_softirq_own_stack' 64 | void do_softirq_own_stack(void) | ^~~~~~~~~~~~~~~~~~~~ In file included from ./arch/riscv/include/generated/asm/softirq_stack.h:1, from arch/riscv/kernel/irq.c:15: ./include/asm-generic/softirq_stack.h:8:20: note: previous definition of 'do_softirq_own_stack' was here 8 | static inline void do_softirq_own_stack(void) | ^~~~~~~~~~~~~~~~~~~~ After changing CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK to CONFIG_SOFTIRQ_ON_OWN_STACK, compilation can be successful. Fixes: dd69d07a5a6c ("riscv: stack: Support HAVE_SOFTIRQ_ON_OWN_STACK") Reviewed-by: Guo Ren Signed-off-by: Jiexun Wang Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20230913052940.374686-1-wangjiexun@tinylab.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index a8efa053c4a5..9cc0a7669271 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -60,7 +60,7 @@ static void init_irq_stacks(void) } #endif /* CONFIG_VMAP_STACK */ -#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK void do_softirq_own_stack(void) { #ifdef CONFIG_IRQ_STACKS @@ -92,7 +92,7 @@ void do_softirq_own_stack(void) #endif __do_softirq(); } -#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */ +#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else static void init_irq_stacks(void) {} -- cgit From a87e7d3e8832271ecb7d5eaaabc5b49fe25a469b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 3 Oct 2023 20:24:07 +0200 Subject: riscv: Fix ftrace syscall handling which are now prefixed with __riscv_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ftrace creates entries for each syscall in the tracefs but has failed since commit 08d0ce30e0e4 ("riscv: Implement syscall wrappers") which prefixes all riscv syscalls with __riscv_. So fix this by implementing arch_syscall_match_sym_name() which allows us to ignore this prefix. And also ignore compat syscalls like x86/arm64 by implementing arch_trace_is_compat_syscall(). Fixes: 08d0ce30e0e4 ("riscv: Implement syscall wrappers") Signed-off-by: Alexandre Ghiti Reviewed-by: Sami Tolvanen Acked-by: Masami Hiramatsu (Google) Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231003182407.32198-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 740a979171e5..2b2f5df7ef2c 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -31,6 +31,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* + * Let's do like x86/arm64 and ignore the compat syscalls. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __riscv_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __riscv_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} + struct dyn_arch_ftrace { }; #endif -- cgit From 3fec323339a4a9801a54e8b282eb571965b67b23 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 6 Oct 2023 10:20:10 +0200 Subject: drivers: perf: Fix panic in riscv SBI mmap support The following panic can happen when mmap is called before the pmu add callback which sets the hardware counter index: this happens for example with the following command `perf record --no-bpf-event -n kill`. [ 99.461486] CPU: 1 PID: 1259 Comm: perf Tainted: G E 6.6.0-rc4ubuntu-defconfig #2 [ 99.461669] Hardware name: riscv-virtio,qemu (DT) [ 99.461748] epc : pmu_sbi_set_scounteren+0x42/0x44 [ 99.462337] ra : smp_call_function_many_cond+0x126/0x5b0 [ 99.462369] epc : ffffffff809f9d24 ra : ffffffff800f93e0 sp : ff60000082153aa0 [ 99.462407] gp : ffffffff82395c98 tp : ff6000009a218040 t0 : ff6000009ab3a4f0 [ 99.462425] t1 : 0000000000000004 t2 : 0000000000000100 s0 : ff60000082153ab0 [ 99.462459] s1 : 0000000000000000 a0 : ff60000098869528 a1 : 0000000000000000 [ 99.462473] a2 : 000000000000001f a3 : 0000000000f00000 a4 : fffffffffffffff8 [ 99.462488] a5 : 00000000000000cc a6 : 0000000000000000 a7 : 0000000000735049 [ 99.462502] s2 : 0000000000000001 s3 : ffffffff809f9ce2 s4 : ff60000098869528 [ 99.462516] s5 : 0000000000000002 s6 : 0000000000000004 s7 : 0000000000000001 [ 99.462530] s8 : ff600003fec98bc0 s9 : ffffffff826c5890 s10: ff600003fecfcde0 [ 99.462544] s11: ff600003fec98bc0 t3 : ffffffff819e2558 t4 : ff1c000004623840 [ 99.462557] t5 : 0000000000000901 t6 : ff6000008feeb890 [ 99.462570] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 99.462658] [] pmu_sbi_set_scounteren+0x42/0x44 [ 99.462979] Code: 1060 4785 97bb 00d7 8fd9 9073 1067 6422 0141 8082 (9002) 0013 [ 99.463335] Kernel BUG [#2] To circumvent this, try to enable userspace access to the hardware counter when it is selected in addition to when the event is mapped. And vice-versa when the event is stopped/unmapped. Fixes: cc4c07c89aad ("drivers: perf: Implement perf event mmap support in the SBI backend") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231006082010.11963-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu.c | 3 ++- drivers/perf/riscv_pmu_sbi.c | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 1f9a35f724f5..0dda70e1ef90 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -23,7 +23,8 @@ static bool riscv_perf_user_access(struct perf_event *event) return ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE) || (event->attr.type == PERF_TYPE_RAW)) && - !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) && + (event->hw.idx != -1); } void arch_perf_update_userpage(struct perf_event *event, diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 9a51053b1f99..96c7f670c8f0 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -510,16 +510,18 @@ static void pmu_sbi_set_scounteren(void *arg) { struct perf_event *event = (struct perf_event *)arg; - csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); + if (event->hw.idx != -1) + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); } static void pmu_sbi_reset_scounteren(void *arg) { struct perf_event *event = (struct perf_event *)arg; - csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); + if (event->hw.idx != -1) + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); } static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) @@ -541,7 +543,8 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - pmu_sbi_set_scounteren((void *)event); + on_each_cpu_mask(mm_cpumask(event->owner->mm), + pmu_sbi_set_scounteren, (void *)event, 1); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -551,7 +554,8 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - pmu_sbi_reset_scounteren((void *)event); + on_each_cpu_mask(mm_cpumask(event->owner->mm), + pmu_sbi_reset_scounteren, (void *)event, 1); ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && -- cgit From ca10d851b9ad0338c19e8e3089e24d565ebfffd7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 10 Oct 2023 22:48:42 -0400 Subject: workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask() Commit 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") enabled implicit ordered attribute to be added to WQ_UNBOUND workqueues with max_active of 1. This prevented the changing of attributes to these workqueues leading to fix commit 0a94efb5acbb ("workqueue: implicit ordered attribute should be overridable"). However, workqueue_apply_unbound_cpumask() was not updated at that time. So sysfs changes to wq_unbound_cpumask has no effect on WQ_UNBOUND workqueues with implicit ordered attribute. Since not all WQ_UNBOUND workqueues are visible on sysfs, we are not able to make all the necessary cpumask changes even if we iterates all the workqueue cpumasks in sysfs and changing them one by one. Fix this problem by applying the corresponding change made to apply_workqueue_attrs_locked() in the fix commit to workqueue_apply_unbound_cpumask(). Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6f74cab2bd5a..51177ffe16f1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5792,9 +5792,13 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND)) continue; + /* creating multiple pwqs breaks ordering guarantee */ - if (wq->flags & __WQ_ORDERED) - continue; + if (!list_empty(&wq->pwqs)) { + if (wq->flags & __WQ_ORDERED_EXPLICIT) + continue; + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { -- cgit From 5d9c7a1e3e8e18db8e10c546de648cda2a57be52 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Mon, 9 Oct 2023 19:09:46 +0200 Subject: workqueue: fix -Wformat-truncation in create_worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling with W=1 emitted the following warning (Compiler: gcc (x86-64, ver. 13.2.1, .config: result of make allyesconfig, "Treat warnings as errors" turned off): kernel/workqueue.c:2188:54: warning: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size between 5 and 14 [-Wformat-truncation=] kernel/workqueue.c:2188:50: note: directive argument in the range [0, 2147483647] kernel/workqueue.c:2188:17: note: ‘snprintf’ output between 4 and 23 bytes into a destination of size 16 setting "id_buf" to size 23 will silence the warning, since GCC determines snprintf's output to be max. 23 bytes in line 2188. Please let me know if there are any mistakes in my patch! Signed-off-by: Lucy Mielke Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 51177ffe16f1..a3522b70218d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2166,7 +2166,7 @@ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; - char id_buf[16]; + char id_buf[23]; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); -- cgit From 4366faf43308bd91c59a20c43a9f853a9c3bb6e4 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 11 Oct 2023 13:41:34 +0200 Subject: drm/nouveau/disp: fix DP capable DSM connectors Just special case DP DSM connectors until we properly figure out how to deal with this. This resolves user regressions on GPUs with such connectors without reverting the original fix. Cc: Lyude Paul Cc: stable@vger.kernel.org # 6.4+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/255 Fixes: 2b5d1c29f6c4 ("drm/nouveau/disp: PIOR DP uses GPIO for HPD, not PMGR AUX interrupts") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20231011114134.861818-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c index 46b057fe1412..3249e5c1c893 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c @@ -62,6 +62,18 @@ nvkm_uconn_uevent_gpio(struct nvkm_object *object, u64 token, u32 bits) return object->client->event(token, &args, sizeof(args.v0)); } +static bool +nvkm_connector_is_dp_dms(u8 type) +{ + switch (type) { + case DCB_CONNECTOR_DMS59_DP0: + case DCB_CONNECTOR_DMS59_DP1: + return true; + default: + return false; + } +} + static int nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) { @@ -101,7 +113,7 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_ if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO; if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) { /* TODO: support DP IRQ on ANX9805 and remove this hack. */ - if (!outp->info.location) + if (!outp->info.location && !nvkm_connector_is_dp_dms(conn->info.type)) return -EINVAL; } -- cgit From 17bfcd6a81535d7d580ddafb6e54806890aaca6c Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 11:49:58 -0300 Subject: rust: error: fix the description for `ECHILD` A mistake was made and the description of `ECHILD` is wrong (it reuses the description of `ENOEXEC`). This fixes it to reflect what's in `errno-base.h`. Signed-off-by: Wedson Almeida Filho Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Trevor Gross Reviewed-by: Finn Behrens Reviewed-by: Alice Ryhl Fixes: 266def2a0f5b ("rust: error: add codes from `errno-base.h`") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230930144958.46051-1-wedsonaf@gmail.com [ Use the plural, as noticed by Benno. ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 05fcab6abfe6..0f29e7b2194c 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -37,7 +37,7 @@ pub mod code { declare_err!(E2BIG, "Argument list too long."); declare_err!(ENOEXEC, "Exec format error."); declare_err!(EBADF, "Bad file number."); - declare_err!(ECHILD, "Exec format error."); + declare_err!(ECHILD, "No child processes."); declare_err!(EAGAIN, "Try again."); declare_err!(ENOMEM, "Out of memory."); declare_err!(EACCES, "Permission denied."); -- cgit From 2a7e0a52ec98566a863aba42ea35690c65e3da27 Mon Sep 17 00:00:00 2001 From: Manmohan Shukla Date: Thu, 7 Sep 2023 02:18:57 +0530 Subject: rust: error: Markdown style nit This patch fixes a trivial markdown style nit in the `SAFETY` comment. Signed-off-by: Manmohan Shukla Reviewed-by: Alice Ryhl Reviewed-by: Jianguo Bao Reviewed-by: Vincenzo Palazzo Reviewed-by: Finn Behrens Reviewed-by: Benno Lossin Reviewed-by: Andreas Hindborg Fixes: c7e20faa5fca ("rust: error: Add Error::to_ptr()") Link: https://lore.kernel.org/r/20230906204857.85619-1-manmshuk@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 0f29e7b2194c..032b64543953 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -133,7 +133,7 @@ impl Error { /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr(self) -> *mut T { - // SAFETY: self.0 is a valid error due to its invariant. + // SAFETY: `self.0` is a valid error due to its invariant. unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ } } -- cgit From 344b6c0a7514b044ed12b8ad3cdeecd262292f3e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 15 Aug 2023 08:53:46 +0200 Subject: rust: fix bindgen build error with fstrict-flex-arrays Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") enabled '-fstrict-flex-arrays=3' globally, but bindgen does not recognized this compiler option, triggering the following build error: error: unknown argument: '-fstrict-flex-arrays=3', err: true [ Miguel: Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") did it so only conditionally (i.e. only if the C compiler supports it). This explains what Andrea was seeing: he was compiling with a modern enough GCC, which enables the option, but with an old enough Clang. Andrea confirmed this was the case: he was using Clang 14 with GCC 13; and that Clang 15 worked for him. While it is possible to construct code (see mailing list for an example I came up with) where this could break, it is fairly contrived, and anyway GCC-built kernels with Rust enabled should only be used for experimentation until we get support for `rustc_codegen_gcc` and/or GCC Rust. So let's add this for the time being in case it helps somebody. ] Add '-fstrict-flex-arrays' to the list of cflags that should be ignored by bindgen. Fixes: df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") Signed-off-by: Andrea Righi Tested-by: Gary Guo Link: https://lore.kernel.org/r/20230815065346.131387-1-andrea.righi@canonical.com Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/Makefile b/rust/Makefile index 87958e864be0..1e78c82a18a8 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -290,6 +290,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ + -fstrict-flex-arrays=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. -- cgit From b35726396390eb445668cecdbcd41d0285dbefdf Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Oct 2023 15:54:21 -0700 Subject: Revert "Input: psmouse - add delay when deactivating for SMBus mode" This reverts commit 92e24e0e57f72e06c2df87116557331fd2d4dda2. While the patch itself is correct, it uncovered an issue with fallback to PS/2 mode, where we were leaving psmouse->fast_reconnect handler set to psmouse_smbus_reconnect(), which caused crashes. While discussing various approaches to fix the issue it was noted that this patch ass undesired delay in the "fast" resume path of PS/2 device, and it would be better to actually use "reset_delay" option defined in struct rmi_device_platform_data and have RMI code handle it for SMBus transport as well. So this patch is being reverted to deal with crashes and a better solution will be merged shortly. Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/all/ca0109fa-c64b-43c1-a651-75b294d750a1@leemhuis.info/ Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-smbus.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c index 7b13de979908..2a2459b1b4f2 100644 --- a/drivers/input/mouse/psmouse-smbus.c +++ b/drivers/input/mouse/psmouse-smbus.c @@ -5,7 +5,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include @@ -119,18 +118,13 @@ static psmouse_ret_t psmouse_smbus_process_byte(struct psmouse *psmouse) return PSMOUSE_FULL_PACKET; } -static void psmouse_activate_smbus_mode(struct psmouse_smbus_dev *smbdev) -{ - if (smbdev->need_deactivate) { - psmouse_deactivate(smbdev->psmouse); - /* Give the device time to switch into SMBus mode */ - msleep(30); - } -} - static int psmouse_smbus_reconnect(struct psmouse *psmouse) { - psmouse_activate_smbus_mode(psmouse->private); + struct psmouse_smbus_dev *smbdev = psmouse->private; + + if (smbdev->need_deactivate) + psmouse_deactivate(psmouse); + return 0; } @@ -263,7 +257,8 @@ int psmouse_smbus_init(struct psmouse *psmouse, } } - psmouse_activate_smbus_mode(smbdev); + if (need_deactivate) + psmouse_deactivate(psmouse); psmouse->private = smbdev; psmouse->protocol_handler = psmouse_smbus_process_byte; -- cgit From f43328357defc0dc9d28dbd06dc3361fd2b22e28 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 6 Oct 2023 10:41:36 +0900 Subject: ksmbd: not allow to open file if delelete on close bit is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cthon test fail with the following error. check for proper open/unlink operation nfsjunk files before unlink: -rwxr-xr-x 1 root root 0 9월 25 11:03 ./nfs2y8Jm9 ./nfs2y8Jm9 open; unlink ret = 0 nfsjunk files after unlink: -rwxr-xr-x 1 root root 0 9월 25 11:03 ./nfs2y8Jm9 data compare ok nfsjunk files after close: ls: cannot access './nfs2y8Jm9': No such file or directory special tests failed Cthon expect to second unlink failure when file is already unlinked. ksmbd can not allow to open file if flags of ksmbd inode is set with S_DEL_ON_CLS flags. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index c4b80ab7df74..1c5c39733652 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -106,7 +106,7 @@ int ksmbd_query_inode_status(struct inode *inode) ci = __ksmbd_inode_lookup(inode); if (ci) { ret = KSMBD_INODE_STATUS_OK; - if (ci->m_flags & S_DEL_PENDING) + if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) ret = KSMBD_INODE_STATUS_PENDING_DELETE; atomic_dec(&ci->m_count); } @@ -116,7 +116,7 @@ int ksmbd_query_inode_status(struct inode *inode) bool ksmbd_inode_pending_delete(struct ksmbd_file *fp) { - return (fp->f_ci->m_flags & S_DEL_PENDING); + return (fp->f_ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)); } void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp) -- cgit From be0f89d4419dc5413a1cf06db3671c9949be0d52 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 9 Oct 2023 23:58:15 +0900 Subject: ksmbd: fix wrong error response status by using set_smb2_rsp_status() set_smb2_rsp_status() after __process_request() sets the wrong error status. This patch resets all iov vectors and sets the error status on clean one. Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 898860adf929..87c6401a6007 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -231,11 +231,12 @@ void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err) { struct smb2_hdr *rsp_hdr; - if (work->next_smb2_rcv_hdr_off) - rsp_hdr = ksmbd_resp_buf_next(work); - else - rsp_hdr = smb2_get_msg(work->response_buf); + rsp_hdr = smb2_get_msg(work->response_buf); rsp_hdr->Status = err; + + work->iov_idx = 0; + work->iov_cnt = 0; + work->next_smb2_rcv_hdr_off = 0; smb2_set_err_rsp(work); } -- cgit From 414849040fcf11d45025b8ae26c9fd91da1465da Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 11 Oct 2023 23:29:49 +0900 Subject: ksmbd: fix Null pointer dereferences in ksmbd_update_fstate() Coverity Scan report the following one. This report is a false alarm. Because fp is never NULL when rc is zero. This patch add null check for fp in ksmbd_update_fstate to make alarm silence. *** CID 1568583: Null pointer dereferences (FORWARD_NULL) /fs/smb/server/smb2pdu.c: 3408 in smb2_open() 3402 path_put(&path); 3403 path_put(&parent_path); 3404 } 3405 ksmbd_revert_fsids(work); 3406 err_out1: 3407 if (!rc) { >>> CID 1568583: Null pointer dereferences (FORWARD_NULL) >>> Passing null pointer "fp" to "ksmbd_update_fstate", which dereferences it. 3408 ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); 3409 rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); 3410 } 3411 if (rc) { 3412 if (rc == -EINVAL) 3413 rsp->hdr.Status = STATUS_INVALID_PARAMETER; Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: Coverity Scan Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1c5c39733652..c91eac6514dd 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -603,6 +603,9 @@ err_out: void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, unsigned int state) { + if (!fp) + return; + write_lock(&ft->lock); fp->f_state = state; write_unlock(&ft->lock); -- cgit From 1903e6d0578118e9aab1ee23f4a9de55737d1d05 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 11 Oct 2023 23:30:26 +0900 Subject: ksmbd: fix potential double free on smb2_read_pipe() error path Fix new smatch warnings: fs/smb/server/smb2pdu.c:6131 smb2_read_pipe() error: double free of 'rpc_resp' Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 87c6401a6007..93262ca3f58a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6152,12 +6152,12 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) memcpy(aux_payload_buf, rpc_resp->payload, rpc_resp->payload_sz); nbytes = rpc_resp->payload_sz; - kvfree(rpc_resp); err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); if (err) goto out; + kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer)); -- cgit From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2023 10:24:29 +0000 Subject: xfrm: fix a data-race in xfrm_lookup_with_ifid() syzbot complains about a race in xfrm_lookup_with_ifid() [1] When preparing commit 0a9e5794b21e ("xfrm: annotate data-race around use_time") I thought xfrm_lookup_with_ifid() was modifying a still private structure. [1] BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 99df2862bdc9..d24b4d4f620e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3220,7 +3220,7 @@ no_transform: } for (i = 0; i < num_pols; i++) - pols[i]->curlft.use_time = ktime_get_real_seconds(); + WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); if (num_xfrms < 0) { /* Prohibit the flow */ -- cgit From 6a7be48e9bd18d309ba25c223a27790ad1bf0fa3 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 5 Sep 2023 09:37:24 +0200 Subject: USB: serial: option: add Telit LE910C4-WWX 0x1035 composition Add support for the following Telit LE910C4-WWX composition: 0x1035: TTY, TTY, ECM T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1035 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=e1b117c7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Fabio Porcedda Cc: stable@vger.kernel.org Reviewed-by: Daniele Palmas Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7994a4549a6c..f612bc9129bd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1290,6 +1290,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1033, 0xff), /* Telit LE910C1-EUX (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1035, 0xff) }, /* Telit LE910C4-WWX (ECM) */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), -- cgit From 064f6e2ba9eb59b2c87b866e1e968e79ccedf9dd Mon Sep 17 00:00:00 2001 From: Benoît Monin Date: Mon, 2 Oct 2023 17:51:40 +0200 Subject: USB: serial: option: add entry for Sierra EM9191 with new firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following a firmware update of the modem, the interface for the AT command port changed, so add it back. T: Bus=08 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=1199 ProdID=90d3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9191 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f612bc9129bd..c0908e6d4921 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2263,6 +2263,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { } /* Terminating entry */ -- cgit From ff07186b4d774ac22a5345d30763045af4569416 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 11 Oct 2023 22:25:28 +0300 Subject: x86/efistub: Don't try to print after ExitBootService() setup_e820() is executed after UEFI's ExitBootService has been called. This causes the firmware to throw an exception because the Console IO protocol is supposed to work only during boot service environment. As per UEFI 2.9, section 12.1: "This protocol is used to handle input and output of text-based information intended for the system user during the operation of code in the boot services environment." So drop the diagnostic warning from this function. We might add back a warning that is issued later when initializing the kernel itself. Signed-off-by: Nikolay Borisov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 2fee52ed335d..3b8bccd7c216 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -605,11 +605,8 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s break; case EFI_UNACCEPTED_MEMORY: - if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) { - efi_warn_once( -"The system has unaccepted memory, but kernel does not support it\nConsider enabling CONFIG_UNACCEPTED_MEMORY\n"); + if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) continue; - } e820_type = E820_TYPE_RAM; process_unaccepted_memory(d->phys_addr, d->phys_addr + PAGE_SIZE * d->num_pages); -- cgit From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Tue, 10 Oct 2023 18:39:33 +0200 Subject: bonding: Return pointer to data after pull on skb Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers"), header offsets used to compute a hash in bond_xmit_hash() are relative to skb->data and not skb->head. If the tail of the header buffer of an skb really needs to be advanced and the operation is successful, the pointer to the data must be returned (and not a pointer to the head of the buffer). Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers") Signed-off-by: Jiri Wiesner Acked-by: Jay Vosburgh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ed7212e61c54..51d47eda1c87 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4023,7 +4023,7 @@ static inline const void *bond_pull_data(struct sk_buff *skb, if (likely(n <= hlen)) return data; else if (skb && likely(pskb_may_pull(skb, n))) - return skb->head; + return skb->data; return NULL; } -- cgit From 0d3ad1917996839a5042d18f04e41915cfa1b74a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 24 Sep 2023 22:26:33 +0800 Subject: efi: fix memory leak in krealloc failure handling In the previous code, there was a memory leak issue where the previously allocated memory was not freed upon a failed krealloc operation. This patch addresses the problem by releasing the old memory before setting the pointer to NULL in case of a krealloc failure. This ensures that memory is properly managed and avoids potential memory leaks. Signed-off-by: Kuan-Wei Chiu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1599f1176842..9cfac61812f6 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -273,9 +273,13 @@ static __init int efivar_ssdt_load(void) if (status == EFI_NOT_FOUND) { break; } else if (status == EFI_BUFFER_TOO_SMALL) { - name = krealloc(name, name_size, GFP_KERNEL); - if (!name) + efi_char16_t *name_tmp = + krealloc(name, name_size, GFP_KERNEL); + if (!name_tmp) { + kfree(name); return -ENOMEM; + } + name = name_tmp; continue; } -- cgit From 4eaf0932c69bdc56d2c2af30404f9c918b1f6295 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 3 Oct 2023 12:02:09 +0200 Subject: block: Fix regression in sed-opal for a saved key. The commit 3bfeb61256643281ac4be5b8a57e9d9da3db4335 introduced the use of keyring for sed-opal. Unfortunately, there is also a possibility to save the Opal key used in opal_lock_unlock(). This patch switches the order of operation, so the cached key is used instead of failure for opal_get_key. The problem was found by the cryptsetup Opal test recently added to the cryptsetup tree. Fixes: 3bfeb6125664 ("block: sed-opal: keyring support for SED keys") Tested-by: Ondrej Kozina Signed-off-by: Milan Broz Link: https://lore.kernel.org/r/20231003100209.380037-1-gmazyland@gmail.com Signed-off-by: Jens Axboe --- block/sed-opal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 6d7f25d1711b..04f38a3f5d95 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -2888,12 +2888,11 @@ static int opal_lock_unlock(struct opal_dev *dev, if (lk_unlk->session.who > OPAL_USER9) return -EINVAL; - ret = opal_get_key(dev, &lk_unlk->session.opal_key); - if (ret) - return ret; mutex_lock(&dev->dev_lock); opal_lock_check_for_saved_key(dev, lk_unlk); - ret = __opal_lock_unlock(dev, lk_unlk); + ret = opal_get_key(dev, &lk_unlk->session.opal_key); + if (!ret) + ret = __opal_lock_unlock(dev, lk_unlk); mutex_unlock(&dev->dev_lock); return ret; -- cgit From f88dfbf333b3661faff996bb03af2024d907b76a Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 13 Oct 2023 17:45:25 +0800 Subject: ASoC: rt5650: fix the wrong result of key button The RT5650 should enable a power setting for button detection to avoid the wrong result. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20231013094525.715518-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 1a137ca3f496..7938b52d741d 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3257,6 +3257,8 @@ int rt5645_set_jack_detect(struct snd_soc_component *component, RT5645_GP1_PIN_IRQ, RT5645_GP1_PIN_IRQ); regmap_update_bits(rt5645->regmap, RT5645_GEN_CTRL1, RT5645_DIG_GATE_CTRL, RT5645_DIG_GATE_CTRL); + regmap_update_bits(rt5645->regmap, RT5645_DEPOP_M1, + RT5645_HP_CB_MASK, RT5645_HP_CB_PU); } rt5645_irq(0, rt5645); -- cgit From 4e9a429ae80657bdc502d3f5078e2073656ec5fd Mon Sep 17 00:00:00 2001 From: Roy Chateau Date: Fri, 13 Oct 2023 13:02:39 +0200 Subject: ASoC: codecs: tas2780: Fix log of failed reset via I2C. Correctly log failures of reset via I2C. Signed-off-by: Roy Chateau Link: https://lore.kernel.org/r/20231013110239.473123-1-roy.chateau@mep-info.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2780.c b/sound/soc/codecs/tas2780.c index 86bd6c18a944..41076be23854 100644 --- a/sound/soc/codecs/tas2780.c +++ b/sound/soc/codecs/tas2780.c @@ -39,7 +39,7 @@ static void tas2780_reset(struct tas2780_priv *tas2780) usleep_range(2000, 2050); } - snd_soc_component_write(tas2780->component, TAS2780_SW_RST, + ret = snd_soc_component_write(tas2780->component, TAS2780_SW_RST, TAS2780_RST); if (ret) dev_err(tas2780->dev, "%s:errCode:0x%x Reset error!\n", -- cgit From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 From: Dan Clash Date: Thu, 12 Oct 2023 14:55:18 -0700 Subject: audit,io_uring: io_uring openat triggers audit reference count underflow An io_uring openat operation can update an audit reference count from multiple threads resulting in the call trace below. A call to io_uring_submit() with a single openat op with a flag of IOSQE_ASYNC results in the following reference count updates. These first part of the system call performs two increments that do not race. do_syscall_64() __do_sys_io_uring_enter() io_submit_sqes() io_openat_prep() __io_openat_prep() getname() getname_flags() /* update 1 (increment) */ __audit_getname() /* update 2 (increment) */ The openat op is queued to an io_uring worker thread which starts the opportunity for a race. The system call exit performs one decrement. do_syscall_64() syscall_exit_to_user_mode() syscall_exit_to_user_mode_prepare() __audit_syscall_exit() audit_reset_context() putname() /* update 3 (decrement) */ The io_uring worker thread performs one increment and two decrements. These updates can race with the system call decrement. io_wqe_worker() io_worker_handle_work() io_wq_submit_work() io_issue_sqe() io_openat() io_openat2() do_filp_open() path_openat() __audit_inode() /* update 4 (increment) */ putname() /* update 5 (decrement) */ __audit_uring_exit() audit_reset_context() putname() /* update 6 (decrement) */ The fix is to change the refcnt member of struct audit_names from int to atomic_t. kernel BUG at fs/namei.c:262! Call Trace: ... ? putname+0x68/0x70 audit_reset_context.part.0.constprop.0+0xe1/0x300 __audit_uring_exit+0xda/0x1c0 io_issue_sqe+0x1f3/0x450 ? lock_timer_base+0x3b/0xd0 io_wq_submit_work+0x8d/0x2b0 ? __try_to_del_timer_sync+0x67/0xa0 io_worker_handle_work+0x17c/0x2b0 io_wqe_worker+0x10a/0x350 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Dan Clash Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- fs/namei.c | 9 +++++---- include/linux/fs.h | 2 +- kernel/auditsc.c | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 567ee547492b..94565bd7e73f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -188,7 +188,7 @@ getname_flags(const char __user *filename, int flags, int *empty) } } - result->refcnt = 1; + atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { if (empty) @@ -249,7 +249,7 @@ getname_kernel(const char * filename) memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; - result->refcnt = 1; + atomic_set(&result->refcnt, 1); audit_getname(result); return result; @@ -261,9 +261,10 @@ void putname(struct filename *name) if (IS_ERR(name)) return; - BUG_ON(name->refcnt <= 0); + if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) + return; - if (--name->refcnt > 0) + if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..4a40823c3c67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2403,7 +2403,7 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 21d2fa815e78..6f0d6fb6523f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2212,7 +2212,7 @@ __audit_reusename(const __user char *uptr) if (!n->name) continue; if (n->name->uptr == uptr) { - n->name->refcnt++; + atomic_inc(&n->name->refcnt); return n->name; } } @@ -2241,7 +2241,7 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; - name->refcnt++; + atomic_inc(&name->refcnt); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2373,7 +2373,7 @@ out_alloc: return; if (name) { n->name = name; - name->refcnt++; + atomic_inc(&name->refcnt); } out: @@ -2500,7 +2500,7 @@ void __audit_inode_child(struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - found_child->name->refcnt++; + atomic_inc(&found_child->name->refcnt); } } -- cgit From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 9 Oct 2023 18:38:14 -0700 Subject: tcp: Fix listen() warning with v4-mapped-v6 address. syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address."). After the cited commit, a v4 socket's address matches the corresponding v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa. During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses bhash and conflicts properly without checking bhash2 so that we need not check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in inet_bind2_bucket_match_addr(). However, the repro shows that we need to check that in a no-conflict case. The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls listen() for the first socket: from socket import * s1 = socket() s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s1.bind(('127.0.0.1', 0)) s2 = socket(AF_INET6) s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1])) s1.listen() The second socket should belong to the first socket's tb2, but the second bind() creates another tb2 bucket because inet_bind2_bucket_find() returns NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the corresponding v4 address tb2. bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X) Then, listen() for the first socket calls inet_csk_get_port(), where the v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered. To avoid that, we need to check if v4-mapped-v6 sk address matches with the corresponding v4 address tb2 in inet_bind2_bucket_match(). The same checks are needed in inet_bind2_bucket_addr_match() too, so we can move all checks there and call it from inet_bind2_bucket_match(). Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr and not of sockets in the bucket. This could be refactored later by defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending ::ffff: when creating v4 tb2. [0]: WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Modules linked in: CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31 RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000 RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38 RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200 R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100 FS: 00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256 __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217 inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239 __sys_listen+0x194/0x270 net/socket.c:1866 __do_sys_listen net/socket.c:1875 [inline] __se_sys_listen net/socket.c:1873 [inline] __x64_sys_listen+0x53/0x80 net/socket.c:1873 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f3a5bce3af9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9 RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001 R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.") Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c32f5e28758b..598c1b114d2c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -149,8 +149,14 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb2->family) - return false; + if (sk->sk_family != tb2->family) { + if (sk->sk_family == AF_INET) + return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && + tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; + + return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && + sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; + } if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, @@ -819,19 +825,7 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, tb->l3mdev != l3mdev) return false; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && - tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; - - return false; - } - - if (sk->sk_family == AF_INET6) - return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); -#endif - return tb->rcv_saddr == sk->sk_rcv_saddr; + return inet_bind2_bucket_addr_match(tb, sk); } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, -- cgit From 9c97790a07dc4f9bdc6e1701003dc9b86f749c71 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Oct 2023 18:37:33 +0100 Subject: ASoC: dwc: Fix non-DT instantiation Commit d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") converted the DesignWare I2S driver to use a DT specific function to obtain platform data but this breaks at least non-DT systems such as AMD. Revert it. Fixes: d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231013-asoc-fix-dwc-v1-1-63211bb746b9@kernel.org Signed-off-by: Mark Brown --- sound/soc/dwc/dwc-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 22c004179214..9ea4be56d3b7 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -917,7 +917,7 @@ static int jh7110_i2stx0_clk_cfg(struct i2s_clk_config_data *config) static int dw_i2s_probe(struct platform_device *pdev) { - const struct i2s_platform_data *pdata = of_device_get_match_data(&pdev->dev); + const struct i2s_platform_data *pdata = pdev->dev.platform_data; struct dw_i2s_dev *dev; struct resource *res; int ret, irq; -- cgit From dbb13378ba306484214b84304e232723a2aaf10a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:12 +0300 Subject: selftests: fib_tests: Disable RP filter in multipath list receive test The test relies on the fib:fib_table_lookup trace point being triggered once for each forwarded packet. If RP filter is not disabled, the trace point will be triggered twice for each packet (for source validation and forwarding), potentially masking actual bugs. Fix by explicitly disabling RP filter. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.99) [ OK ] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e7d2a530618a..0dbb26b4fa4a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2437,6 +2437,9 @@ ipv4_mpath_list_test() run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') -- cgit From aa13e5241a8fa32b26d5a6b8b63d04c6357243f4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:13 +0300 Subject: selftests: fib_tests: Count all trace point invocations The tests rely on the IPv{4,6} FIB trace points being triggered once for each forwarded packet. If receive processing is deferred to the ksoftirqd task these invocations will not be counted and the tests will fail. Fix by specifying the '-a' flag to avoid perf from filtering on the mausezahn task. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.68) [FAIL] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.27) [FAIL] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0dbb26b4fa4a..66d0db7a2614 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2452,7 +2452,7 @@ ipv4_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2497,7 +2497,7 @@ ipv6_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff -- cgit From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Oct 2023 11:24:19 +0800 Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register() In bcm_sf2_mdio_register(), the class_find_device() will call get_device() to increment reference count for priv->master_mii_bus->dev if of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register() fails, it will call get_device() twice without decrement reference count for the device. And it is the same if bcm_sf2_mdio_register() succeeds but fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the reference count has not decremented to zero, the dev related resource will not be freed. So remove the get_device() in bcm_sf2_mdio_register(), and call put_device() if mdiobus_alloc() or mdiobus_register() fails and in bcm_sf2_mdio_unregister() to solve the issue. And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and just return 0 if it succeeds to make it cleaner. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Jinjie Ruan Suggested-by: Simon Horman Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 72374b066f64..cd1f240c90f3 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); if (!priv->master_mii_bus) { - of_node_put(dn); - return -EPROBE_DEFER; + err = -EPROBE_DEFER; + goto err_of_node_put; } - get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { - of_node_put(dn); - return -ENOMEM; + err = -ENOMEM; + goto err_put_master_mii_bus_dev; } priv->slave_mii_bus->priv = priv; @@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) { - mdiobus_free(priv->slave_mii_bus); - of_node_put(dn); - } + if (err && dn) + goto err_free_slave_mii_bus; + return 0; + +err_free_slave_mii_bus: + mdiobus_free(priv->slave_mii_bus); +err_put_master_mii_bus_dev: + put_device(&priv->master_mii_bus->dev); +err_of_node_put: + of_node_put(dn); return err; } @@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); mdiobus_free(priv->slave_mii_bus); + put_device(&priv->master_mii_bus->dev); of_node_put(priv->master_mii_dn); } -- cgit From 242e34500a32631f85c2b4eb6cb42a368a39e54f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 10 Oct 2023 13:30:59 -0700 Subject: ice: fix over-shifted variable Since the introduction of the ice driver the code has been double-shifting the RSS enabling field, because the define already has shifts in it and can't have the regular pattern of "a << shiftval & mask" applied. Most places in the code got it right, but one line was still wrong. Fix this one location for easy backports to stable. An in-progress patch fixes the defines to "standard" and will be applied as part of the regular -next process sometime after this one. Fixes: d76a60ba7afb ("ice: Add support for VLANs and offloads") Reviewed-by: Przemek Kitszel CC: stable@vger.kernel.org Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231010203101.406248-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7bf9b7069754..73bbf06a76db 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1201,8 +1201,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } static void -- cgit From 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 11 Oct 2023 09:20:55 +0200 Subject: tcp: allow again tcp_disconnect() when threads are waiting As reported by Tom, .NET and applications build on top of it rely on connect(AF_UNSPEC) to async cancel pending I/O operations on TCP socket. The blamed commit below caused a regression, as such cancellation can now fail. As suggested by Eric, this change addresses the problem explicitly causing blocking I/O operation to terminate immediately (with an error) when a concurrent disconnect() is executed. Instead of tracking the number of threads blocked on a given socket, track the number of disconnect() issued on such socket. If such counter changes after a blocking operation releasing and re-acquiring the socket lock, error out the current operation. Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting") Reported-by: Tom Deseyn Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305 Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++++----- include/net/sock.h | 10 +++--- net/core/stream.c | 12 +++++--- net/ipv4/af_inet.c | 10 ++++-- net/ipv4/inet_connection_sock.c | 1 - net/ipv4/tcp.c | 16 +++++----- net/ipv4/tcp_bpf.c | 4 +++ net/mptcp/protocol.c | 7 ----- net/tls/tls_main.c | 10 ++++-- net/tls/tls_sw.c | 19 ++++++++---- 10 files changed, 80 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 5fc64e47568a..d567e42e1760 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev, struct sock *sk, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; + int ret, err = 0; long current_timeo; long vm_wait = 0; bool noblock; @@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev, set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, sk->sk_err || - (sk->sk_shutdown & SEND_SHUTDOWN) || - (csk_mem_free(cdev, sk) && !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (csk_mem_free(cdev, sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; @@ -1348,6 +1351,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; int target; long timeo; + int ret; buffers_freed = 0; @@ -1423,7 +1427,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: if (!skb->len) { @@ -1518,6 +1526,8 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); + +unlock: release_sock(sk); return copied; } @@ -1534,6 +1544,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, int copied = 0; size_t avail; /* amount of available data in current skb */ long timeo; + int ret; lock_sock(sk); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1585,7 +1596,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, release_sock(sk); lock_sock(sk); } else { - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + /* here 'copied' is 0 due to previous checks */ + copied = ret; + break; + } } if (unlikely(peek_seq != tp->copied_seq)) { @@ -1656,6 +1672,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; long timeo; int target; /* Read at least this many bytes */ + int ret; buffers_freed = 0; @@ -1747,7 +1764,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: @@ -1816,6 +1837,7 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); +unlock: release_sock(sk); return copied; } diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf..92f7ea62a915 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,7 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -429,7 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -1189,8 +1189,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1200,8 +1199,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \ __rc; \ }) diff --git a/net/core/stream.c b/net/core/stream.c index f5c4e47df165..96fbcb9bbb30 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close); */ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) { - int err = 0; + int ret, err = 0; long vm_wait = 0; long current_timeo = *timeo_p; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || - (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || - (sk_stream_memory_free(sk) && - !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..2713c9b06c4c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -597,7 +597,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; - sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -613,7 +612,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; - sk->sk_wait_pending--; return timeo; } @@ -642,6 +640,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { + sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; goto out; @@ -696,6 +695,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; + int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) @@ -704,6 +704,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; + + if (dis != sk->sk_disconnects) { + err = -EPIPE; + goto out; + } } /* Connection was closed by RST, timeout, ICMP error @@ -725,6 +730,7 @@ out: sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; + sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index aeebe8816689..394a498c2823 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1145,7 +1145,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); - newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; newicsk->icsk_bind2_hash = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3f66cdeef7de..d3456cf840de 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -831,7 +831,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, */ if (!skb_queue_empty(&sk->sk_receive_queue)) break; - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) + break; if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -2442,7 +2444,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, last); + err = sk_wait_data(sk, &timeo, last); + if (err < 0) { + err = copied ? : err; + goto out; + } } if ((flags & MSG_PEEK) && @@ -2966,12 +2972,6 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 327268203001..ba2e92188124 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -307,6 +307,8 @@ msg_bytes_ready: } data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data && !sk_psock_queue_empty(psock)) goto msg_bytes_ready; copied = -EAGAIN; @@ -351,6 +353,8 @@ msg_bytes_ready: timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c3b83cb390d9..d1902373c974 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3098,12 +3098,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under * msk->firstsocket lock). @@ -3173,7 +3167,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif - nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff9239..002483e60c19 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -139,8 +139,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) int wait_on_pending_writer(struct sock *sk, long *timeo) { - int rc = 0; DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret, rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { @@ -154,9 +154,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) break; } - if (sk_wait_event(sk, timeo, - !READ_ONCE(sk->sk_write_pending), &wait)) + ret = sk_wait_event(sk, timeo, + !READ_ONCE(sk->sk_write_pending), &wait); + if (ret) { + if (ret < 0) + rc = ret; break; + } } remove_wait_queue(sk_sleep(sk), &wait); return rc; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d1fc295b83b5..e9d1e83a859d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1291,6 +1291,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; long timeo; timeo = sock_rcvtimeo(sk, nonblock); @@ -1302,6 +1303,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sk->sk_err) return sock_error(sk); + if (ret < 0) + return ret; + if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) @@ -1320,10 +1324,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + ret = sk_wait_event(sk, &timeo, + tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), + &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1852,6 +1856,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; + int ret; timeo = sock_rcvtimeo(sk, nonblock); @@ -1861,14 +1866,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); - sk_wait_event(sk, &timeo, - !READ_ONCE(ctx->reader_present), &wait); + ret = sk_wait_event(sk, &timeo, + !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); if (timeo <= 0) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(timeo); + if (ret < 0) + return ret; } WRITE_ONCE(ctx->reader_present, 1); -- cgit From c68681ae46eaaa1640b52fe366d21a93b2185df5 Mon Sep 17 00:00:00 2001 From: Albert Huang Date: Wed, 11 Oct 2023 15:48:51 +0800 Subject: net/smc: fix smc clc failed issue when netdevice not in init_net If the netdevice is within a container and communicates externally through network technologies such as VxLAN, we won't be able to find routing information in the init_net namespace. To address this issue, we need to add a struct net parameter to the smc_ib_find_route function. This allow us to locate the routing information within the corresponding net namespace, ensuring the correct completion of the SMC CLC interaction. Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Albert Huang Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- net/smc/smc_ib.c | 7 ++++--- net/smc/smc_ib.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bacdd971615e..7a874da90c7f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = smc_get_clc_first_contact_ext(clc_v2, false); + struct net *net = sock_net(&smc->sk); int rc; if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) @@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); ini->smcrv2.uses_gateway = false; } else { - if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, + if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr, smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), ini->smcrv2.nexthop_mac, &ini->smcrv2.uses_gateway)) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9b66d6aeeb1a..89981dbe46c9 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway) { struct neighbour *neigh = NULL; @@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr, if (daddr == cpu_to_be32(INADDR_NONE)) goto out; - rt = ip_route_output_flow(&init_net, &fl4, NULL); + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) @@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { struct in_device *in_dev = __in_dev_get_rcu(ndev); + struct net *net = dev_net(ndev); const struct in_ifaddr *ifa; bool subnet_match = false; @@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, } if (!subnet_match) goto out; - if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, + if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr, smcrv2->daddr, smcrv2->nexthop_mac, &smcrv2->uses_gateway)) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 4df5f8c8a0a1..ef8ac2b7546d 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index, struct smc_init_info_smcrv2 *smcrv2); -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); -- cgit From 0f4d44f6ee048818abf80c69b6bc48927c178abe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Oct 2023 13:58:11 +0200 Subject: netlink: specs: devlink: fix reply command values Make sure that the command values used for replies are correct. This is only affecting generated userspace helpers, no change on kernel code. Fixes: 7199c86247e9 ("netlink: specs: devlink: add commands that do per-instance dump") Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20231012115811.298129-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 18 +++++------ tools/net/ynl/generated/devlink-user.c | 54 ++++++++++++++++---------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index d1ebcd927149..065661acb878 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -323,7 +323,7 @@ operations: - dev-name - sb-index reply: &sb-get-reply - value: 11 + value: 13 attributes: *sb-id-attrs dump: request: @@ -350,7 +350,7 @@ operations: - sb-index - sb-pool-index reply: &sb-pool-get-reply - value: 15 + value: 17 attributes: *sb-pool-id-attrs dump: request: @@ -378,7 +378,7 @@ operations: - sb-index - sb-pool-index reply: &sb-port-pool-get-reply - value: 19 + value: 21 attributes: *sb-port-pool-id-attrs dump: request: @@ -407,7 +407,7 @@ operations: - sb-pool-type - sb-tc-index reply: &sb-tc-pool-bind-get-reply - value: 23 + value: 25 attributes: *sb-tc-pool-bind-id-attrs dump: request: @@ -538,7 +538,7 @@ operations: - dev-name - trap-name reply: &trap-get-reply - value: 61 + value: 63 attributes: *trap-id-attrs dump: request: @@ -564,7 +564,7 @@ operations: - dev-name - trap-group-name reply: &trap-group-get-reply - value: 65 + value: 67 attributes: *trap-group-id-attrs dump: request: @@ -590,7 +590,7 @@ operations: - dev-name - trap-policer-id reply: &trap-policer-get-reply - value: 69 + value: 71 attributes: *trap-policer-id-attrs dump: request: @@ -617,7 +617,7 @@ operations: - port-index - rate-node-name reply: &rate-get-reply - value: 74 + value: 76 attributes: *rate-id-attrs dump: request: @@ -643,7 +643,7 @@ operations: - dev-name - linecard-index reply: &linecard-get-reply - value: 78 + value: 80 attributes: *linecard-id-attrs dump: request: diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index 3a8d8499fab6..2cb2518500cb 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -16,19 +16,19 @@ static const char * const devlink_op_strmap[] = { [3] = "get", [7] = "port-get", - [DEVLINK_CMD_SB_GET] = "sb-get", - [DEVLINK_CMD_SB_POOL_GET] = "sb-pool-get", - [DEVLINK_CMD_SB_PORT_POOL_GET] = "sb-port-pool-get", - [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = "sb-tc-pool-bind-get", + [13] = "sb-get", + [17] = "sb-pool-get", + [21] = "sb-port-pool-get", + [25] = "sb-tc-pool-bind-get", [DEVLINK_CMD_PARAM_GET] = "param-get", [DEVLINK_CMD_REGION_GET] = "region-get", [DEVLINK_CMD_INFO_GET] = "info-get", [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_TRAP_GET] = "trap-get", - [DEVLINK_CMD_TRAP_GROUP_GET] = "trap-group-get", - [DEVLINK_CMD_TRAP_POLICER_GET] = "trap-policer-get", - [DEVLINK_CMD_RATE_GET] = "rate-get", - [DEVLINK_CMD_LINECARD_GET] = "linecard-get", + [63] = "trap-get", + [67] = "trap-group-get", + [71] = "trap-policer-get", + [76] = "rate-get", + [80] = "linecard-get", [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", }; @@ -838,7 +838,7 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_GET; + yrs.rsp_cmd = 13; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -876,7 +876,7 @@ devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_get_list); yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_GET; + yds.rsp_cmd = 13; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); @@ -987,7 +987,7 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yrs.rsp_cmd = 17; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1026,7 +1026,7 @@ devlink_sb_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yds.rsp_cmd = 17; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); @@ -1147,7 +1147,7 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yrs.rsp_cmd = 21; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1187,7 +1187,7 @@ devlink_sb_port_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yds.rsp_cmd = 21; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); @@ -1316,7 +1316,7 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yrs.rsp_cmd = 25; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1356,7 +1356,7 @@ devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yds.rsp_cmd = 25; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); @@ -2183,7 +2183,7 @@ devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yrs.rsp_cmd = 63; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2223,7 +2223,7 @@ devlink_trap_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_get_list); yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yds.rsp_cmd = 63; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); @@ -2336,7 +2336,7 @@ devlink_trap_group_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yrs.rsp_cmd = 67; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2376,7 +2376,7 @@ devlink_trap_group_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yds.rsp_cmd = 67; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); @@ -2483,7 +2483,7 @@ devlink_trap_policer_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yrs.rsp_cmd = 71; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2523,7 +2523,7 @@ devlink_trap_policer_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yds.rsp_cmd = 71; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); @@ -2642,7 +2642,7 @@ devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RATE_GET; + yrs.rsp_cmd = 76; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2682,7 +2682,7 @@ devlink_rate_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_rate_get_list); yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_RATE_GET; + yds.rsp_cmd = 76; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); @@ -2786,7 +2786,7 @@ devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yrs.rsp_cmd = 80; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2825,7 +2825,7 @@ devlink_linecard_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_linecard_get_list); yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yds.rsp_cmd = 80; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); -- cgit From a258c804aa8742763dce694b5e992d7ccf4294f2 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Thu, 12 Oct 2023 08:31:44 -0400 Subject: docs: fix info about representor identification Update the "How are representors identified?" documentation subchapter. For newer kernels driver should use SET_NETDEV_DEVLINK_PORT instead of ndo_get_devlink_port() callback. Fixes: 7712b3e966ea ("Merge branch 'net-fix-netdev-to-devlink_port-linkage-and-expose-to-user'") Signed-off-by: Mateusz Polchlopek Reviewed-by: Wojciech Drewek Reviewed-by: Przemek Kitszel Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20231012123144.15768-1-mateusz.polchlopek@intel.com Signed-off-by: Jakub Kicinski --- Documentation/networking/representors.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index ee1f5cd54496..decb39c19b9e 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -162,9 +162,11 @@ How are representors identified? The representor netdevice should *not* directly refer to a PCIe device (e.g. through ``net_dev->dev.parent`` / ``SET_NETDEV_DEV()``), either of the representee or of the switchdev function. -Instead, it should implement the ``ndo_get_devlink_port()`` netdevice op, which -the kernel uses to provide the ``phys_switch_id`` and ``phys_port_name`` sysfs -nodes. (Some legacy drivers implement ``ndo_get_port_parent_id()`` and +Instead, the driver should use the ``SET_NETDEV_DEVLINK_PORT`` macro to +assign a devlink port instance to the netdevice before registering the +netdevice; the kernel uses the devlink port to provide the ``phys_switch_id`` +and ``phys_port_name`` sysfs nodes. +(Some legacy drivers implement ``ndo_get_port_parent_id()`` and ``ndo_get_phys_port_name()`` directly, but this is deprecated.) See :ref:`Documentation/networking/devlink/devlink-port.rst ` for the details of this API. -- cgit From 2c0d808f36cc6e0617f9dda055a6651c777a9d64 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 12 Oct 2023 12:16:26 +0530 Subject: net: ti: icssg-prueth: Fix tx_total_bytes count ICSSG HW stats on TX side considers 8 preamble bytes as data bytes. Due to this the tx_bytes of ICSSG interface doesn't match the rx_bytes of the link partner. There is no public errata available yet. As a workaround to fix this, decrease tx_bytes by 8 bytes for every tx frame. Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20231012064626.977466-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index bb0b33927e3b..3dbadddd7e35 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -9,6 +9,9 @@ #include "icssg_stats.h" #include +#define ICSSG_TX_PACKET_OFFSET 0xA0 +#define ICSSG_TX_BYTE_OFFSET 0xEC + static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ }; @@ -18,6 +21,7 @@ void emac_update_hardware_stats(struct prueth_emac *emac) struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); u32 base = stats_base[slice]; + u32 tx_pkt_cnt = 0; u32 val; int i; @@ -29,7 +33,12 @@ void emac_update_hardware_stats(struct prueth_emac *emac) base + icssg_all_stats[i].offset, val); + if (icssg_all_stats[i].offset == ICSSG_TX_PACKET_OFFSET) + tx_pkt_cnt = val; + emac->stats[i] += val; + if (icssg_all_stats[i].offset == ICSSG_TX_BYTE_OFFSET) + emac->stats[i] -= tx_pkt_cnt * 8; } } -- cgit From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 11 Oct 2023 16:33:32 -0700 Subject: i40e: prevent crash on probe if hw registers have invalid values The hardware provides the indexes of the first and the last available queue and VF. From the indexes, the driver calculates the numbers of queues and VFs. In theory, a faulty device might say the last index is smaller than the first index. In that case, the driver's calculation would underflow, it would attempt to write to non-existent registers outside of the ioremapped range and crash. I ran into this not by having a faulty device, but by an operator error. I accidentally ran a QE test meant for i40e devices on an ice device. The test used 'echo i40e > /sys/...ice PCI device.../driver_override', bound the driver to the device and crashed in one of the wr32 calls in i40e_clear_hw. Add checks to prevent underflows in the calculations of num_queues and num_vfs. With this fix, the wrong device probing reports errors and returns a failure without crashing. Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index eeef20f77106..1b493854f522 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> I40E_PFLAN_QALLOC_LASTQ_SHIFT; - if (val & I40E_PFLAN_QALLOC_VALID_MASK) + if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) num_queues = (j - base_queue) + 1; else num_queues = 0; @@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> I40E_PF_VT_PFALLOC_LASTVF_SHIFT; - if (val & I40E_PF_VT_PFALLOC_VALID_MASK) + if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) num_vfs = (j - i) + 1; else num_vfs = 0; -- cgit From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 11 Oct 2023 16:33:33 -0700 Subject: ice: reset first in crash dump kernels When the system boots into the crash dump kernel after a panic, the ice networking device may still have pending transactions that can cause errors or machine checks when the device is re-enabled. This can prevent the crash dump kernel from loading the driver or collecting the crash data. To avoid this issue, perform a function level reset (FLR) on the ice device via PCIe config space before enabling it on the crash kernel. This will clear any outstanding transactions and stop all queues and interrupts. Restore the config space after the FLR, otherwise it was found in testing that the driver wouldn't load successfully. The following sequence causes the original issue: - Load the ice driver with modprobe ice - Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs - Trigger a crash with echo c > /proc/sysrq-trigger - Load the ice driver again (or let it load automatically) with modprobe ice - The system crashes again during pcim_enable_device() Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series") Reported-by: Vishal Agrawal Reviewed-by: Jay Vosburgh Reviewed-by: Przemek Kitszel Signed-off-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c8286adae946..6550c46e4e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "ice.h" #include "ice_base.h" #include "ice_lib.h" @@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return -EINVAL; } + /* when under a kdump kernel initiate a reset before enabling the + * device in order to clear out any pending DMA transactions. These + * transactions can cause some systems to machine check when doing + * the pcim_enable_device() below. + */ + if (is_kdump_kernel()) { + pci_save_state(pdev); + pci_clear_master(pdev); + err = pcie_flr(pdev); + if (err) + return err; + pci_restore_state(pdev); + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ -- cgit From 42066c4d5d344cdf8564556cdbe0aa36854fefa4 Mon Sep 17 00:00:00 2001 From: Mateusz Pacuszka Date: Wed, 11 Oct 2023 16:33:34 -0700 Subject: ice: Fix safe mode when DDP is missing One thing is broken in the safe mode, that is ice_deinit_features() is being executed even that ice_init_features() was not causing stack trace during pci_unregister_driver(). Add check on the top of the function. Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Mateusz Pacuszka Signed-off-by: Jan Sokolowski Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-4-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6550c46e4e36..7784135160fd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4684,6 +4684,9 @@ static void ice_init_features(struct ice_pf *pf) static void ice_deinit_features(struct ice_pf *pf) { + if (ice_is_safe_mode(pf)) + return; + ice_deinit_lag(pf); if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) ice_cfg_lldp_mib_change(&pf->hw, false); -- cgit From e2cb5cc822b6c9ee72c56ce1d81671b22c05406a Mon Sep 17 00:00:00 2001 From: Jeffery Miller Date: Fri, 13 Oct 2023 15:23:49 -0700 Subject: Input: psmouse - fix fast_reconnect function for PS/2 mode When the SMBus connection is attempted psmouse_smbus_init() sets the fast_reconnect pointer to psmouse_smbus_reconnecti(). If SMBus initialization fails, elantech_setup_ps2() and synaptics_init_ps2() will fallback to PS/2 mode, replacing the psmouse private data. This can cause issues on resume, since psmouse_smbus_reconnect() expects to find an instance of struct psmouse_smbus_dev in psmouse->private. The issue was uncovered when in 92e24e0e57f7 ("Input: psmouse - add delay when deactivating for SMBus mode") psmouse_smbus_reconnect() started attempting to use more of the data structure. The commit was since reverted, not because it was at fault, but because there was found a better way of doing what it was attempting to do. Fix the problem by resetting the fast_reconnect pointer in psmouse structure in elantech_setup_ps2() and synaptics_init_ps2() when the PS/2 mode is used. Reported-by: Thorsten Leemhuis Tested-by: Thorsten Leemhuis Signed-off-by: Jeffery Miller Fixes: bf232e460a35 ("Input: psmouse-smbus - allow to control psmouse_deactivate") Link: https://lore.kernel.org/r/20231005002249.554877-1-jefferymiller@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 1 + drivers/input/mouse/synaptics.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 2118b2075f43..4e38229404b4 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -2114,6 +2114,7 @@ static int elantech_setup_ps2(struct psmouse *psmouse, psmouse->protocol_handler = elantech_process_byte; psmouse->disconnect = elantech_disconnect; psmouse->reconnect = elantech_reconnect; + psmouse->fast_reconnect = NULL; psmouse->pktsize = info->hw_version > 1 ? 6 : 4; return 0; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ada299ec5bba..cefc74b3b34b 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1623,6 +1623,7 @@ static int synaptics_init_ps2(struct psmouse *psmouse, psmouse->set_rate = synaptics_set_rate; psmouse->disconnect = synaptics_disconnect; psmouse->reconnect = synaptics_reconnect; + psmouse->fast_reconnect = NULL; psmouse->cleanup = synaptics_reset; /* Synaptics can usually stay in sync without extra help */ psmouse->resync_time = 0; -- cgit From 5030b2fe6aab37fe42d14f31842ea38be7c55c57 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Oct 2023 17:29:57 -0700 Subject: Input: synaptics-rmi4 - handle reset delay when using SMBus trsnsport Touch controllers need some time after receiving reset command for the firmware to finish re-initializing and be ready to respond to commands from the host. The driver already had handling for the post-reset delay for I2C and SPI transports, this change adds the handling to SMBus-connected devices. SMBus devices are peculiar because they implement legacy PS/2 compatibility mode, so reset is actually issued by psmouse driver on the associated serio port, after which the control is passed to the RMI4 driver with SMBus companion device. Note that originally the delay was added to psmouse driver in 92e24e0e57f7 ("Input: psmouse - add delay when deactivating for SMBus mode"), but that resulted in an unwanted delay in "fast" reconnect handler for the serio port, so it was decided to revert the patch and have the delay being handled in the RMI4 driver, similar to the other transports. Tested-by: Jeffery Miller Link: https://lore.kernel.org/r/ZR1yUFJ8a9Zt606N@penguin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + drivers/input/rmi4/rmi_smbus.c | 50 +++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index cefc74b3b34b..22d16d80efb9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1753,6 +1753,7 @@ static int synaptics_create_intertouch(struct psmouse *psmouse, psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && !SYN_CAP_EXT_BUTTONS_STICK(info->ext_cap_10); const struct rmi_device_platform_data pdata = { + .reset_delay_ms = 30, .sensor_pdata = { .sensor_type = rmi_sensor_touchpad, .axis_align.flip_y = true, diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index 7059a2762aeb..b0b099b5528a 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -235,12 +235,29 @@ static void rmi_smb_clear_state(struct rmi_smb_xport *rmi_smb) static int rmi_smb_enable_smbus_mode(struct rmi_smb_xport *rmi_smb) { - int retval; + struct i2c_client *client = rmi_smb->client; + int smbus_version; + + /* + * psmouse driver resets the controller, we only need to wait + * to give the firmware chance to fully reinitialize. + */ + if (rmi_smb->xport.pdata.reset_delay_ms) + msleep(rmi_smb->xport.pdata.reset_delay_ms); /* we need to get the smbus version to activate the touchpad */ - retval = rmi_smb_get_version(rmi_smb); - if (retval < 0) - return retval; + smbus_version = rmi_smb_get_version(rmi_smb); + if (smbus_version < 0) + return smbus_version; + + rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", + smbus_version); + + if (smbus_version != 2 && smbus_version != 3) { + dev_err(&client->dev, "Unrecognized SMB version %d\n", + smbus_version); + return -ENODEV; + } return 0; } @@ -253,11 +270,10 @@ static int rmi_smb_reset(struct rmi_transport_dev *xport, u16 reset_addr) rmi_smb_clear_state(rmi_smb); /* - * we do not call the actual reset command, it has to be handled in - * PS/2 or there will be races between PS/2 and SMBus. - * PS/2 should ensure that a psmouse_reset is called before - * intializing the device and after it has been removed to be in a known - * state. + * We do not call the actual reset command, it has to be handled in + * PS/2 or there will be races between PS/2 and SMBus. PS/2 should + * ensure that a psmouse_reset is called before initializing the + * device and after it has been removed to be in a known state. */ return rmi_smb_enable_smbus_mode(rmi_smb); } @@ -272,7 +288,6 @@ static int rmi_smb_probe(struct i2c_client *client) { struct rmi_device_platform_data *pdata = dev_get_platdata(&client->dev); struct rmi_smb_xport *rmi_smb; - int smbus_version; int error; if (!pdata) { @@ -311,18 +326,9 @@ static int rmi_smb_probe(struct i2c_client *client) rmi_smb->xport.proto_name = "smb"; rmi_smb->xport.ops = &rmi_smb_ops; - smbus_version = rmi_smb_get_version(rmi_smb); - if (smbus_version < 0) - return smbus_version; - - rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", - smbus_version); - - if (smbus_version != 2 && smbus_version != 3) { - dev_err(&client->dev, "Unrecognized SMB version %d\n", - smbus_version); - return -ENODEV; - } + error = rmi_smb_enable_smbus_mode(rmi_smb); + if (error) + return error; i2c_set_clientdata(client, rmi_smb); -- cgit From a65cd7ef5a864bdbbe037267c327786b7759d4c6 Mon Sep 17 00:00:00 2001 From: Matthias Berndt Date: Fri, 13 Oct 2023 15:04:36 -0700 Subject: Input: xpad - add PXN V900 support Add VID and PID to the xpad_device table to allow driver to use the PXN V900 steering wheel, which is XTYPE_XBOX360 compatible in xinput mode. Signed-off-by: Matthias Berndt Link: https://lore.kernel.org/r/4932699.31r3eYUQgx@fedora Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index b22f9c6e7fa5..f5c21565bb3c 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -273,6 +273,7 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, + { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -479,6 +480,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ + XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ -- cgit From b541260615f601ae1b5d6d0cc54e790de706303b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 13:59:59 -0700 Subject: Bluetooth: hci_event: Fix using memcmp when comparing keys memcmp is not consider safe to use with cryptographic secrets: 'Do not use memcmp() to compare security critical data, such as cryptographic secrets, because the required CPU time depends on the number of equal bytes.' While usage of memcmp for ZERO_KEY may not be considered a security critical data, it can lead to more usage of memcmp with pairing keys which could introduce more security problems. Fixes: 455c2ff0a558 ("Bluetooth: Fix BR/EDR out-of-band pairing with only initiator data") Fixes: 33155c4aae52 ("Bluetooth: hci_event: Ignore NULL link key") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7e1c875e0e88..9b252a65a7cd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -26,6 +26,8 @@ /* Bluetooth HCI event handling. */ #include +#include +#include #include #include @@ -4754,7 +4756,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, goto unlock; /* Ignore NULL link key against CVE-2020-26555 */ - if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", &ev->bdaddr); hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); @@ -5294,8 +5296,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * available, then do not declare that OOB data is * present. */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand256, ZERO_KEY, 16) || + !crypto_memneq(data->hash256, ZERO_KEY, 16)) return 0x00; return 0x02; @@ -5305,8 +5307,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * not supported by the hardware, then check that if * P-192 data values are present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand192, ZERO_KEY, 16) || + !crypto_memneq(data->hash192, ZERO_KEY, 16)) return 0x00; return 0x01; -- cgit From 35d91d95a0cd61ebb90e0246dc917fd25e519b8c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 14:12:19 -0700 Subject: Bluetooth: hci_event: Fix coding style This fixes the following code style problem: ERROR: that open brace { should be on the previous line + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 9b252a65a7cd..7cb41ac1c258 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,8 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) - { + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); -- cgit From 9ee252868787ab5a26012d69e335c7371f54563a Mon Sep 17 00:00:00 2001 From: Max Chou Date: Fri, 6 Oct 2023 10:47:07 +0800 Subject: Bluetooth: btrtl: Ignore error return for hci_devcd_register() If CONFIG_DEV_COREDUMP was not set, it would return -EOPNOTSUPP for hci_devcd_register(). In this commit, ignore error return for hci_devcd_register(). Otherwise Bluetooth initialization will be failed. Fixes: 044014ce85a1 ("Bluetooth: btrtl: Add Realtek devcoredump support") Cc: stable@vger.kernel.org Reported-by: Kirill A. Shutemov Closes: https://lore.kernel.org/all/ZRyqIn0_qqEFBPdy@debian.me/T/ Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84c2c2e1122f..277d039ecbb4 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -962,13 +962,10 @@ static void btrtl_dmp_hdr(struct hci_dev *hdev, struct sk_buff *skb) skb_put_data(skb, buf, strlen(buf)); } -static int btrtl_register_devcoredump_support(struct hci_dev *hdev) +static void btrtl_register_devcoredump_support(struct hci_dev *hdev) { - int err; + hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - err = hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - - return err; } void btrtl_set_driver_name(struct hci_dev *hdev, const char *driver_name) @@ -1255,8 +1252,7 @@ int btrtl_download_firmware(struct hci_dev *hdev, } done: - if (!err) - err = btrtl_register_devcoredump_support(hdev); + btrtl_register_devcoredump_support(hdev); return err; } -- cgit From 18f547f3fc074500ab5d419cf482240324e73a7e Mon Sep 17 00:00:00 2001 From: Edward AD Date: Tue, 10 Oct 2023 13:36:57 +0800 Subject: Bluetooth: hci_sock: fix slab oob read in create_monitor_event When accessing hdev->name, the actual string length should prevail Reported-by: syzbot+c90849c50ed209d77689@syzkaller.appspotmail.com Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Edward AD Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5e4f718073b7..72abe54c45dd 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, 8); + memcpy(ni->name, hdev->name, strlen(hdev->name)); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; -- cgit From 9d1a3c74746428102d55371fbf74b484733937d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Oct 2023 22:31:31 +0200 Subject: Bluetooth: avoid memcmp() out of bounds warning bacmp() is a wrapper around memcpy(), which contain compile-time checks for buffer overflow. Since the hci_conn_request_evt() also calls bt_dev_dbg() with an implicit NULL pointer check, the compiler is now aware of a case where 'hdev' is NULL and treats this as meaning that zero bytes are available: In file included from net/bluetooth/hci_event.c:32: In function 'bacmp', inlined from 'hci_conn_request_evt' at net/bluetooth/hci_event.c:3276:7: include/net/bluetooth/bluetooth.h:364:16: error: 'memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 364 | return memcmp(ba1, ba2, sizeof(bdaddr_t)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Add another NULL pointer check before the bacmp() to ensure the compiler understands the code flow enough to not warn about it. Since the patch that introduced the warning is marked for stable backports, this one should also go that way to avoid introducing build regressions. Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Cc: Kees Cook Cc: "Lee, Chun-Yi" Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7cb41ac1c258..1e1c9147356c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,7 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { + if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); -- cgit From cb3871b1cd135a6662b732fbc6b3db4afcdb4a64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Oct 2023 09:31:44 -0700 Subject: Bluetooth: hci_sock: Correctly bounds check and pad HCI_MON_NEW_INDEX name The code pattern of memcpy(dst, src, strlen(src)) is almost always wrong. In this case it is wrong because it leaves memory uninitialized if it is less than sizeof(ni->name), and overflows ni->name when longer. Normally strtomem_pad() could be used here, but since ni->name is a trailing array in struct hci_mon_new_index, compilers that don't support -fstrict-flex-arrays=3 can't tell how large this array is via __builtin_object_size(). Instead, open-code the helper and use sizeof() since it will work correctly. Additionally mark ni->name as __nonstring since it appears to not be a %NUL terminated C string. Cc: Luiz Augusto von Dentz Cc: Edward AD Cc: Marcel Holtmann Cc: Johan Hedberg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Fixes: 18f547f3fc07 ("Bluetooth: hci_sock: fix slab oob read in create_monitor_event") Link: https://lore.kernel.org/lkml/202310110908.F2639D3276@keescook/ Signed-off-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_mon.h | 2 +- net/bluetooth/hci_sock.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2d5fcda1bcd0..082f89531b88 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -56,7 +56,7 @@ struct hci_mon_new_index { __u8 type; __u8 bus; bdaddr_t bdaddr; - char name[8]; + char name[8] __nonstring; } __packed; #define HCI_MON_NEW_INDEX_SIZE 16 diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 72abe54c45dd..3e7cd330d731 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, strlen(hdev->name)); + memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, + strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; -- cgit From 5c15c60e7be615f05a45cd905093a54b11f461bc Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Fri, 13 Oct 2023 20:11:33 -0700 Subject: Input: powermate - fix use-after-free in powermate_config_complete syzbot has found a use-after-free bug [1] in the powermate driver. This happens when the device is disconnected, which leads to a memory free from the powermate_device struct. When an asynchronous control message completes after the kfree and its callback is invoked, the lock does not exist anymore and hence the bug. Use usb_kill_urb() on pm->config to cancel any in-progress requests upon device disconnection. [1] https://syzkaller.appspot.com/bug?extid=0434ac83f907a1dbdd1e Signed-off-by: Javier Carrasco Reported-by: syzbot+0434ac83f907a1dbdd1e@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20230916-topic-powermate_use_after_free-v3-1-64412b81a7a2@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/powermate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c index c1c733a9cb89..db2ba89adaef 100644 --- a/drivers/input/misc/powermate.c +++ b/drivers/input/misc/powermate.c @@ -425,6 +425,7 @@ static void powermate_disconnect(struct usb_interface *intf) pm->requires_update = 0; usb_kill_urb(pm->irq); input_unregister_device(pm->input); + usb_kill_urb(pm->config); usb_free_urb(pm->irq); usb_free_urb(pm->config); powermate_free_buffers(interface_to_usbdev(intf), pm); -- cgit From 32db510708507f6133f496ff385cbd841d8f9098 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 11 Oct 2023 17:07:03 +0300 Subject: ovl: fix regression in showing lowerdir mount option Before commit b36a5780cb44 ("ovl: modify layer parameter parsing"), spaces and commas in lowerdir mount option value used to be escaped using seq_show_option(). In current upstream, when lowerdir value has a space, it is not escaped in /proc/mounts, e.g.: none /mnt overlay rw,relatime,lowerdir=l l,upperdir=u,workdir=w 0 0 which results in broken output of the mount utility: none on /mnt type overlay (rw,relatime,lowerdir=l) Store the original lowerdir mount options before unescaping and show them using the same escaping used for seq_show_option() in addition to escaping the colon separator character. Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") Signed-off-by: Amir Goldstein --- Documentation/filesystems/overlayfs.rst | 12 +++++++++++ fs/overlayfs/params.c | 38 ++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index cdefbe73d85c..5b93268e400f 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -339,6 +339,18 @@ The specified lower directories will be stacked beginning from the rightmost one and going left. In the above example lower1 will be the top, lower2 the middle and lower3 the bottom layer. +Note: directory names containing colons can be provided as lower layer by +escaping the colons with a single backslash. For example: + + mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged + +Since kernel version v6.5, directory names containing colons can also +be provided as lower layer using the fsconfig syscall from new mount api: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + +In the latter case, colons in lower layer directory names will be escaped +as an octal characters (\072) when displayed in /proc/self/mountinfo. Metadata only copy up --------------------- diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 17c74ef4f089..8b6bae320e8a 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -192,7 +192,8 @@ static ssize_t ovl_parse_param_split_lowerdirs(char *str) for (s = d = str;; s++, d++) { if (*s == '\\') { - s++; + /* keep esc chars in split lowerdir */ + *d++ = *s++; } else if (*s == ':') { bool next_colon = (*(s + 1) == ':'); @@ -267,7 +268,7 @@ static void ovl_unescape(char *s) } } -static int ovl_mount_dir(const char *name, struct path *path) +static int ovl_mount_dir(const char *name, struct path *path, bool upper) { int err = -ENOMEM; char *tmp = kstrdup(name, GFP_KERNEL); @@ -276,7 +277,7 @@ static int ovl_mount_dir(const char *name, struct path *path) ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { + if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) { pr_err("filesystem on '%s' not supported as upperdir\n", tmp); path_put_init(path); @@ -297,7 +298,7 @@ static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, struct path path; char *dup; - err = ovl_mount_dir(name, &path); + err = ovl_mount_dir(name, &path, true); if (err) return err; @@ -500,7 +501,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) l = &ctx->lower[nr]; memset(l, 0, sizeof(*l)); - err = ovl_mount_dir_noesc(dup_iter, &l->path); + err = ovl_mount_dir(dup_iter, &l->path, false); if (err) goto out_put; @@ -979,16 +980,23 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; - char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower]; - - /* lowerdirs[] starts from offset 1 */ - seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]); - /* dump regular lower layers */ - for (nr = 2; nr < nr_merged_lower; nr++) - seq_printf(m, ":%s", ofs->config.lowerdirs[nr]); - /* dump data lower layers */ - for (nr = 0; nr < ofs->numdatalayer; nr++) - seq_printf(m, "::%s", lowerdatadirs[nr]); + + /* + * lowerdirs[] starts from offset 1, then + * >= 0 regular lower layers prefixed with : and + * >= 0 data-only lower layers prefixed with :: + * + * we need to escase comma and space like seq_show_option() does and + * we also need to escape the colon separator from lowerdir paths. + */ + seq_puts(m, ",lowerdir="); + for (nr = 1; nr < ofs->numlayer; nr++) { + if (nr > 1) + seq_putc(m, ':'); + if (nr >= nr_merged_lower) + seq_putc(m, ':'); + seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\"); + } if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); seq_show_option(m, "workdir", ofs->config.workdir); -- cgit From beae836e9c61ee039e367a94b14f7fea08f0ad4c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 14 Oct 2023 22:30:04 +0300 Subject: ovl: temporarily disable appending lowedirs Kernel v6.5 converted overlayfs to new mount api. As an added bonus, it also added a feature to allow appending lowerdirs using lowerdir=:/lower2,lowerdir=::/data3 syntax. This new syntax has raised some concerns regarding escaping of colons. We decided to try and disable this syntax, which hasn't been in the wild for so long and introduce it again in 6.7 using explicit mount options lowerdir+=/lower2,datadir+=/data3. Suggested-by: Miklos Szeredi Link: https://lore.kernel.org/r/CAJfpegsr3A4YgF2YBevWa6n3=AcP7hNndG6EPMu3ncvV-AM71A@mail.gmail.com/ Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") Signed-off-by: Amir Goldstein --- fs/overlayfs/params.c | 52 +++------------------------------------------------ 1 file changed, 3 insertions(+), 49 deletions(-) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8b6bae320e8a..f6ff23fd101c 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -350,12 +350,6 @@ static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) * Set "/lower1", "/lower2", and "/lower3" as lower layers and * "/data1" and "/data2" as data lower layers. Any existing lower * layers are replaced. - * (2) lowerdir=:/lower4 - * Append "/lower4" to current stack of lower layers. This requires - * that there already is at least one lower layer configured. - * (3) lowerdir=::/lower5 - * Append data "/lower5" as data lower layer. This requires that - * there's at least one regular lower layer present. */ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) { @@ -377,49 +371,9 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) return 0; } - if (strncmp(name, "::", 2) == 0) { - /* - * This is a data layer. - * There must be at least one regular lower layer - * specified. - */ - if (ctx->nr == 0) { - pr_err("data lower layers without regular lower layers not allowed"); - return -EINVAL; - } - - /* Skip the leading "::". */ - name += 2; - data_layer = true; - /* - * A data layer is automatically an append as there - * must've been at least one regular lower layer. - */ - append = true; - } else if (*name == ':') { - /* - * This is a regular lower layer. - * If users want to append a layer enforce that they - * have already specified a first layer before. It's - * better to be strict. - */ - if (ctx->nr == 0) { - pr_err("cannot append layer if no previous layer has been specified"); - return -EINVAL; - } - - /* - * Once a sequence of data layers has started regular - * lower layers are forbidden. - */ - if (ctx->nr_data > 0) { - pr_err("regular lower layers cannot follow data lower layers"); - return -EINVAL; - } - - /* Skip the leading ":". */ - name++; - append = true; + if (*name == ':') { + pr_err("cannot append lower layer"); + return -EINVAL; } dup = kstrdup(name, GFP_KERNEL); -- cgit From ff9e8f41513669e290f6e1904e1bc75950584491 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2023 22:28:49 +1000 Subject: powerpc/mm: Allow ARCH_FORCE_MAX_ORDER up to 12 Christophe reported that the change to ARCH_FORCE_MAX_ORDER to limit the range to 10 had broken his ability to configure hugepages: # echo 1 > /sys/kernel/mm/hugepages/hugepages-8192kB/nr_hugepages sh: write error: Invalid argument Several of the powerpc defconfigs previously set the ARCH_FORCE_MAX_ORDER value to 12, via the definition in arch/powerpc/configs/fsl-emb-nonhw.config, used by: mpc85xx_defconfig mpc85xx_smp_defconfig corenet32_smp_defconfig corenet64_smp_defconfig mpc86xx_defconfig mpc86xx_smp_defconfig Fix it by increasing the allowed range to 12 to restore the previous behaviour. Fixes: 358e526a1648 ("powerpc/mm: Reinstate ARCH_FORCE_MAX_ORDER ranges") Reported-by: Christophe Leroy Closes: https://lore.kernel.org/all/8011d806-5b30-bf26-2bfe-a08c39d57e20@csgroup.eu/ Tested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230824122849.942072-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3aaadfd2c8eb..d5d5388973ac 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -910,7 +910,7 @@ config ARCH_FORCE_MAX_ORDER default "6" if PPC32 && PPC_64K_PAGES range 4 10 if PPC32 && PPC_256K_PAGES default "4" if PPC32 && PPC_256K_PAGES - range 10 10 + range 10 12 default "10" help The kernel page allocator limits the size of maximal physically -- cgit From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 13 Oct 2023 18:48:12 +0530 Subject: qed: fix LL2 RX buffer allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver allocates the LL2 rx buffers from kmalloc() area to construct the skb using slab_build_skb() The required size allocation seems to have overlooked for accounting both skb_shared_info size and device placement padding bytes which results into the below panic when doing skb_put() for a standard MTU sized frame. skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514 head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566 dev: … skb_panic+0x48/0x4a skb_put.cold+0x10/0x10 qed_ll2b_complete_rx_packet+0x14f/0x260 [qed] qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed] qed_ll2_rxq_completion+0xba/0x320 [qed] qed_int_sp_dpc+0x1a7/0x1e0 [qed] This patch fixes this by accouting skb_shared_info and device placement padding size bytes when allocating the buffers. Cc: David S. Miller Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support") Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 717a0b3f89bd..ab5ef254a748 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt, static int qed_ll2_alloc_buffer(struct qed_dev *cdev, u8 **data, dma_addr_t *phys_addr) { - *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + size_t size = cdev->ll2->rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + *data = kmalloc(size, GFP_ATOMIC); if (!(*data)) { DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); return -ENOMEM; @@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) INIT_LIST_HEAD(&cdev->ll2->list); spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN + L1_CACHE_BYTES + params->mtu; /* Allocate memory for LL2. -- cgit From 8a540e990d7da36813cb71a4a422712bfba448a4 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 7 Oct 2023 01:14:21 -0400 Subject: btrfs: fix stripe length calculation for non-zoned data chunk allocation Commit f6fca3917b4d "btrfs: store chunk size in space-info struct" broke data chunk allocations on non-zoned multi-device filesystems when using default chunk_size. Commit 5da431b71d4b "btrfs: fix the max chunk size and stripe length calculation" partially fixed that, and this patch completes the fix for that case. After commit f6fca3917b4d and 5da431b71d4b, the sequence of events for a data chunk allocation on a non-zoned filesystem is: 1. btrfs_create_chunk calls init_alloc_chunk_ctl, which copies space_info->chunk_size (default 10 GiB) to ctl->max_stripe_len unmodified. Before f6fca3917b4d, ctl->max_stripe_len value was 1 GiB for non-zoned data chunks and not configurable. 2. btrfs_create_chunk calls gather_device_info which consumes and produces more fields of chunk_ctl. 3. gather_device_info multiplies ctl->max_stripe_len by ctl->dev_stripes (which is 1 in all cases except dup) and calls find_free_dev_extent with that number as num_bytes. 4. find_free_dev_extent locates the first dev_extent hole on a device which is at least as large as num_bytes. With default max_chunk_size from f6fca3917b4d, it finds the first hole which is longer than 10 GiB, or the largest hole if that hole is shorter than 10 GiB. This is different from the pre-f6fca3917b4d behavior, where num_bytes is 1 GiB, and find_free_dev_extent may choose a different hole. 5. gather_device_info repeats step 4 with all devices to find the first or largest dev_extent hole that can be allocated on each device. 6. gather_device_info sorts the device list by the hole size on each device, using total unallocated space on each device to break ties, then returns to btrfs_create_chunk with the list. 7. btrfs_create_chunk calls decide_stripe_size_regular. 8. decide_stripe_size_regular finds the largest stripe_len that fits across the first nr_devs device dev_extent holes that were found by gather_device_info (and satisfies other constraints on stripe_len that are not relevant here). 9. decide_stripe_size_regular caps the length of the stripe it computed at 1 GiB. This cap appeared in 5da431b71d4b to correct one of the other regressions introduced in f6fca3917b4d. 10. btrfs_create_chunk creates a new chunk with the above computed size and number of devices. At step 4, gather_device_info() has found a location where stripe up to 10 GiB in length could be allocated on several devices, and selected which devices should have a dev_extent allocated on them, but at step 9, only 1 GiB of the space that was found on each device can be used. This mismatch causes new suboptimal chunk allocation cases that did not occur in pre-f6fca3917b4d kernels. Consider a filesystem using raid1 profile with 3 devices. After some balances, device 1 has 10x 1 GiB unallocated space, while devices 2 and 3 have 1x 10 GiB unallocated space, i.e. the same total amount of space, but distributed across different numbers of dev_extent holes. For visualization, let's ignore all the chunks that were allocated before this point, and focus on the remaining holes: Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10x 1 GiB unallocated) Device 2: [__________] (10 GiB contig unallocated) Device 3: [__________] (10 GiB contig unallocated) Before f6fca3917b4d, the allocator would fill these optimally by allocating chunks with dev_extents on devices 1 and 2 ([12]), 1 and 3 ([13]), or 2 and 3 ([23]): [after 0 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [__________] (10 GiB) Device 3: [__________] (10 GiB) [after 1 chunk allocation] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] Device 2: [12] [_________] (9 GiB) Device 3: [__________] (10 GiB) [after 2 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [12] [_________] (9 GiB) Device 3: [13] [_________] (9 GiB) [after 3 chunk allocations] Device 1: [12] [13] [12] [_] [_] [_] [_] [_] [_] [_] (7 GiB) Device 2: [12] [12] [________] (8 GiB) Device 3: [13] [_________] (9 GiB) [...] [after 12 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [_] [_] (2 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [__] (2 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 13 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [_] (1 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 14 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [_] (1 GiB) [after 15 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [23] (full) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [23] (full) This allocates all of the space with no waste. The sorting function used by gather_device_info considers free space holes above 1 GiB in length to be equal to 1 GiB, so once find_free_dev_extent locates a sufficiently long hole on each device, all the holes appear equal in the sort, and the comparison falls back to sorting devices by total free space. This keeps usable space on each device equal so they can all be filled completely. After f6fca3917b4d, the allocator prefers the devices with larger holes over the devices with more free space, so it makes bad allocation choices: [after 1 chunk allocation] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [_________] (9 GiB) Device 3: [23] [_________] (9 GiB) [after 2 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [________] (8 GiB) Device 3: [23] [23] [________] (8 GiB) [after 3 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [_______] (7 GiB) Device 3: [23] [23] [23] [_______] (7 GiB) [...] [after 9 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 10 chunk allocations] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] (9 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 11 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [13] (full) No further allocations are possible, with 8 GiB wasted (4 GiB of data space). The sort in gather_device_info now considers free space in holes longer than 1 GiB to be distinct, so it will prefer devices 2 and 3 over device 1 until all but 1 GiB is allocated on devices 2 and 3. At that point, with only 1 GiB unallocated on every device, the largest hole length on each device is equal at 1 GiB, so the sort finally moves to ordering the devices with the most free space, but by this time it is too late to make use of the free space on device 1. Note that it's possible to contrive a case where the pre-f6fca3917b4d allocator fails the same way, but these cases generally have extensive dev_extent fragmentation as a precondition (e.g. many holes of 768M in length on one device, and few holes 1 GiB in length on the others). With the regression in f6fca3917b4d, bad chunk allocation can occur even under optimal conditions, when all dev_extent holes are exact multiples of stripe_len in length, as in the example above. Also note that post-f6fca3917b4d kernels do treat dev_extent holes larger than 10 GiB as equal, so the bad behavior won't show up on a freshly formatted filesystem; however, as the filesystem ages and fills up, and holes ranging from 1 GiB to 10 GiB in size appear, the problem can show up as a failure to balance after adding or removing devices, or an unexpected shortfall in available space due to unequal allocation. To fix the regression and make data chunk allocation work again, set ctl->max_stripe_len back to the original SZ_1G, or space_info->chunk_size if that's smaller (the latter can happen if the user set space_info->chunk_size to less than 1 GiB via sysfs, or it's a 32 MiB system chunk with a hardcoded chunk_size and stripe_len). While researching the background of the earlier commits, I found that an identical fix was already proposed at: https://lore.kernel.org/linux-btrfs/de83ac46-a4a3-88d3-85ce-255b7abc5249@gmx.com/ The previous review missed one detail: ctl->max_stripe_len is used before decide_stripe_size_regular() is called, when it is too late for the changes in that function to have any effect. ctl->max_stripe_len is not used directly by decide_stripe_size_regular(), but the parameter does heavily influence the per-device free space data presented to the function. Fixes: f6fca3917b4d ("btrfs: store chunk size in space-info struct") CC: stable@vger.kernel.org # 6.1+ Link: https://lore.kernel.org/linux-btrfs/20231007051421.19657-1-ce3g8jdj@umail.furryterror.org/ Reviewed-by: Qu Wenruo Signed-off-by: Zygo Blaxell Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e3a3769fd92e..43fee429834a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5111,7 +5111,7 @@ static void init_alloc_chunk_ctl_policy_regular( ASSERT(space_info); ctl->max_chunk_size = READ_ONCE(space_info->chunk_size); - ctl->max_stripe_size = ctl->max_chunk_size; + ctl->max_stripe_size = min_t(u64, ctl->max_chunk_size, SZ_1G); if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM) ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK); -- cgit From 5720c43d5216b5dbd9ab25595f7c61e55d36d4fc Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 27 Sep 2023 13:52:46 +0800 Subject: virtio_net: fix the missing of the dma cpu sync Commit 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") unmaps the buffer with DMA_ATTR_SKIP_CPU_SYNC when the dma->ref is zero. We do that with DMA_ATTR_SKIP_CPU_SYNC, because we do not want to do the sync for the entire page_frag. But that misses the sync for the current area. This patch does cpu sync regardless of whether the ref is zero or not. Fixes: 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") Reported-by: Michael Roth Closes: http://lore.kernel.org/all/20230926130451.axgodaa6tvwqs3ut@amd.com Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Linus Torvalds --- drivers/net/virtio_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fe7f314d65c9..d67f742fbd4c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -607,16 +607,16 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) --dma->ref; - if (dma->ref) { - if (dma->need_sync && len) { - offset = buf - (head + sizeof(*dma)); + if (dma->need_sync && len) { + offset = buf - (head + sizeof(*dma)); - virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, - len, DMA_FROM_DEVICE); - } + virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, + offset, len, + DMA_FROM_DEVICE); + } + if (dma->ref) return; - } virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); -- cgit From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Oct 2023 10:01:14 -0400 Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation Syzbot reported two new paths to hit an internal WARNING using the new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 skb len=64521 gso_size=344 and RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 Older virtio types have historically had loose restrictions, leading to many entirely impractical fuzzer generated packets causing problems deep in the kernel stack. Ideally, we would have had strict validation for all types from the start. New virtio types can have tighter validation. Limit UDP GSO packets inserted via virtio to the same limits imposed by the UDP_SEGMENT socket interface: 1. must use checksum offload 2. checksum offload matches UDP header 3. no more segments than UDP_MAX_SEGMENTS 4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..27cc1d464321 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include #include -#include #include static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) -- cgit From fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Oct 2023 12:02:02 -0700 Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible" This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the two subsequent fixes to it: 3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs") b1472a60a584 ("x86/smp: Don't send INIT to boot CPU") because it seems to result in hung machines at shutdown. Particularly some Dell machines, but Thomas says "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs - at least that's what I could figure out from the various bug reports. I don't know which CPUs the DELL machines have, so I can't say it's a pattern. I agree with the revert for now" Ashok Raj chimes in: "There was a report (probably this same one), and it turns out it was a bug in the BIOS SMI handler. The client BIOS's were waiting for the lowest APICID to be the SMI rendevous master. If this is MeteorLake, the BSP wasn't the one with the lowest APIC and it triped here. The BIOS change is also being pushed to others for assimilation :) Server BIOS's had this correctly for a while now" and it does look likely to be some bad interaction between SMI and the non-BSP cores having put into INIT (and thus unresponsive until reset). Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429 Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/ Link: https://forum.artixlinux.org/index.php/topic,5997.0.html Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279 Acked-by: Thomas Gleixner Cc: Ashok Raj Signed-off-by: Linus Torvalds --- arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/smp.c | 39 +++++++-------------------------------- arch/x86/kernel/smpboot.c | 27 --------------------------- 3 files changed, 7 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ad98dd1d9cfb..c31c633419fe 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -bool smp_park_other_cpus_in_init(void); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 6eb06d001bcc..96a771f9f930 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * Disable virtualization, APIC etc. and park the CPU in a HLT loop + * this function calls the 'stop' function on all other CPUs in the system. */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait) * 2) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * 3) If the system uses INIT/STARTUP for CPU bringup, then - * send all present CPUs an INIT vector, which brings them - * completely out of the way. + * 3) If #2 timed out send an NMI to the CPUs which did not + * yet report * - * 4) If #3 is not possible and #2 timed out send an NMI to the - * CPUs which did not yet report - * - * 5) Wait for all other CPUs to report that they reached the + * 4) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * #4 can obviously race against a CPU reaching the HLT loop late. + * #3 can obviously race against a CPU reaching the HLT loop late. * That CPU will have reported already and the "have all CPUs * reached HLT" condition will be true despite the fact that the * other CPU is still handling the NMI. Again, there is no @@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait) /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI/INIT shutdown in case that not all + * prevent an NMI shutdown attempt in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; @@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait) udelay(1); } - /* - * Park all other CPUs in INIT including "offline" CPUs, if - * possible. That's a safe place where they can't resume execution - * of HLT and then execute the HLT loop from overwritten text or - * page tables. - * - * The only downside is a broadcast MCE, but up to the point where - * the kexec() kernel brought all APs online again an MCE will just - * make HLT resume and handle the MCE. The machine crashes and burns - * due to overwritten text, page tables and data. So there is a - * choice between fire and frying pan. The result is pretty much - * the same. Chose frying pan until x86 provides a sane mechanism - * to park a CPU. - */ - if (smp_park_other_cpus_in_init()) - goto done; - - /* - * If park with INIT was not possible and the REBOOT_VECTOR didn't - * take all secondary CPUs offline, try with the NMI. - */ + /* if the REBOOT_VECTOR didn't work, try with the NMI */ if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler @@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait) udelay(1); } -done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 48e040618731..2a187c0cbd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } -bool smp_park_other_cpus_in_init(void) -{ - unsigned int cpu, this_cpu = smp_processor_id(); - unsigned int apicid; - - if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) - return false; - - /* - * If this is a crash stop which does not execute on the boot CPU, - * then this cannot use the INIT mechanism because INIT to the boot - * CPU will reset the machine. - */ - if (this_cpu) - return false; - - for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { - if (cpu == this_cpu) - continue; - apicid = apic->cpu_present_to_apicid(cpu); - if (apicid == BAD_APICID) - continue; - send_init_sequence(apicid); - } - return true; -} - /* * Early setup to make printk work. */ -- cgit From 92e37f20f20a23fec4626ae72eda50f127acb130 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:36 -0400 Subject: selftests: openvswitch: Add version check for pyroute2 Paolo Abeni reports that on some systems the pyroute2 version isn't new enough to run the test suite. Ensure that we support a minimum version of 0.6 for all cases (which does include the existing ones). The 0.6.1 version was released in May of 2021, so should be propagated to most installations at this point. The alternative that Paolo proposed was to only skip when the add-flow is being run. This would be okay for most cases, except if a future test case is added that needs to do flow dump without an associated add (just guessing). In that case, it could also be broken and we would need additional skip logic anyway. Just draw a line in the sand now. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Reported-by: Paolo Abeni Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/ Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 9c2012d70b08..220c3356901e 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -525,7 +525,7 @@ run_test() { fi if python3 ovs-dpctl.py -h 2>&1 | \ - grep "Need to install the python" >/dev/null 2>&1; then + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 912dc8c49085..e4c24d5edf20 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -28,8 +28,10 @@ try: from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + except ModuleNotFoundError: - print("Need to install the python pyroute2 package.") + print("Need to install the python pyroute2 package >= 0.6.") sys.exit(0) @@ -1998,6 +2000,12 @@ def main(argv): nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + parser = argparse.ArgumentParser() parser.add_argument( "-v", -- cgit From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:37 -0400 Subject: selftests: openvswitch: Catch cases where the tests are killed In case of fatal signal, or early abort at least cleanup the current test case. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 220c3356901e..2a0112be7ead 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -3,6 +3,8 @@ # # OVS kernel module self tests +trap ovs_exit_sig EXIT TERM INT ERR + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -- cgit From 76035fd12cb9046be00ffb9d4262b5b3277d5068 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:38 -0400 Subject: selftests: openvswitch: Skip drop testing on older kernels Kernels that don't have support for openvswitch drop reasons also won't have the drop counter reasons, so we should skip the test completely. It previously wasn't possible to build a test case for this without polluting the datapath, so we introduce a mechanism to clear all the flows from a datapath allowing us to test for explicit drop actions, and then clear the flows to build the original test case. Fixes: 4242029164d6 ("selftests: openvswitch: add explicit drop testcase") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- .../selftests/net/openvswitch/openvswitch.sh | 17 +++++++++++ .../testing/selftests/net/openvswitch/ovs-dpctl.py | 34 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 2a0112be7ead..f8499d4c87f3 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -144,6 +144,12 @@ ovs_add_flow () { return 0 } +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + ovs_drop_record_and_run () { local sbx=$1 shift @@ -200,6 +206,17 @@ test_drop_reason() { ip netns exec server ip addr add 172.31.110.20/24 dev s1 ip netns exec server ip link set s1 up + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + # Allow ARP ovs_add_flow "test_drop_reason" dropreason \ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index e4c24d5edf20..10b8f31548f8 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1906,6 +1906,32 @@ class OvsFlow(GenericNetlinkSocket): raise ne return reply + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -2068,6 +2094,9 @@ def main(argv): addflcmd.add_argument("flow", help="Flow specification") addflcmd.add_argument("acts", help="Flow actions") + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + args = parser.parse_args() if args.verbose > 0: @@ -2151,6 +2180,11 @@ def main(argv): flow = OvsFlow.ovs_flow_msg() flow.parse(args.flow, args.acts, rep["dpifindex"]) ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." % args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) return 0 -- cgit From 8eff0e062201e26739c74ac2355b7362622b7190 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:39 -0400 Subject: selftests: openvswitch: Fix the ct_tuple for v4 The ct_tuple v4 data structure decode / encode routines were using the v6 IP address decode and relying on default encode. This could cause exceptions during encode / decode depending on how a ct4 tuple would appear in a netlink message. Caught during code review. Fixes: e52b07aa1a54 ("selftests: openvswitch: add flow dump support") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 10b8f31548f8..b97e621face9 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1119,12 +1119,14 @@ class ovskey(nla): "src", lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ( "dst", "dst", - lambda x: str(ipaddress.IPv6Address(x)), + lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), -- cgit From 58720809f52779dc0f08e53e54b014209d13eebb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Oct 2023 13:34:39 -0700 Subject: Linux 6.6-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 88ebf6547964..a3e52e10840e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 52480e1f1a259c93d749ba3961af0bffedfe7a7a Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Mon, 16 Oct 2023 15:36:16 +0800 Subject: USB: serial: option: add Fibocom to DELL custom modem FM101R-GL Update the USB serial option driver support for the Fibocom FM101R-GL LTE modules as there are actually several different variants. - VID:PID 413C:8213, FM101R-GL are laptop M.2 cards (with MBIM interfaces for Linux) - VID:PID 413C:8215, FM101R-GL ESIM are laptop M.2 cards (with MBIM interface for Linux) 0x8213: mbim, tty 0x8215: mbim, tty T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8213 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8215 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0908e6d4921..45dcfaadaf98 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,6 +203,9 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 +#define DELL_PRODUCT_FM101R 0x8213 +#define DELL_PRODUCT_FM101R_ESIM 0x8215 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1108,6 +1111,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R_ESIM, 0xff) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, -- cgit From f6ca3fb6978f94d95ee79f95085fc22e71ca17cc Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Fri, 22 Sep 2023 16:17:16 +0200 Subject: mtd: rawnand: Ensure the nand chip supports cached reads Both the JEDEC and ONFI specification say that read cache sequential support is an optional command. This means that we not only need to check whether the individual controller supports the command, we also need to check the parameter pages for both ONFI and JEDEC NAND flashes before enabling sequential cache reads. This fixes support for NAND flashes which don't support enabling cache reads, i.e. Samsung K9F4G08U0F or Toshiba TC58NVG0S3HTA00. Sequential cache reads are now only available for ONFI and JEDEC devices, if individual vendors implement this, it needs to be enabled per vendor. Tested on i.MX6Q with a Samsung NAND flash chip that doesn't support sequential reads. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230922141717.35977-1-r.czerwinski@pengutronix.de --- drivers/mtd/nand/raw/nand_base.c | 3 +++ drivers/mtd/nand/raw/nand_jedec.c | 3 +++ drivers/mtd/nand/raw/nand_onfi.c | 3 +++ include/linux/mtd/jedec.h | 3 +++ include/linux/mtd/onfi.h | 1 + include/linux/mtd/rawnand.h | 2 ++ 6 files changed, 15 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d4b55155aeae..1fcac403cee6 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5110,6 +5110,9 @@ static void rawnand_check_cont_read_support(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (!chip->parameters.supports_read_cache) + return; + if (chip->read_retries) return; diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index 836757717660..b3cc8f360529 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -94,6 +94,9 @@ int nand_jedec_detect(struct nand_chip *chip) goto free_jedec_param_page; } + if (p->opt_cmd[0] & JEDEC_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + memorg->pagesize = le32_to_cpu(p->byte_per_page); mtd->writesize = memorg->pagesize; diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index f15ef90aec8c..861975e44b55 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -303,6 +303,9 @@ int nand_onfi_detect(struct nand_chip *chip) ONFI_FEATURE_ADDR_TIMING_MODE, 1); } + if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + onfi = kzalloc(sizeof(*onfi), GFP_KERNEL); if (!onfi) { ret = -ENOMEM; diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 90a141ba2a5a..c29ace15a053 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); -- cgit From 63e8b94ad1840f02462633abdb363397f56bc642 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 21 Sep 2023 15:14:12 +0800 Subject: s390/cio: fix a memleak in css_alloc_subchannel When dma_set_coherent_mask() fails, sch->lock has not been freed, which is allocated in css_sch_create_locks(), leading to a memleak. Fixes: 4520a91a976e ("s390/cio: use dma helpers for setting masks") Signed-off-by: Dinghao Liu Message-Id: <20230921071412.13806-1-dinghao.liu@zju.edu.cn> Link: https://lore.kernel.org/linux-s390/bd38baa8-7b9d-4d89-9422-7e943d626d6e@linux.ibm.com/ Reviewed-by: Halil Pasic Reviewed-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- drivers/s390/cio/css.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 3ef636935a54..3ff46fc694f8 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -233,17 +233,19 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, */ ret = dma_set_coherent_mask(&sch->dev, DMA_BIT_MASK(31)); if (ret) - goto err; + goto err_lock; /* * But we don't have such restrictions imposed on the stuff that * is handled by the streaming API. */ ret = dma_set_mask(&sch->dev, DMA_BIT_MASK(64)); if (ret) - goto err; + goto err_lock; return sch; +err_lock: + kfree(sch->lock); err: kfree(sch); return ERR_PTR(ret); -- cgit From 327899674eef18f96644be87aa5510b7523fe4f6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 12 Oct 2023 11:06:21 +0200 Subject: s390/kasan: handle DCSS mapping in memory holes When physical memory is defined under z/VM using DEF STOR CONFIG, there may be memory holes that are not hotpluggable memory. In such cases, DCSS mapping could be placed in one of these memory holes. Subsequently, attempting memory access to such DCSS mapping would result in a kasan failure because there is no shadow memory mapping for it. To maintain consistency with cases where DCSS mapping is positioned after the kernel identity mapping, which is then covered by kasan zero shadow mapping, handle the scenario above by populating zero shadow mapping for memory holes where DCSS mapping could potentially be placed. Reviewed-by: Heiko Carstens Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/boot/vmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 01257ce3b89c..442a74f113cb 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -57,6 +57,7 @@ static void kasan_populate_shadow(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; unsigned long untracked_end; unsigned long start, end; int i; @@ -101,8 +102,12 @@ static void kasan_populate_shadow(void) * +- shadow end ----+---------+- shadow end ---+ */ - for_each_physmem_usable_range(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) { kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) + kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + memgap_start = end; + } if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ -- cgit From 3b401e30c249849d803de6c332dad2a595a58658 Mon Sep 17 00:00:00 2001 From: Karolina Stolarek Date: Mon, 16 Oct 2023 14:15:25 +0200 Subject: drm/ttm: Reorder sys manager cleanup step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current cleanup flow, we could trigger a NULL pointer dereference if there is a delayed destruction of a BO with a system resource that gets executed on drain_workqueue() call, as we attempt to free a resource using an already released resource manager. Remove the device from the device list and drain its workqueue before releasing the system domain manager in ttm_device_fini(). Signed-off-by: Karolina Stolarek Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20231016121525.2237838-1-karolina.stolarek@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 7726a72befc5..d48b39132b32 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -232,10 +232,6 @@ void ttm_device_fini(struct ttm_device *bdev) struct ttm_resource_manager *man; unsigned i; - man = ttm_manager_type(bdev, TTM_PL_SYSTEM); - ttm_resource_manager_set_used(man, false); - ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); - mutex_lock(&ttm_global_mutex); list_del(&bdev->device_list); mutex_unlock(&ttm_global_mutex); @@ -243,6 +239,10 @@ void ttm_device_fini(struct ttm_device *bdev) drain_workqueue(bdev->wq); destroy_workqueue(bdev->wq); + man = ttm_manager_type(bdev, TTM_PL_SYSTEM); + ttm_resource_manager_set_used(man, false); + ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) -- cgit From c8befdc411e5fd1bf95a13e8744c8ca79b412bee Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 13 Oct 2023 16:57:05 +0200 Subject: pinctrl: qcom: lpass-lpi: fix concurrent register updates The Qualcomm LPASS LPI pin controller driver uses one lock for guarding Read-Modify-Write code for slew rate registers. However the pin configuration and muxing registers have exactly the same RMW code but are not protected. Pin controller framework does not provide locking here, thus it is possible to trigger simultaneous change of pin configuration registers resulting in non-atomic changes. Protect from concurrent access by re-using the same lock used to cover the slew rate register. Using the same lock instead of adding second one will make more sense, once we add support for newer Qualcomm SoC, where slew rate is configured in the same register as pin configuration/muxing. Fixes: 6e261d1090d6 ("pinctrl: qcom: Add sm8250 lpass lpi pinctrl driver") Cc: stable@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231013145705.219954-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-lpass-lpi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c index e5a418026ba3..0b2839d27fd6 100644 --- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c @@ -32,7 +32,8 @@ struct lpi_pinctrl { char __iomem *tlmm_base; char __iomem *slew_base; struct clk_bulk_data clks[MAX_LPI_NUM_CLKS]; - struct mutex slew_access_lock; + /* Protects from concurrent register updates */ + struct mutex lock; DECLARE_BITMAP(ever_gpio, MAX_NR_GPIO); const struct lpi_pinctrl_variant_data *data; }; @@ -103,6 +104,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, if (WARN_ON(i == g->nfuncs)) return -EINVAL; + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, pin, LPI_GPIO_CFG_REG); /* @@ -128,6 +130,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, u32p_replace_bits(&val, i, LPI_GPIO_FUNCTION_MASK); lpi_gpio_write(pctrl, pin, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -233,14 +236,14 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, if (slew_offset == LPI_NO_SLEW) break; - mutex_lock(&pctrl->slew_access_lock); + mutex_lock(&pctrl->lock); sval = ioread32(pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); sval &= ~(LPI_SLEW_RATE_MASK << slew_offset); sval |= arg << slew_offset; iowrite32(sval, pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); - mutex_unlock(&pctrl->slew_access_lock); + mutex_unlock(&pctrl->lock); break; default: return -EINVAL; @@ -256,6 +259,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val); } + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG); u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK); @@ -264,6 +268,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, u32p_replace_bits(&val, output_enabled, LPI_GPIO_OE_MASK); lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -461,7 +466,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) pctrl->chip.label = dev_name(dev); pctrl->chip.can_sleep = false; - mutex_init(&pctrl->slew_access_lock); + mutex_init(&pctrl->lock); pctrl->ctrl = devm_pinctrl_register(dev, &pctrl->desc, pctrl); if (IS_ERR(pctrl->ctrl)) { @@ -483,7 +488,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) return 0; err_pinctrl: - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); return ret; @@ -495,7 +500,7 @@ int lpi_pinctrl_remove(struct platform_device *pdev) struct lpi_pinctrl *pctrl = platform_get_drvdata(pdev); int i; - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); for (i = 0; i < pctrl->data->npins; i++) -- cgit From 88630e91f12677848c0c4a5790ec0d691f8859fa Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 12 Oct 2023 14:49:27 -0400 Subject: drm/edid: add 8 bpc quirk to the BenQ GW2765 The BenQ GW2765 reports that it supports higher (> 8) bpc modes, but when trying to set them we end up with a black screen. So, limit it to 8 bpc modes. Cc: stable@vger.kernel.org # 6.5+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2610 Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20231012184927.133137-1-hamza.mahfooz@amd.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 340da8257b51..4b71040ae5be 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -123,6 +123,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('A', 'E', 'O', 0, EDID_QUIRK_FORCE_6BPC), + /* BenQ GW2765 */ + EDID_QUIRK('B', 'N', 'Q', 0x78d6, EDID_QUIRK_FORCE_8BPC), + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('B', 'O', 'E', 0x78b, EDID_QUIRK_FORCE_6BPC), -- cgit From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 4 Oct 2023 16:32:24 +0800 Subject: drm/mediatek: Correctly free sg_table in gem prime vmap The MediaTek DRM driver implements GEM PRIME vmap by fetching the sg_table for the object, iterating through the pages, and then vmapping them. In essence, unlike the GEM DMA helpers which vmap when the object is first created or imported, the MediaTek version does it on request. Unfortunately, the code never correctly frees the sg_table contents. This results in a kernel memory leak. On a Hayato device with a text console on the internal display, this results in the system running out of memory in a few days from all the console screen cursor updates. Add sg_free_table() to correctly free the contents of the sg_table. This was missing despite explicitly required by mtk_gem_prime_get_sg_table(). Also move the "out" shortcut label to after the kfree() call for the sg_table. Having sg_free_table() together with kfree() makes more sense. The shortcut is only used when the object already has a kernel address, in which case the pointer is NULL and kfree() does nothing. Hence this change causes no functional change. Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function") Cc: Signed-off-by: Chen-Yu Tsai Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055-1-wenst@chromium.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 9f364df52478..0e0a41b2f57f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) npages = obj->size >> PAGE_SHIFT; mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); if (!mtk_gem->pages) { + sg_free_table(sgt); kfree(sgt); return -ENOMEM; } @@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); if (!mtk_gem->kvaddr) { + sg_free_table(sgt); kfree(sgt); kfree(mtk_gem->pages); return -ENOMEM; } -out: + sg_free_table(sgt); kfree(sgt); + +out: iosys_map_set_vaddr(map, mtk_gem->kvaddr); return 0; -- cgit From dc608db793731426938baa2f0e75a4a3cce5f5cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Oct 2023 14:19:52 +0300 Subject: fbdev: omapfb: fix some error codes Return negative -ENXIO instead of positive ENXIO. Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index f28cb90947a3..42c96f1cfc93 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1645,13 +1645,13 @@ static int omapfb_do_probe(struct platform_device *pdev, } fbdev->int_irq = platform_get_irq(pdev, 0); if (fbdev->int_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); if (fbdev->ext_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } -- cgit From e638d3710f0e2483ce01fbc113da83ba3639489c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:04:40 +0200 Subject: fbdev: sa1100fb: mark sa1100fb_init() static This is a global function that is only referenced as an initcall. This causes a warning: drivers/video/fbdev/sa1100fb.c:1218:12: error: no previous prototype for 'sa1100fb_init' [-Werror=missing-prototypes] Make it static instead. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/sa1100fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index 3d76ce111488..cf0f706762b4 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1214,7 +1214,7 @@ static struct platform_driver sa1100fb_driver = { }, }; -int __init sa1100fb_init(void) +static int __init sa1100fb_init(void) { if (fb_get_options("sa1100fb", NULL)) return -ENODEV; -- cgit From 0c37bffaaebe1733433b480d612282169632a31a Mon Sep 17 00:00:00 2001 From: Jorge Maidana Date: Fri, 6 Oct 2023 17:43:46 -0300 Subject: fbdev: uvesafb: Remove uvesafb_exec() prototype from include/video/uvesafb.h uvesafb_exec() is a static function defined and called only in drivers/video/fbdev/uvesafb.c, remove the prototype from include/video/uvesafb.h. Fixes the warning: ./include/video/uvesafb.h:112:12: warning: 'uvesafb_exec' declared 'static' but never defined [-Wunused-function] when including '