From 9c1b2429c18424759818e16e0767361a535529a8 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Fri, 22 Sep 2023 15:22:06 +0200
Subject: accel/ivpu: Add Arrow Lake pci id

Enable VPU on Arrow Lake CPUs.

Reviewed-by: Krystian Pradzynski
Reviewed-by: Karol Wachowski
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230922132206.812817-1-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 1 +
 drivers/accel/ivpu/ivpu_drv.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index ba79f397c9e8..aa7314fdbc0f 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -634,6 +634,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
 
 static struct pci_device_id ivpu_pci_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
 	{ }
 };

diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 9e8c075fe9ef..03b3d6532fb6 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -23,6 +23,7 @@
 #define DRIVER_DATE "20230117"
 
 #define PCI_DEVICE_ID_MTL 0x7d1d
+#define PCI_DEVICE_ID_ARL 0xad1d
 #define PCI_DEVICE_ID_LNL 0x643e
 
 #define IVPU_HW_37XX 37
@@ -165,6 +166,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev)
 {
 	switch (ivpu_device_id(vdev)) {
 	case PCI_DEVICE_ID_MTL:
+	case PCI_DEVICE_ID_ARL:
 		return IVPU_HW_37XX;
 	case PCI_DEVICE_ID_LNL:
 		return IVPU_HW_40XX;
-- cgit

From 863a8eb3f27098b42772f668e3977ff4cae10b04 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 19 Sep 2023 20:48:55 +0100
Subject: i915: Limit the length of an sg list to the requested length

The folio conversion changed the behaviour of shmem_sg_alloc_table() to
put the entire length of the last folio into the sg list, even if the
sg list should have been shorter. gen8_ggtt_insert_entries() relied on
the list being the right length and would overrun the end of the page
tables. Other functions may also have been affected.

Clamp the length of the last entry in the sg list to be the expected
length.
Signed-off-by: Matthew Wilcox (Oracle)
Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
Cc: stable@vger.kernel.org # 6.5.x
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9256
Link: https://lore.kernel.org/lkml/6287208.lOV4Wx5bFT@natalenko.name/
Reported-by: Oleksandr Natalenko
Tested-by: Oleksandr Natalenko
Reviewed-by: Andrzej Hajda
Signed-off-by: Andrzej Hajda
Link: https://patchwork.freedesktop.org/patch/msgid/20230919194855.347582-1-willy@infradead.org
(cherry picked from commit 26a8e32e6d77900819c0c730fbfb393692dbbeea)
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 8f1633c3fb93..73a4a4eb29e0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
 		struct folio *folio;
+		unsigned long nr_pages;
 		const unsigned int shrink[] = {
 			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
 			0,
@@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 			}
 		} while (1);
 
+		nr_pages = min_t(unsigned long,
+				 folio_nr_pages(folio), page_count - i);
 		if (!i ||
 		    sg->length >= max_segment ||
 		    folio_pfn(folio) != next_pfn) {
@@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 			}
 			sg = sg_next(sg);
 
 			st->nents++;
-			sg_set_folio(sg, folio, folio_size(folio), 0);
+			sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
 		} else {
 			/* XXX: could overflow? */
-			sg->length += folio_size(folio);
+			sg->length += nr_pages * PAGE_SIZE;
 		}
-		next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
-		i += folio_nr_pages(folio) - 1;
+		next_pfn = folio_pfn(folio) + nr_pages;
+		i += nr_pages - 1;
 
 		/* Check that the i965g/gm workaround works. */
 		GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
-- cgit
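A standalone sketch of the idea behind the fix above, in plain userspace C (not i915 code; the page counts and folio sizes are invented for illustration): shmem can hand back a final folio larger than what the object still needs, and only clamping with min() against the remaining request keeps the sg list at the requested length.

/* Userspace model of the clamp; PAGE-granular accounting only. */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long page_count = 10;          /* pages actually requested */
	unsigned long folio_pages[] = { 4, 8 }; /* last folio is larger than needed */
	unsigned long i = 0, sg_total = 0;

	for (int f = 0; f < 2 && i < page_count; f++) {
		/* the fix: never account more pages than the request still needs */
		unsigned long nr_pages = MIN(folio_pages[f], page_count - i);

		sg_total += nr_pages;   /* mirrors sg_set_folio(..., nr_pages * PAGE_SIZE, 0) */
		i += nr_pages;
	}

	/* without the clamp sg_total would be 12 pages, and the GGTT insert
	 * would walk past the end of the page tables */
	printf("pages requested=%lu, pages in sg list=%lu\n", page_count, sg_total);
	return 0;
}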
From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001
From: Javier Pello
Date: Sat, 2 Sep 2023 17:10:39 +0200
Subject: drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top

There is an assertion in ggtt_reserve_guc_top that the global GTT is of
size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform;
see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw:
Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this
triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation
fails and the driver fails to initialise the device:

i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC
i915 0000:00:02.0: Device initialization failed (-28)
i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.
i915: probe of 0000:00:02.0 failed with error -28

Make the reservation at the top of the available space, whatever that
is, instead of assuming that the top will be GUC_GGTT_TOP.

Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT")
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080
Signed-off-by: Javier Pello
Reviewed-by: Daniele Ceraolo Spurio
Cc: Fernando Pacheco
Cc: Chris Wilson
Cc: Jani Nikula
Cc: Joonas Lahtinen
Cc: Rodrigo Vivi
Cc: Tvrtko Ursulin
Cc: intel-gfx@lists.freedesktop.org
Cc: stable@vger.kernel.org # v5.3+
Signed-off-by: John Harrison
Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697dbcf62d6d8@otheo.eu
(cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc)
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..da21f2786b5d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
 	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
 
+/*
+ * Reserve the top of the GuC address space for firmware images. Addresses
+ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
+ * which makes for a suitable range to hold GuC/HuC firmware images if the
+ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
+ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
+ * of the same size anyway, which is far more than needed, to keep the logic
+ * in uc_fw_ggtt_offset() simple.
+ */
+#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
+
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 {
-	u64 size;
+	u64 offset;
 	int ret;
 
 	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
 		return 0;
 
-	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
-	size = ggtt->vm.total - GUC_GGTT_TOP;
+	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
+	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
 
-	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
-				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
-				   PIN_NOEVICT);
+	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
+				   GUC_TOP_RESERVE_SIZE, offset,
+				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
 	if (ret)
 		drm_dbg(&ggtt->vm.i915->drm,
 			"Failed to reserve top of GGTT for GuC\n");
-- cgit
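The arithmetic behind the new reservation, as a standalone userspace sketch (not driver code; the GUC_GGTT_TOP value below is assumed for illustration only): the reserved chunk keeps a fixed size, but its start now follows the actual GGTT size instead of being hard-coded to GUC_GGTT_TOP.

#include <stdio.h>
#include <stdint.h>

#define SZ_4G        0x100000000ULL
#define GUC_GGTT_TOP 0xFEE00000ULL	/* assumed value, for the example only */
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)

static void show(uint64_t ggtt_total)
{
	/* old code: offset fixed at GUC_GGTT_TOP, asserts total > GUC_GGTT_TOP */
	/* new code: same-size chunk placed at whatever the real top is       */
	uint64_t offset = ggtt_total - GUC_TOP_RESERVE_SIZE;

	printf("GGTT %llu MiB: reserve %llu MiB at 0x%llx\n",
	       (unsigned long long)(ggtt_total >> 20),
	       (unsigned long long)(GUC_TOP_RESERVE_SIZE >> 20),
	       (unsigned long long)offset);
}

int main(void)
{
	show(SZ_4G);          /* 64-bit platform: reservation starts at GUC_GGTT_TOP */
	show(0x80000000ULL);  /* 32-bit platform: 2G GGTT, reservation still fits    */
	return 0;
}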
From 907ef0398c938be8232b77c61cfcf50fbfd95554 Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa
Date: Mon, 25 Sep 2023 12:21:17 -0700
Subject: i915/guc: Get runtime pm in busyness worker only if already active

Ideally the busyness worker should take a gt pm wakeref because the
worker only needs to be active while gt is awake. However, the gt_park
path cancels the worker synchronously and this complicates the flow if
the worker is also running at the same time. The cancel waits for the
worker and when the worker releases the wakeref, that would call
gt_park and would lead to a deadlock.

The resolution is to take the global pm wakeref if runtime pm is
already active. If not, we don't need to update the busyness stats as
the stats would already be updated when the gt was parked.

Note:
- We do not requeue the worker if we cannot take a reference to runtime
  pm since intel_guc_busyness_unpark would requeue the worker in the
  resume path.
- If the gt was parked longer than time taken for GT timestamp to roll
  over, we ignore those rollovers since we don't care about tracking
  the exact GT time. We only care about roll overs when the gt is
  active and running workloads.
- There is a window of time between gt_park and runtime suspend, where
  the worker may run. This is acceptable since the worker will not find
  any new data to update busyness.

v2: (Daniele)
- Edit commit message and code comment
- Use runtime pm in the worker
- Put runtime pm after enabling the worker
- Use Link tag and add Fixes tag

v3: (Daniele)
- Reword commit and comments and add details

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/7077
Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu")
Signed-off-by: Umesh Nerlige Ramappa
Reviewed-by: Daniele Ceraolo Spurio
Link: https://patchwork.freedesktop.org/patch/msgid/20230925192117.2497058-1-umesh.nerlige.ramappa@intel.com
(cherry picked from commit e2f99b79d4c594cdf7ab449e338d4947f5ea8903)
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 38 +++++++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b5b7f2fe8c78..dc7b40e06e38 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1432,6 +1432,36 @@ static void guc_timestamp_ping(struct work_struct *wrk)
 	unsigned long index;
 	int srcu, ret;
 
+	/*
+	 * Ideally the busyness worker should take a gt pm wakeref because the
+	 * worker only needs to be active while gt is awake. However, the
+	 * gt_park path cancels the worker synchronously and this complicates
+	 * the flow if the worker is also running at the same time. The cancel
+	 * waits for the worker and when the worker releases the wakeref, that
+	 * would call gt_park and would lead to a deadlock.
+	 *
+	 * The resolution is to take the global pm wakeref if runtime pm is
+	 * already active. If not, we don't need to update the busyness stats
+	 * as the stats would already be updated when the gt was parked.
+	 *
+	 * Note:
+	 * - We do not requeue the worker if we cannot take a reference to
+	 *   runtime pm since intel_guc_busyness_unpark would requeue the
+	 *   worker in the resume path.
+	 *
+	 * - If the gt was parked longer than time taken for GT timestamp to
+	 *   roll over, we ignore those rollovers since we don't care about
+	 *   tracking the exact GT time. We only care about roll overs when
+	 *   the gt is active and running workloads.
+	 *
+	 * - There is a window of time between gt_park and runtime suspend,
+	 *   where the worker may run. This is acceptable since the worker
+	 *   will not find any new data to update busyness.
+	 */
+	wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+	if (!wakeref)
+		return;
+
 	/*
 	 * Synchronize with gt reset to make sure the worker does not
 	 * corrupt the engine/guc stats. NB: can't actually block waiting
@@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
 	 */
 	ret = intel_gt_reset_trylock(gt, &srcu);
 	if (ret)
-		return;
+		goto err_trylock;
 
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
-		__update_guc_busyness_stats(guc);
+	__update_guc_busyness_stats(guc);
 
 	/* adjust context stats for overflow */
 	xa_for_each(&guc->context_lookup, index, ce)
@@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
 	intel_gt_reset_unlock(gt, srcu);
 
 	guc_enable_busyness_worker(guc);
+
+err_trylock:
+	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 }
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
-- cgit
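A minimal userspace model of the "only run if the device is already awake" wakeref pattern the fix adopts (the helper names below are invented, not i915 APIs): the worker proceeds only when an active reference can be taken, and otherwise skips the update entirely.

#include <stdio.h>
#include <stdbool.h>

static int rpm_usecount;	/* stand-in for the runtime-pm usage count */

static bool rpm_get_if_active(void)
{
	if (rpm_usecount == 0)
		return false;	/* device suspended: do not wake it up */
	rpm_usecount++;
	return true;
}

static void rpm_put(void)
{
	rpm_usecount--;
}

static void busyness_worker(void)
{
	if (!rpm_get_if_active()) {
		/* stats were already updated when the device was parked */
		printf("worker: device idle, skipping update\n");
		return;
	}
	printf("worker: device active, updating busyness stats\n");
	rpm_put();
}

int main(void)
{
	busyness_worker();	/* suspended: skipped */
	rpm_usecount = 1;	/* pretend the device is in use */
	busyness_worker();	/* active: stats updated */
	return 0;
}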
From 69390f3528f5a06e2ba291c2e6c20e62dc49c3e4 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich
Date: Tue, 26 Sep 2023 03:49:03 +0200
Subject: MAINTAINERS: update nouveau maintainers

Since I will continue to work on Nouveau consistently, also beyond my
former and still ongoing VM_BIND/EXEC work, add myself to the list of
Nouveau maintainers.

Signed-off-by: Danilo Krummrich
Signed-off-by: Dave Airlie
Link: https://patchwork.freedesktop.org/patch/msgid/20230926014913.7721-1-dakr@redhat.com
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index b19995690904..67ce91c8778a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6647,6 +6647,7 @@ F:	drivers/gpu/drm/panel/panel-novatek-nt36672a.c
 DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
 M:	Karol Herbst
 M:	Lyude Paul
+M:	Danilo Krummrich
 L:	dri-devel@lists.freedesktop.org
 L:	nouveau@lists.freedesktop.org
 S:	Supported
-- cgit

From b0873eead1d1eadf13b5c80ad5d8f88b91e4910a Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Mon, 25 Sep 2023 14:11:32 +0200
Subject: accel/ivpu: Do not use wait event interruptible

If we receive a signal while waiting for an IPC message response in
ivpu_ipc_receive(), we return an error and continue to operate. The
driver can then send further IPC messages and re-use the occupied slot
of a message that is still being processed by the firmware. This can
result in corrupted firmware memory and a subsequent FW crash with
messages such as:

[ 3698.569719] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_send_receive_internal(): IPC receive failed: type 0x1103, ret -512
[ 3698.569747] intel_vpu 0000:00:0b.0: [drm] ivpu_jsm_unregister_db(): Failed to unregister doorbell 3: -512
[ 3698.569756] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_tx_prepare(): IPC message vpu:0x88980000 not released by firmware
[ 3698.569763] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_tx_prepare(): JSM message vpu:0x88980040 not released by firmware
[ 3698.570234] intel_vpu 0000:00:0b.0: [drm] ivpu_ipc_send_receive_internal(): IPC receive failed: type 0x110e, ret -512
[ 3698.570318] intel_vpu 0000:00:0b.0: [drm] *ERROR* ivpu_mmu_dump_event(): MMU EVTQ: 0x10 (Translation fault) SSID: 0 SID: 3, e[2] 00000000, e[3] 00000208, in addr: 0x88988000, fetch addr: 0x0

To fix the issue, don't use the interruptible variant of wait event, so
the firmware is allowed to finish IPC processing.

Fixes: 5d7422cfb498 ("accel/ivpu: Add IPC driver and JSM messages")
Reviewed-by: Karol Wachowski
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-2-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index fa0af59e39ab..295c0d7b5039 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -209,10 +209,10 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	struct ivpu_ipc_rx_msg *rx_msg;
 	int wait_ret, ret = 0;
 
-	wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
-						    (IS_KTHREAD() && kthread_should_stop()) ||
-						    !list_empty(&cons->rx_msg_list),
-						    msecs_to_jiffies(timeout_ms));
+	wait_ret = wait_event_timeout(cons->rx_msg_wq,
+				      (IS_KTHREAD() && kthread_should_stop()) ||
+				      !list_empty(&cons->rx_msg_list),
+				      msecs_to_jiffies(timeout_ms));
 
 	if (IS_KTHREAD() && kthread_should_stop())
 		return -EINTR;
@@ -220,9 +220,6 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	if (wait_ret == 0)
 		return -ETIMEDOUT;
 
-	if (wait_ret < 0)
-		return -ERESTARTSYS;
-
 	spin_lock_irq(&cons->rx_msg_lock);
 	rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
 	if (!rx_msg) {
-- cgit
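A userspace model of the slot-reuse hazard this commit describes (the types and helpers are invented, not ivpu code): if the waiter gives up early on a signal, the IPC slot is still owned by the firmware and the next request tramples the in-flight message.

#include <stdio.h>
#include <stdbool.h>

struct ipc_slot {
	bool owned_by_fw;	/* set when a request is handed to the firmware */
};

static int send_request(struct ipc_slot *slot)
{
	if (slot->owned_by_fw) {
		printf("BUG: reusing a slot the firmware still owns\n");
		return -1;
	}
	slot->owned_by_fw = true;
	return 0;
}

static void wait_for_reply(struct ipc_slot *slot, bool interrupted_by_signal)
{
	if (interrupted_by_signal)
		return;			/* old behaviour: give up, slot stays owned */
	slot->owned_by_fw = false;	/* firmware released the slot */
}

int main(void)
{
	struct ipc_slot slot = { false };

	send_request(&slot);
	wait_for_reply(&slot, true);	/* a signal arrives, the caller returns early */
	send_request(&slot);		/* the next message corrupts the in-flight one */
	return 0;
}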
From 002652555022728c42b5517c6c11265b8c3ab827 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz
Date: Mon, 25 Sep 2023 14:11:33 +0200
Subject: accel/ivpu: Don't flood dmesg with VPU ready message

Use ivpu_dbg() to print the VPU ready message so it doesn't pollute
the dmesg.

Signed-off-by: Jacek Lawrynowicz
Reviewed-by: Stanislaw Gruszka
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-3-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index aa7314fdbc0f..467a60235370 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -327,7 +327,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	}
 
 	if (!ret)
-		ivpu_info(vdev, "VPU ready message received successfully\n");
+		ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
 	else
 		ivpu_hw_diagnose_failure(vdev);
-- cgit

From 6c3f2f90ccad024806f72c49740742df4ded3727 Mon Sep 17 00:00:00 2001
From: Karol Wachowski
Date: Mon, 25 Sep 2023 14:11:34 +0200
Subject: accel/ivpu/40xx: Ensure clock resource ownership Ack before Power-Up

We need to wait for the CLOCK_RESOURCE_OWN_ACK bit to be set after
configuring the workpoint. This step ensures that the VPU
microcontroller clock is actively toggling and ready for operation.

Previously, we relied solely on the READY bit in the VPU_STATUS
register, which indicated the completion of the workpoint download.
However, this approach was insufficient, as the READY bit could be set
while the device was still running on a sideband clock until the PLL
locked.

To guarantee that the PLL is locked and the device is running on the
main clock source, we now wait for the CLOCK_RESOURCE_OWN_ACK before
proceeding with the remainder of the power-up sequence.

Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4")
Signed-off-by: Karol Wachowski
Reviewed-by: Stanislaw Gruszka
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-4-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_40xx.c     | 14 ++++++++++++++
 drivers/accel/ivpu/ivpu_hw_40xx_reg.h |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 00c5dbbe6847..f4a251a58ca4 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -196,6 +196,14 @@ static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
 	return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
 }
 
+static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev)
+{
+	if (ivpu_is_simics(vdev))
+		return 0;
+
+	return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
+}
+
 static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
 {
 	struct ivpu_hw_info *hw = vdev->hw;
@@ -556,6 +564,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
 {
 	int ret;
 
+	ret = ivpu_wait_for_clock_own_resource_ack(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n");
+		return ret;
+	}
+
 	ivpu_boot_pwr_island_trickle_drive(vdev, true);
 	ivpu_boot_pwr_island_drive(vdev, true);
 
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
index 5139cfe88532..ff4a5d4f5821 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
@@ -70,6 +70,8 @@
 #define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2)
+#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6)
+#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12)
-- cgit
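Roughly what the added ACK wait does, as a self-contained userspace sketch (the register, bit position, and timeout below are stand-ins, not the real ivpu definitions): poll a status bit with a timeout before letting the power-up sequence continue.

#include <stdio.h>

#define TIMEOUT_US   150000	/* assumed value for the sketch */
#define POLL_STEP_US 50

static unsigned int fake_vpu_status;	/* stand-in for the VPU_STATUS register */

static int poll_clock_own_ack(void)
{
	for (unsigned int waited = 0; waited <= TIMEOUT_US; waited += POLL_STEP_US) {
		if (fake_vpu_status & (1u << 6))	/* CLOCK_RESOURCE_OWN_ACK */
			return 0;
		/* a real loop would sleep POLL_STEP_US here; omitted in this model */
		if (waited == 1000)
			fake_vpu_status |= 1u << 6;	/* pretend the PLL locks */
	}
	return -1;	/* timed out */
}

int main(void)
{
	if (poll_clock_own_ack())
		printf("Timed out waiting for clock own resource ACK\n");
	else
		printf("clock ACK seen, safe to enable the power domain\n");
	return 0;
}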
From ec3e3adc6d53b0f4a9afc8f903fbf851341e0193 Mon Sep 17 00:00:00 2001
From: Karol Wachowski
Date: Mon, 25 Sep 2023 14:11:35 +0200
Subject: accel/ivpu/40xx: Disable frequency change interrupt

Do not enable the frequency change interrupt on 40xx, as it might lead
to an interrupt storm in the current design.

The FREQ_CHANGE interrupt is triggered on D0I2 entry, which causes the
KMD to check VPU interrupt sources by reading VPUIP registers. Access
to those registers toggles the necessary clocks and triggers another
FREQ_CHANGE interrupt, possibly ending in an infinite loop.

The FREQ_CHANGE interrupt serves only debugging purposes and can be
permanently disabled.

Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4")
Signed-off-by: Karol Wachowski
Reviewed-by: Stanislaw Gruszka
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-5-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_40xx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index f4a251a58ca4..87b1085d44cf 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -57,8 +57,7 @@
 
 #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
 
-#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \
-			   (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
+#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
			   (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \
			   (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \
			   (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \
-- cgit
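A toy model of the feedback loop described above (invented userspace code, not the driver): as long as FREQ_CHANGE stays enabled, servicing it produces the next one; with the bit left out of the enable mask the loop never starts.

#include <stdio.h>
#include <stdbool.h>

static bool freq_change_enabled;
static int storm_guard = 5;	/* cap the demo instead of looping forever */

static void service_freq_change(void)
{
	while (freq_change_enabled && storm_guard--) {
		/* reading VPUIP registers toggles clocks...            */
		/* ...which raises the next FREQ_CHANGE, and so on      */
		printf("FREQ_CHANGE serviced, another one raised\n");
	}
}

int main(void)
{
	freq_change_enabled = true;
	service_freq_change();		/* would storm without the guard */

	freq_change_enabled = false;	/* the fix: bit dropped from the mask */
	service_freq_change();		/* nothing to do */
	return 0;
}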
From 09bb81cf243d151dd1c02fcd727a4604829d9927 Mon Sep 17 00:00:00 2001
From: Karol Wachowski
Date: Mon, 25 Sep 2023 14:11:36 +0200
Subject: accel/ivpu/40xx: Fix missing VPUIP interrupts

Move the sequence of masking and unmasking global interrupts from the
buttress interrupt handler to the generic one that handles both VPUIP
and BTRS interrupts. Unmasking global interrupts re-triggers the MSI
for any pending interrupts.

Without this sequence, any VPUIP interrupt that arrives after reading
VPU_40XX_HOST_SS_ICB_STATUS_0 and before clearing all active interrupt
sources can be randomly missed.

Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4")
Signed-off-by: Karol Wachowski
Reviewed-by: Stanislaw Gruszka
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230925121137.872158-6-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_40xx.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 87b1085d44cf..8bdb59a45da6 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -1059,9 +1059,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 	if (status == 0)
 		return IRQ_NONE;
 
-	/* Disable global interrupt before handling local buttress interrupts */
-	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
-
 	if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
 		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
 
@@ -1109,9 +1106,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 	/* This must be done after interrupts are cleared at the source. */
 	REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
 
-	/* Re-enable global interrupt */
-	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-
 	if (schedule_recovery)
 		ivpu_pm_schedule_recovery(vdev);
 
@@ -1123,9 +1117,14 @@ static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
 	struct ivpu_device *vdev = ptr;
 	irqreturn_t ret = IRQ_NONE;
 
+	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
+
 	ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
 	ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
 
+	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
+	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+
 	if (ret & IRQ_WAKE_THREAD)
 		return IRQ_WAKE_THREAD;
-- cgit
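A userspace model of the reordered masking (the function names are invented, not ivpu code): the global mask is held across both the VPUIP and buttress handlers, and the final unmask lets anything that latched in between re-raise the MSI.

#include <stdio.h>
#include <stdbool.h>

static bool global_masked;
static int pending_vpuip;	/* interrupts latched while we were handling */

static void handle_vpuip(void) { printf("handle VPUIP sources\n"); }
static void handle_btrs(void)  { printf("handle buttress sources\n"); }

static void irq_handler(void)
{
	global_masked = true;		/* GLOBAL_INT_MASK = 1 */

	handle_vpuip();
	pending_vpuip++;		/* a new VPUIP IRQ arrives mid-handler */
	handle_btrs();

	global_masked = false;		/* GLOBAL_INT_MASK = 0 */
	if (pending_vpuip && !global_masked)	/* unmasking re-triggers the MSI */
		printf("MSI re-raised for %d pending VPUIP interrupt(s)\n",
		       pending_vpuip);
}

int main(void)
{
	irq_handler();
	return 0;
}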
From 645d694559cab36fe6a57c717efcfa27d9321396 Mon Sep 17 00:00:00 2001
From: Karol Wachowski
Date: Tue, 26 Sep 2023 14:09:43 +0200
Subject: accel/ivpu: Use cached buffers for FW loading

Create buffers with cache coherency on the CPU side (write-back) while
disabling snooping on the VPU side. These buffers require an explicit
cache flush after each CPU-side modification.

Configuring pages as write-combined may introduce significant delays,
potentially taking hundreds of milliseconds for 64 MB buffers.

Add an internal DRM_IVPU_BO_NOSNOOP mask that disables snooping on the
VPU side. Allocate the FW runtime memory buffer (64 MB) as cached with
snooping disabled.

This fixes randomly long FW loading times and boot params memory
corruption on warmboot (due to a missed wmb).

Fixes: 02d5b0aacd05 ("accel/ivpu: Implement firmware parsing and booting")
Signed-off-by: Karol Wachowski
Reviewed-by: Stanislaw Gruszka
Reviewed-by: Jeffrey Hugo
Signed-off-by: Stanislaw Gruszka
Link: https://patchwork.freedesktop.org/patch/msgid/20230926120943.GD846747@linux.intel.com
---
 drivers/accel/ivpu/ivpu_fw.c  | 8 +++++---
 drivers/accel/ivpu/ivpu_gem.h | 5 +++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 9827ea4d7b83..0191cf8e5964 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -220,7 +220,8 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 	if (ret)
 		return ret;
 
-	fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
+	fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size,
+					 DRM_IVPU_BO_CACHED | DRM_IVPU_BO_NOSNOOP);
 	if (!fw->mem) {
 		ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
 		return -ENOMEM;
@@ -330,7 +331,7 @@ int ivpu_fw_load(struct ivpu_device *vdev)
 		memset(start, 0, size);
 	}
 
-	wmb(); /* Flush WC buffers after writing fw->mem */
+	clflush_cache_range(fw->mem->kvaddr, fw->mem->base.size);
 
 	return 0;
 }
@@ -432,6 +433,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	if (!ivpu_fw_is_cold_boot(vdev)) {
 		boot_params->save_restore_ret_address = 0;
 		vdev->pm->is_warmboot = true;
+		clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
 		return;
 	}
 
@@ -493,7 +495,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
 	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
 
-	wmb(); /* Flush WC buffers after writing bootparams */
+	clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
 
 	ivpu_fw_boot_params_print(vdev, boot_params);
 }

diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index 6b0ceda5f253..f4130586ff1b 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -8,6 +8,8 @@
 #include
 #include
 
+#define DRM_IVPU_BO_NOSNOOP 0x10000000
+
 struct dma_buf;
 struct ivpu_bo_ops;
 struct ivpu_file_priv;
@@ -83,6 +85,9 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
 
 static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
 {
+	if (bo->flags & DRM_IVPU_BO_NOSNOOP)
+		return false;
+
 	return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
 }
-- cgit
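A userspace x86 sketch of the write-back-plus-explicit-flush pattern the commit switches to (this models clflush_cache_range() with SSE2 intrinsics; it is not ivpu code, and the buffer here is an ordinary allocation standing in for the FW/boot-params memory): after a CPU-side write, the affected cache lines are flushed so a non-snooping device would see the data.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <emmintrin.h>		/* _mm_clflush, _mm_mfence (SSE2) */

#define CACHELINE 64

static void flush_range(void *addr, size_t size)
{
	uintptr_t p = (uintptr_t)addr & ~(uintptr_t)(CACHELINE - 1);
	uintptr_t end = (uintptr_t)addr + size;

	_mm_mfence();
	for (; p < end; p += CACHELINE)
		_mm_clflush((void *)p);	/* write dirty lines back to memory */
	_mm_mfence();
}

int main(void)
{
	size_t size = 4096;
	uint8_t *boot_params = aligned_alloc(CACHELINE, size);

	if (!boot_params)
		return 1;
	boot_params[0] = 0xab;		/* CPU-side modification (cached, write-back) */
	flush_range(boot_params, size);	/* make it visible to a non-snooping device */
	printf("flushed %zu bytes\n", size);
	free(boot_params);
	return 0;
}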