Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/amdxdna/aie2_ctx.c             7
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.c        193
-rw-r--r--  drivers/accel/amdxdna/amdxdna_gem.h          3
-rw-r--r--  drivers/accel/drm_accel.c                   16
-rw-r--r--  drivers/accel/habanalabs/common/device.c    25
-rw-r--r--  drivers/accel/habanalabs/common/sysfs.c      4
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c                1
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h               15
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.c                 4
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_ip.c              1
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c               81
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c                15
-rw-r--r--  drivers/accel/qaic/Makefile                  1
-rw-r--r--  drivers/accel/qaic/qaic.h                   10
-rw-r--r--  drivers/accel/qaic/qaic_data.c               1
-rw-r--r--  drivers/accel/qaic/qaic_drv.c                6
-rw-r--r--  drivers/accel/qaic/qaic_ras.c              642
-rw-r--r--  drivers/accel/qaic/qaic_ras.h               10
18 files changed, 862 insertions, 173 deletions
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index e04549f64d69..2cff5419bd2f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -361,7 +361,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
aie2_hwctx_restart(xdna, hwctx);
mutex_unlock(&xdna->dev_lock);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
static const struct drm_sched_backend_ops sched_ops = {
@@ -566,7 +566,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
.size = MAX_CHAIN_CMDBUF_SIZE,
};
- abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+ abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
if (IS_ERR(abo)) {
ret = PTR_ERR(abo);
goto free_cmd_bufs;
@@ -848,7 +848,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
goto up_sem;
}
- ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+ ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
+ hwctx->client->filp->client_id);
if (ret) {
XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
goto free_chain;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 26831ec69f89..0f85a0105178 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -24,40 +24,79 @@
MODULE_IMPORT_NS("DMA_BUF");
static int
-amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
+amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
{
struct amdxdna_client *client = abo->client;
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_mem *mem = &abo->mem;
+ struct amdxdna_gem_obj *heap;
u64 offset;
u32 align;
int ret;
+ mutex_lock(&client->mm_lock);
+
+ heap = client->dev_heap;
+ if (!heap) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
+ if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
+ XDNA_ERR(xdna, "Invalid dev heap userptr");
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
+ if (mem->size == 0 || mem->size > heap->mem.size) {
+ XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx",
+ mem->size, heap->mem.size);
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+
align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
- ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node,
+ ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node,
mem->size, align,
0, DRM_MM_INSERT_BEST);
if (ret) {
XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
- return ret;
+ goto unlock_out;
}
mem->dev_addr = abo->mm_node.start;
- offset = mem->dev_addr - abo->dev_heap->mem.dev_addr;
- mem->userptr = abo->dev_heap->mem.userptr + offset;
- mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT];
- mem->nr_pages = mem->size >> PAGE_SHIFT;
-
- if (use_vmap) {
- mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL);
- if (!mem->kva) {
- XDNA_ERR(xdna, "Failed to vmap");
- drm_mm_remove_node(&abo->mm_node);
- return -EFAULT;
- }
- }
+ offset = mem->dev_addr - heap->mem.dev_addr;
+ mem->userptr = heap->mem.userptr + offset;
+ mem->kva = heap->mem.kva + offset;
- return 0;
+ drm_gem_object_get(to_gobj(heap));
+
+unlock_out:
+ mutex_unlock(&client->mm_lock);
+
+ return ret;
+}
+
+static void
+amdxdna_gem_destroy_obj(struct amdxdna_gem_obj *abo)
+{
+ mutex_destroy(&abo->lock);
+ kfree(abo);
+}
+
+static void
+amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_gem_obj *heap;
+
+ mutex_lock(&abo->client->mm_lock);
+
+ drm_mm_remove_node(&abo->mm_node);
+
+ heap = abo->client->dev_heap;
+ drm_gem_object_put(to_gobj(heap));
+
+ mutex_unlock(&abo->client->mm_lock);
}
static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
@@ -213,6 +252,20 @@ free_map:
return ret;
}
+static void amdxdna_gem_dev_obj_free(struct drm_gem_object *gobj)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+ XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
+ if (abo->pinned)
+ amdxdna_gem_unpin(abo);
+
+ amdxdna_gem_heap_free(abo);
+ drm_gem_object_release(gobj);
+ amdxdna_gem_destroy_obj(abo);
+}
+
static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
struct vm_area_struct *vma)
{
@@ -374,19 +427,6 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
if (abo->pinned)
amdxdna_gem_unpin(abo);
- if (abo->type == AMDXDNA_BO_DEV) {
- mutex_lock(&abo->client->mm_lock);
- drm_mm_remove_node(&abo->mm_node);
- mutex_unlock(&abo->client->mm_lock);
-
- vunmap(abo->mem.kva);
- drm_gem_object_put(to_gobj(abo->dev_heap));
- drm_gem_object_release(gobj);
- mutex_destroy(&abo->lock);
- kfree(abo);
- return;
- }
-
if (abo->type == AMDXDNA_BO_DEV_HEAP)
drm_mm_takedown(&abo->mm);
@@ -402,7 +442,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
}
static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
- .free = amdxdna_gem_obj_free,
+ .free = amdxdna_gem_dev_obj_free,
};
static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
@@ -527,6 +567,7 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct drm_gem_shmem_object *shmem;
struct amdxdna_gem_obj *abo;
@@ -553,18 +594,26 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
shmem->map_wc = false;
abo = to_xdna_obj(&shmem->base);
-
abo->type = AMDXDNA_BO_DEV_HEAP;
abo->client = client;
abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);
+ ret = drm_gem_vmap(to_gobj(abo), &map);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap heap bo failed, ret %d", ret);
+ goto release_obj;
+ }
+ abo->mem.kva = map.vaddr;
+
client->dev_heap = abo;
drm_gem_object_get(to_gobj(abo));
mutex_unlock(&client->mm_lock);
return abo;
+release_obj:
+ drm_gem_object_put(to_gobj(abo));
mm_unlock:
mutex_unlock(&client->mm_lock);
return ERR_PTR(ret);
@@ -573,58 +622,32 @@ mm_unlock:
struct amdxdna_gem_obj *
amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
- struct drm_file *filp, bool use_vmap)
+ struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
struct amdxdna_dev *xdna = to_xdna_dev(dev);
size_t aligned_sz = PAGE_ALIGN(args->size);
- struct amdxdna_gem_obj *abo, *heap;
+ struct amdxdna_gem_obj *abo;
int ret;
- mutex_lock(&client->mm_lock);
- heap = client->dev_heap;
- if (!heap) {
- ret = -EINVAL;
- goto mm_unlock;
- }
-
- if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
- XDNA_ERR(xdna, "Invalid dev heap userptr");
- ret = -EINVAL;
- goto mm_unlock;
- }
-
- if (args->size > heap->mem.size) {
- XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx",
- args->size, heap->mem.size);
- ret = -EINVAL;
- goto mm_unlock;
- }
-
abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
- if (IS_ERR(abo)) {
- ret = PTR_ERR(abo);
- goto mm_unlock;
- }
+ if (IS_ERR(abo))
+ return abo;
+
to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
abo->type = AMDXDNA_BO_DEV;
abo->client = client;
- abo->dev_heap = heap;
- ret = amdxdna_gem_insert_node_locked(abo, use_vmap);
+
+ ret = amdxdna_gem_heap_alloc(abo);
if (ret) {
XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
- goto mm_unlock;
+ amdxdna_gem_destroy_obj(abo);
+ return ERR_PTR(ret);
}
- drm_gem_object_get(to_gobj(heap));
drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);
- mutex_unlock(&client->mm_lock);
return abo;
-
-mm_unlock:
- mutex_unlock(&client->mm_lock);
- return ERR_PTR(ret);
}
static struct amdxdna_gem_obj *
@@ -632,10 +655,10 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
struct drm_file *filp)
{
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct drm_gem_shmem_object *shmem;
struct amdxdna_gem_obj *abo;
- struct iosys_map map;
int ret;
if (args->size > XDNA_MAX_CMD_BO_SIZE) {
@@ -692,7 +715,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
abo = amdxdna_drm_create_dev_heap(dev, args, filp);
break;
case AMDXDNA_BO_DEV:
- abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false);
+ abo = amdxdna_drm_alloc_dev_bo(dev, args, filp);
break;
case AMDXDNA_BO_CMD:
abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
@@ -724,20 +747,13 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
int ret;
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->client->dev_heap;
+
if (is_import_bo(abo))
return 0;
- switch (abo->type) {
- case AMDXDNA_BO_SHMEM:
- case AMDXDNA_BO_DEV_HEAP:
- ret = drm_gem_shmem_pin(&abo->base);
- break;
- case AMDXDNA_BO_DEV:
- ret = drm_gem_shmem_pin(&abo->dev_heap->base);
- break;
- default:
- ret = -EOPNOTSUPP;
- }
+ ret = drm_gem_shmem_pin(&abo->base);
XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
return ret;
@@ -747,9 +763,6 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
{
int ret;
- if (abo->type == AMDXDNA_BO_DEV)
- abo = abo->dev_heap;
-
mutex_lock(&abo->lock);
ret = amdxdna_gem_pin_nolock(abo);
mutex_unlock(&abo->lock);
@@ -759,12 +772,12 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
{
+ if (abo->type == AMDXDNA_BO_DEV)
+ abo = abo->client->dev_heap;
+
if (is_import_bo(abo))
return;
- if (abo->type == AMDXDNA_BO_DEV)
- abo = abo->dev_heap;
-
mutex_lock(&abo->lock);
drm_gem_shmem_unpin(&abo->base);
mutex_unlock(&abo->lock);
@@ -855,10 +868,12 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
if (is_import_bo(abo))
drm_clflush_sg(abo->base.sgt);
- else if (abo->type == AMDXDNA_BO_DEV)
- drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
- else
+ else if (abo->mem.kva)
+ drm_clflush_virt_range(abo->mem.kva + args->offset, args->size);
+ else if (abo->base.pages)
drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
+ else
+ drm_WARN(&xdna->ddev, 1, "Can not get flush memory");
amdxdna_gem_unpin(abo);
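
The new amdxdna_gem_heap_alloc() above suballocates AMDXDNA_BO_DEV objects from the heap BO with drm_mm and derives both the userptr and the kernel mapping from the same offset, so the per-BO vmap()/vunmap() pair disappears. A minimal sketch of that suballocation pattern, with illustrative names ("heap_mm", "heap_kva", "heap_dev_addr") that are not the driver's:

	static void *heap_suballoc(struct drm_mm *heap_mm, struct drm_mm_node *node,
				   void *heap_kva, u64 heap_dev_addr,
				   u64 size, u64 align)
	{
		int ret;

		/* Carve a block out of the parent heap's address range. */
		ret = drm_mm_insert_node_generic(heap_mm, node, size, align,
						 0, DRM_MM_INSERT_BEST);
		if (ret)
			return ERR_PTR(ret);

		/* node->start is the new device address; the same offset into
		 * the heap's kernel mapping replaces a per-BO vmap(). */
		return heap_kva + (node->start - heap_dev_addr);
	}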
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index aee97e971d6d..ae29db94a9d3 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -41,7 +41,6 @@ struct amdxdna_gem_obj {
/* Below members is uninitialized when needed */
struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
- struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */
struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */
u32 assigned_hwctx;
struct dma_buf *dma_buf;
@@ -72,7 +71,7 @@ amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
struct amdxdna_gem_obj *
amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
struct amdxdna_drm_create_bo *args,
- struct drm_file *filp, bool use_vmap);
+ struct drm_file *filp);
int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index aa826033b0ce..ca3357acd127 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -20,8 +20,6 @@
DEFINE_XARRAY_ALLOC(accel_minors_xa);
-static struct dentry *accel_debugfs_root;
-
static const struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
};
@@ -74,17 +72,6 @@ static const struct drm_info_list accel_debugfs_list[] = {
#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
/**
- * accel_debugfs_init() - Initialize debugfs for device
- * @dev: Pointer to the device instance.
- *
- * This function creates a root directory for the device in debugfs.
- */
-void accel_debugfs_init(struct drm_device *dev)
-{
- drm_debugfs_dev_init(dev, accel_debugfs_root);
-}
-
-/**
* accel_debugfs_register() - Register debugfs for device
* @dev: Pointer to the device instance.
*
@@ -194,7 +181,6 @@ static const struct file_operations accel_stub_fops = {
void accel_core_exit(void)
{
unregister_chrdev(ACCEL_MAJOR, "accel");
- debugfs_remove(accel_debugfs_root);
accel_sysfs_destroy();
WARN_ON(!xa_empty(&accel_minors_xa));
}
@@ -209,8 +195,6 @@ int __init accel_core_init(void)
goto error;
}
- accel_debugfs_root = debugfs_create_dir("accel", NULL);
-
ret = register_chrdev(ACCEL_MAJOR, "accel", &accel_stub_fops);
if (ret < 0)
DRM_ERROR("Cannot register ACCEL major: %d\n", ret);
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 68eebed3b050..80fa08bf57bd 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1066,28 +1066,11 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (device_id == hdev->pdev->device);
}
-static void stringify_time_of_last_heartbeat(struct hl_device *hdev, char *time_str, size_t size,
- bool is_pq_hb)
-{
- time64_t seconds = is_pq_hb ? hdev->heartbeat_debug_info.last_pq_heartbeat_ts
- : hdev->heartbeat_debug_info.last_eq_heartbeat_ts;
- struct tm tm;
-
- if (!seconds)
- return;
-
- time64_to_tm(seconds, 0, &tm);
-
- snprintf(time_str, size, "%ld-%02d-%02d %02d:%02d:%02d (UTC)",
- tm.tm_year + 1900, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
-}
-
static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
{
struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info;
u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- char pq_time_str[64] = "N/A", eq_time_str[64] = "N/A";
if (!prop->cpucp_info.eq_health_check_supported)
return true;
@@ -1095,17 +1078,15 @@ static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
if (!hdev->eq_heartbeat_received) {
dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
- stringify_time_of_last_heartbeat(hdev, pq_time_str, sizeof(pq_time_str), true);
- stringify_time_of_last_heartbeat(hdev, eq_time_str, sizeof(eq_time_str), false);
dev_err(hdev->dev,
- "EQ: {CI %u, HB counter %u, last HB time: %s}, PQ: {PI: %u, CI: %u (%u), last HB time: %s}\n",
+ "EQ: {CI %u, HB counter %u, last HB time: %ptTs}, PQ: {PI: %u, CI: %u (%u), last HB time: %ptTs}\n",
hdev->event_queue.ci,
heartbeat_debug_info->heartbeat_event_counter,
- eq_time_str,
+ &hdev->heartbeat_debug_info.last_eq_heartbeat_ts,
hdev->kernel_queues[cpu_q_id].pi,
atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask,
- pq_time_str);
+ &hdev->heartbeat_debug_info.last_pq_heartbeat_ts);
hl_eq_dump(hdev, &hdev->event_queue);
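
The helper removed above is replaced by printk's %ptTs specifier, which takes a pointer to a time64_t and renders it as "YYYY-mm-dd HH:MM:SS" (UTC), so the driver no longer formats the timestamp by hand. A minimal sketch of the same idiom (the device pointer and timestamp here are placeholders):

	time64_t last_hb = ktime_get_real_seconds();

	/* %ptTs consumes a pointer to time64_t and prints "YYYY-mm-dd HH:MM:SS". */
	dev_err(dev, "last HB time: %ptTs\n", &last_hb);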
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 9d58efa2ff38..82f66520ec18 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -446,7 +446,7 @@ static DEVICE_ATTR_RO(parent_device);
static const struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
.size = PAGE_SIZE,
- .read_new = eeprom_read_handler
+ .read = eeprom_read_handler
};
static struct attribute *hl_dev_attrs[] = {
@@ -479,7 +479,7 @@ static const struct bin_attribute *const hl_dev_bin_attrs[] = {
static struct attribute_group hl_dev_attr_group = {
.attrs = hl_dev_attrs,
- .bin_attrs_new = hl_dev_bin_attrs,
+ .bin_attrs = hl_dev_bin_attrs,
};
static struct attribute_group hl_dev_clks_attr_group;
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 0e7748c5e117..3d6d52492536 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -704,6 +704,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_WCL) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 5497e7030e91..62ab1c654e63 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
@@ -26,6 +26,7 @@
#define PCI_DEVICE_ID_ARL 0xad1d
#define PCI_DEVICE_ID_LNL 0x643e
#define PCI_DEVICE_ID_PTL_P 0xb03e
+#define PCI_DEVICE_ID_WCL 0xfd3e
#define IVPU_HW_IP_37XX 37
#define IVPU_HW_IP_40XX 40
@@ -165,6 +166,7 @@ struct ivpu_device {
int boot;
int jsm;
int tdr;
+ int inference;
int autosuspend;
int d0i3_entry_msg;
int state_dump_msg;
@@ -207,10 +209,11 @@ extern bool ivpu_force_snoop;
#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
#define IVPU_TEST_MODE_MIP_DISABLE BIT(6)
#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8)
-#define IVPU_TEST_MODE_TURBO BIT(9)
-#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10)
-#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(11)
-#define IVPU_TEST_MODE_D0I2_DISABLE BIT(12)
+#define IVPU_TEST_MODE_TURBO_ENABLE BIT(9)
+#define IVPU_TEST_MODE_TURBO_DISABLE BIT(10)
+#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11)
+#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE BIT(12)
+#define IVPU_TEST_MODE_D0I2_DISABLE BIT(13)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -240,6 +243,7 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_LNL:
return IVPU_HW_IP_40XX;
case PCI_DEVICE_ID_PTL_P:
+ case PCI_DEVICE_ID_WCL:
return IVPU_HW_IP_50XX;
default:
dump_stack();
@@ -256,6 +260,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
return IVPU_HW_BTRS_MTL;
case PCI_DEVICE_ID_LNL:
case PCI_DEVICE_ID_PTL_P:
+ case PCI_DEVICE_ID_WCL:
return IVPU_HW_BTRS_LNL;
default:
dump_stack();
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 633160470c93..08dcc31b56f4 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -94,12 +94,14 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.boot = -1;
vdev->timeout.jsm = -1;
vdev->timeout.tdr = -1;
+ vdev->timeout.inference = -1;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = -1;
} else if (ivpu_is_fpga(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 15000;
vdev->timeout.tdr = 30000;
+ vdev->timeout.inference = 900000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
vdev->timeout.state_dump_msg = 10000;
@@ -107,6 +109,7 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
+ vdev->timeout.inference = 300000;
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 100;
vdev->timeout.state_dump_msg = 10;
@@ -114,6 +117,7 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 2000;
+ vdev->timeout.inference = 60000;
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
vdev->timeout.autosuspend = 10;
else
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 823f6a57dc54..2bf9882ab52e 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -683,6 +683,7 @@ static void pwr_island_delay_set(struct ivpu_device *vdev)
return;
switch (ivpu_device_id(vdev)) {
+ case PCI_DEVICE_ID_WCL:
case PCI_DEVICE_ID_PTL_P:
post = high ? 18 : 0;
post1 = 0;
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index fae8351aa330..060f1fc031d3 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
*/
#include <drm/drm_file.h>
@@ -100,6 +100,43 @@ err_free_cmdq:
return NULL;
}
+/**
+ * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
+ * @cmdq: Pointer to the command queue structure.
+ *
+ * Returns the number of entries that can fit in the command queue memory.
+ */
+static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
+{
+ size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
+
+ return size / sizeof(struct vpu_job_queue_entry);
+}
+
+/**
+ * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
+ * @vdev: Pointer to the ivpu device structure.
+ * @flags: Input flags to determine the command queue flags.
+ *
+ * Returns the calculated command queue flags, considering both the input flags
+ * and the current test mode settings.
+ */
+static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
+{
+ u32 cmdq_flags = 0;
+
+ if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
+ cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+ /* Test mode can override the TURBO flag coming from the application */
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
+ cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
+ cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+ return cmdq_flags;
+}
+
static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
@@ -107,8 +144,7 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
kfree(cmdq);
}
-static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority,
- bool is_legacy)
+static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_cmdq *cmdq = NULL;
@@ -121,10 +157,6 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
ivpu_err(vdev, "Failed to allocate command queue\n");
return NULL;
}
-
- cmdq->priority = priority;
- cmdq->is_legacy = is_legacy;
-
ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
&file_priv->cmdq_id_next, GFP_KERNEL);
if (ret < 0) {
@@ -132,7 +164,15 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
goto err_free_cmdq;
}
- ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id);
+ cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
+ cmdq->priority = priority;
+
+ cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
+ cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
+ cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);
+
+ ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
+ cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
return cmdq;
err_free_cmdq:
@@ -188,27 +228,14 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
return ret;
}
-static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
+static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
{
- jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
jobq->header.head = 0;
jobq->header.tail = 0;
- if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
- ivpu_dbg(vdev, JOB, "Turbo mode enabled");
- jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
- }
-
wmb(); /* Flush WC buffer for jobq->header */
}
-static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
-{
- size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
-
- return size / sizeof(struct vpu_job_queue_entry);
-}
-
static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
struct ivpu_device *vdev = file_priv->vdev;
@@ -219,10 +246,7 @@ static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
if (cmdq->db_id)
return 0;
- cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
- cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
-
- ivpu_cmdq_jobq_init(vdev, cmdq->jobq);
+ ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
@@ -291,9 +315,10 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_pr
break;
if (!cmdq) {
- cmdq = ivpu_cmdq_create(file_priv, priority, true);
+ cmdq = ivpu_cmdq_create(file_priv, priority, 0);
if (!cmdq)
return NULL;
+ cmdq->is_legacy = true;
}
return cmdq;
@@ -891,7 +916,7 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
+ cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
if (cmdq)
args->cmdq_id = cmdq->id;
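
ivpu_cmdq_get_entry_count(), documented in the hunks above, derives the ring depth from whatever is left of the queue BO after the vpu_job_queue_header. A worked example with purely illustrative sizes (the real struct sizes come from the VPU JSM ABI headers):

	size_t bo_size = SZ_4K;			/* command queue BO */
	size_t usable  = bo_size - 64;		/* assume a 64-byte queue header */
	u32 entries    = usable / 32;		/* assume 32-byte entries -> 126 slots */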
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index ea30db181cd7..eacda1dbe840 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -33,8 +33,11 @@ static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+static unsigned long ivpu_inference_timeout_ms;
+module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
-#define PM_TDR_HEARTBEAT_LIMIT 30
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
@@ -191,6 +194,10 @@ static void ivpu_job_timeout_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+ unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms :
+ vdev->timeout.inference;
+ u64 inference_max_retries;
u64 heartbeat;
if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
@@ -198,8 +205,10 @@ static void ivpu_job_timeout_work(struct work_struct *work)
goto recovery;
}
- if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) {
- ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n");
+ inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms);
+ if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n",
+ inference_max_retries);
goto recovery;
}
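
Instead of the fixed PM_TDR_HEARTBEAT_LIMIT, the watchdog now derives its retry budget from the two timeouts: with the silicon defaults set in timeouts_init() (tdr = 2000 ms, inference = 60000 ms) it tolerates DIV_ROUND_UP(60000, 2000) = 30 stalled-heartbeat checks before forcing recovery, and both values can be overridden via module parameters. A sketch of the same arithmetic:

	unsigned long tdr_ms = 2000;		/* vdev->timeout.tdr on silicon */
	unsigned long inference_ms = 60000;	/* vdev->timeout.inference on silicon */
	u64 max_retries = DIV_ROUND_UP(inference_ms, tdr_ms);	/* = 30 */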
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 35e883515629..1106b876f737 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -10,6 +10,7 @@ qaic-y := \
qaic_control.o \
qaic_data.o \
qaic_drv.o \
+ qaic_ras.o \
qaic_timesync.o \
sahara.o
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 0dbb8e32e4b9..c31081e42cee 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -167,6 +167,14 @@ struct qaic_device {
struct workqueue_struct *bootlog_wq;
/* Synchronizes access of pages in MHI bootlog device */
struct mutex bootlog_mutex;
+ /* MHI RAS channel device */
+ struct mhi_device *ras_ch;
+ /* Correctable error count */
+ unsigned int ce_count;
+ /* Un-correctable error count */
+ unsigned int ue_count;
+ /* Un-correctable non-fatal error count */
+ unsigned int ue_nf_count;
};
struct qaic_drm_device {
@@ -213,8 +221,6 @@ struct qaic_bo {
bool sliced;
/* Request ID of this BO if it is queued for execution */
u16 req_id;
- /* Handle assigned to this BO */
- u32 handle;
/* Wait on this for completion of DMA transfer of this BO */
struct completion xfer_done;
/*
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 1bce1af7c72c..797289e9d780 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -731,7 +731,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (ret)
goto free_bo;
- bo->handle = args->handle;
drm_gem_object_put(obj);
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 3b415e2c9431..e31bcb0ecfc9 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -29,6 +29,7 @@
#include "mhi_controller.h"
#include "qaic.h"
#include "qaic_debugfs.h"
+#include "qaic_ras.h"
#include "qaic_timesync.h"
#include "sahara.h"
@@ -695,6 +696,10 @@ static int __init qaic_init(void)
if (ret)
pr_debug("qaic: qaic_bootlog_register failed %d\n", ret);
+ ret = qaic_ras_register();
+ if (ret)
+ pr_debug("qaic: qaic_ras_register failed %d\n", ret);
+
return 0;
free_mhi:
@@ -722,6 +727,7 @@ static void __exit qaic_exit(void)
* reinitializing the link_up state after the cleanup is done.
*/
link_up = true;
+ qaic_ras_unregister();
qaic_bootlog_unregister();
qaic_timesync_deinit();
sahara_unregister();
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c
new file mode 100644
index 000000000000..914ffc4a9970
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
+
+#include <asm/byteorder.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+
+#include "qaic.h"
+#include "qaic_ras.h"
+
+#define MAGIC 0x55AA
+#define VERSION 0x2
+#define HDR_SZ 12
+#define NUM_TEMP_LVL 3
+#define POWER_BREAK BIT(0)
+
+enum msg_type {
+ MSG_PUSH, /* async push from device */
+ MSG_REQ, /* sync request to device */
+ MSG_RESP, /* sync response from device */
+};
+
+enum err_type {
+ CE, /* correctable error */
+ UE, /* uncorrectable error */
+ UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */
+ ERR_TYPE_MAX,
+};
+
+static const char * const err_type_str[] = {
+ [CE] = "Correctable",
+ [UE] = "Uncorrectable",
+ [UE_NF] = "Uncorrectable Non-Fatal",
+};
+
+static const char * const err_class_str[] = {
+ [CE] = "Warning",
+ [UE] = "Fatal",
+ [UE_NF] = "Warning",
+};
+
+enum err_source {
+ SOC_MEM,
+ PCIE,
+ DDR,
+ SYS_BUS1,
+ SYS_BUS2,
+ NSP_MEM,
+ TSENS,
+};
+
+static const char * const err_src_str[TSENS + 1] = {
+ [SOC_MEM] = "SoC Memory",
+ [PCIE] = "PCIE",
+ [DDR] = "DDR",
+ [SYS_BUS1] = "System Bus source 1",
+ [SYS_BUS2] = "System Bus source 2",
+ [NSP_MEM] = "NSP Memory",
+ [TSENS] = "Temperature Sensors",
+};
+
+struct ras_data {
+ /* header start */
+ /* Magic number to validate the message */
+ u16 magic;
+ /* RAS version number */
+ u16 ver;
+ u32 seq_num;
+ /* RAS message type */
+ u8 type;
+ u8 id;
+ /* Size of RAS message without the header in byte */
+ u16 len;
+ /* header end */
+ s32 result;
+ /*
+ * Error source
+ * 0 : SoC Memory
+ * 1 : PCIE
+ * 2 : DDR
+ * 3 : System Bus source 1
+ * 4 : System Bus source 2
+ * 5 : NSP Memory
+ * 6 : Temperature Sensors
+ */
+ u32 source;
+ /*
+ * Stores the error type, there are three types of error in RAS
+ * 0 : correctable error (CE)
+ * 1 : uncorrectable error (UE)
+ * 2 : uncorrectable error that is non-fatal (UE_NF)
+ */
+ u32 err_type;
+ u32 err_threshold;
+ u32 ce_count;
+ u32 ue_count;
+ u32 intr_num;
+ /* Data specific to error source */
+ u8 syndrome[64];
+} __packed;
+
+struct soc_mem_syndrome {
+ u64 error_address[8];
+} __packed;
+
+struct nsp_mem_syndrome {
+ u32 error_address[8];
+ u8 nsp_id;
+} __packed;
+
+struct ddr_syndrome {
+ u32 count;
+ u32 irq_status;
+ u32 data_31_0[2];
+ u32 data_63_32[2];
+ u32 data_95_64[2];
+ u32 data_127_96[2];
+ u32 addr_lsb;
+ u16 addr_msb;
+ u16 parity_bits;
+ u16 instance;
+ u16 err_type;
+} __packed;
+
+struct tsens_syndrome {
+ u32 threshold_type;
+ s32 temp;
+} __packed;
+
+struct sysbus1_syndrome {
+ u32 slave;
+ u32 err_type;
+ u16 addr[8];
+ u8 instance;
+} __packed;
+
+struct sysbus2_syndrome {
+ u32 lsb3;
+ u32 msb3;
+ u32 lsb2;
+ u32 msb2;
+ u32 ext_id;
+ u16 path;
+ u16 op_type;
+ u16 len;
+ u16 redirect;
+ u8 valid;
+ u8 word_error;
+ u8 non_secure;
+ u8 opc;
+ u8 error_code;
+ u8 trans_type;
+ u8 addr_space;
+ u8 instance;
+} __packed;
+
+struct pcie_syndrome {
+ /* CE info */
+ u32 bad_tlp;
+ u32 bad_dllp;
+ u32 replay_rollover;
+ u32 replay_timeout;
+ u32 rx_err;
+ u32 internal_ce_count;
+ /* UE_NF info */
+ u32 fc_timeout;
+ u32 poison_tlp;
+ u32 ecrc_err;
+ u32 unsupported_req;
+ u32 completer_abort;
+ u32 completion_timeout;
+ /* UE info */
+ u32 addr;
+ u8 index;
+ /*
+ * Flag to indicate specific event of PCIe
+ * BIT(0): Power break (low power)
+ * BIT(1) to BIT(7): Reserved
+ */
+ u8 flag;
+} __packed;
+
+static const char * const threshold_type_str[NUM_TEMP_LVL] = {
+ [0] = "lower",
+ [1] = "upper",
+ [2] = "critical",
+};
+
+static void ras_msg_to_cpu(struct ras_data *msg)
+{
+ struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+ struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+ struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+ struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+ struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+ struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+ struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+ int i;
+
+ le16_to_cpus(&msg->magic);
+ le16_to_cpus(&msg->ver);
+ le32_to_cpus(&msg->seq_num);
+ le16_to_cpus(&msg->len);
+ le32_to_cpus(&msg->result);
+ le32_to_cpus(&msg->source);
+ le32_to_cpus(&msg->err_type);
+ le32_to_cpus(&msg->err_threshold);
+ le32_to_cpus(&msg->ce_count);
+ le32_to_cpus(&msg->ue_count);
+ le32_to_cpus(&msg->intr_num);
+
+ switch (msg->source) {
+ case SOC_MEM:
+ for (i = 0; i < 8; i++)
+ le64_to_cpus(&soc_syndrome->error_address[i]);
+ break;
+ case PCIE:
+ le32_to_cpus(&pcie_syndrome->bad_tlp);
+ le32_to_cpus(&pcie_syndrome->bad_dllp);
+ le32_to_cpus(&pcie_syndrome->replay_rollover);
+ le32_to_cpus(&pcie_syndrome->replay_timeout);
+ le32_to_cpus(&pcie_syndrome->rx_err);
+ le32_to_cpus(&pcie_syndrome->internal_ce_count);
+ le32_to_cpus(&pcie_syndrome->fc_timeout);
+ le32_to_cpus(&pcie_syndrome->poison_tlp);
+ le32_to_cpus(&pcie_syndrome->ecrc_err);
+ le32_to_cpus(&pcie_syndrome->unsupported_req);
+ le32_to_cpus(&pcie_syndrome->completer_abort);
+ le32_to_cpus(&pcie_syndrome->completion_timeout);
+ le32_to_cpus(&pcie_syndrome->addr);
+ break;
+ case DDR:
+ le16_to_cpus(&ddr_syndrome->instance);
+ le16_to_cpus(&ddr_syndrome->err_type);
+ le32_to_cpus(&ddr_syndrome->count);
+ le32_to_cpus(&ddr_syndrome->irq_status);
+ le32_to_cpus(&ddr_syndrome->data_31_0[0]);
+ le32_to_cpus(&ddr_syndrome->data_31_0[1]);
+ le32_to_cpus(&ddr_syndrome->data_63_32[0]);
+ le32_to_cpus(&ddr_syndrome->data_63_32[1]);
+ le32_to_cpus(&ddr_syndrome->data_95_64[0]);
+ le32_to_cpus(&ddr_syndrome->data_95_64[1]);
+ le32_to_cpus(&ddr_syndrome->data_127_96[0]);
+ le32_to_cpus(&ddr_syndrome->data_127_96[1]);
+ le16_to_cpus(&ddr_syndrome->parity_bits);
+ le16_to_cpus(&ddr_syndrome->addr_msb);
+ le32_to_cpus(&ddr_syndrome->addr_lsb);
+ break;
+ case SYS_BUS1:
+ le32_to_cpus(&sysbus1_syndrome->slave);
+ le32_to_cpus(&sysbus1_syndrome->err_type);
+ for (i = 0; i < 8; i++)
+ le16_to_cpus(&sysbus1_syndrome->addr[i]);
+ break;
+ case SYS_BUS2:
+ le16_to_cpus(&sysbus2_syndrome->op_type);
+ le16_to_cpus(&sysbus2_syndrome->len);
+ le16_to_cpus(&sysbus2_syndrome->redirect);
+ le16_to_cpus(&sysbus2_syndrome->path);
+ le32_to_cpus(&sysbus2_syndrome->ext_id);
+ le32_to_cpus(&sysbus2_syndrome->lsb2);
+ le32_to_cpus(&sysbus2_syndrome->msb2);
+ le32_to_cpus(&sysbus2_syndrome->lsb3);
+ le32_to_cpus(&sysbus2_syndrome->msb3);
+ break;
+ case NSP_MEM:
+ for (i = 0; i < 8; i++)
+ le32_to_cpus(&nsp_syndrome->error_address[i]);
+ break;
+ case TSENS:
+ le32_to_cpus(&tsens_syndrome->threshold_type);
+ le32_to_cpus(&tsens_syndrome->temp);
+ break;
+ }
+}
+
+static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg)
+{
+ struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
+ struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
+ struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
+ struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
+ struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
+ struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
+ struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
+ char *class;
+ char *level;
+
+ if (msg->magic != MAGIC) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic);
+ return;
+ }
+
+ if (!msg->ver || msg->ver > VERSION) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver);
+ return;
+ }
+
+ if (msg->type != MSG_PUSH) {
+ pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n");
+ return;
+ }
+
+ if (msg->len != sizeof(*msg) - HDR_SZ) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len);
+ return;
+ }
+
+ if (msg->err_type >= ERR_TYPE_MAX) {
+ pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type);
+ return;
+ }
+
+ if (msg->err_type == UE)
+ level = KERN_ERR;
+ else
+ level = KERN_WARNING;
+
+ switch (msg->source) {
+ case SOC_MEM:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ soc_syndrome->error_address[0],
+ soc_syndrome->error_address[1],
+ soc_syndrome->error_address[2],
+ soc_syndrome->error_address[3],
+ soc_syndrome->error_address[4],
+ soc_syndrome->error_address[5],
+ soc_syndrome->error_address[6],
+ soc_syndrome->error_address[7]);
+ break;
+ case PCIE:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold);
+
+ switch (msg->err_type) {
+ case CE:
+ /*
+ * Modeled after AER prints. This continues the dev_printk() from a few
+ * lines up. We reduce duplication of code, but also avoid re-printing the
+ * PCI device info so that the end result looks uniform to the log user.
+ */
+ printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"),
+ pcie_syndrome->bad_tlp,
+ pcie_syndrome->bad_dllp,
+ pcie_syndrome->replay_rollover,
+ pcie_syndrome->replay_timeout,
+ pcie_syndrome->rx_err,
+ pcie_syndrome->internal_ce_count);
+ if (msg->ver > 0x1)
+ pr_warn(" Power break %s\n",
+ pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF");
+ break;
+ case UE:
+ printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"),
+ pcie_syndrome->index, pcie_syndrome->addr);
+ break;
+ case UE_NF:
+ printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"),
+ pcie_syndrome->fc_timeout,
+ pcie_syndrome->poison_tlp,
+ pcie_syndrome->ecrc_err,
+ pcie_syndrome->unsupported_req,
+ pcie_syndrome->completer_abort,
+ pcie_syndrome->completion_timeout);
+ break;
+ default:
+ break;
+ }
+ break;
+ case DDR:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ ddr_syndrome->instance,
+ ddr_syndrome->count,
+ ddr_syndrome->data_31_0[1],
+ ddr_syndrome->data_31_0[0],
+ ddr_syndrome->data_63_32[1],
+ ddr_syndrome->data_63_32[0],
+ ddr_syndrome->data_95_64[1],
+ ddr_syndrome->data_95_64[0],
+ ddr_syndrome->data_127_96[1],
+ ddr_syndrome->data_127_96[0],
+ ddr_syndrome->parity_bits,
+ ddr_syndrome->addr_msb,
+ ddr_syndrome->addr_lsb);
+ break;
+ case SYS_BUS1:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ sysbus1_syndrome->instance,
+ sysbus1_syndrome->slave ? "Slave" : "Master",
+ sysbus1_syndrome->err_type,
+ sysbus1_syndrome->addr[0],
+ sysbus1_syndrome->addr[1],
+ sysbus1_syndrome->addr[2],
+ sysbus1_syndrome->addr[3],
+ sysbus1_syndrome->addr[4],
+ sysbus1_syndrome->addr[5],
+ sysbus1_syndrome->addr[6],
+ sysbus1_syndrome->addr[7]);
+ break;
+ case SYS_BUS2:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ sysbus2_syndrome->instance,
+ sysbus2_syndrome->valid,
+ sysbus2_syndrome->word_error,
+ sysbus2_syndrome->non_secure,
+ sysbus2_syndrome->opc,
+ sysbus2_syndrome->error_code,
+ sysbus2_syndrome->trans_type,
+ sysbus2_syndrome->addr_space,
+ sysbus2_syndrome->op_type,
+ sysbus2_syndrome->len,
+ sysbus2_syndrome->redirect,
+ sysbus2_syndrome->path,
+ sysbus2_syndrome->ext_id,
+ sysbus2_syndrome->lsb2,
+ sysbus2_syndrome->msb2,
+ sysbus2_syndrome->lsb3,
+ sysbus2_syndrome->msb3);
+ break;
+ case NSP_MEM:
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n",
+ err_class_str[msg->err_type],
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ nsp_syndrome->nsp_id,
+ nsp_syndrome->error_address[0],
+ nsp_syndrome->error_address[1],
+ nsp_syndrome->error_address[2],
+ nsp_syndrome->error_address[3],
+ nsp_syndrome->error_address[4],
+ nsp_syndrome->error_address[5],
+ nsp_syndrome->error_address[6],
+ nsp_syndrome->error_address[7]);
+ break;
+ case TSENS:
+ if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) {
+ pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n",
+ tsens_syndrome->threshold_type);
+ break;
+ }
+
+ if (msg->err_type)
+ class = "Fatal";
+ else if (tsens_syndrome->threshold_type)
+ class = "Critical";
+ else
+ class = "Warning";
+
+ dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n",
+ class,
+ err_type_str[msg->err_type],
+ "error from",
+ err_src_str[msg->source],
+ msg->err_threshold,
+ threshold_type_str[tsens_syndrome->threshold_type],
+ tsens_syndrome->temp);
+ break;
+ }
+
+ /* Uncorrectable errors are fatal */
+ if (msg->err_type == UE)
+ mhi_soc_reset(qdev->mhi_cntrl);
+
+ switch (msg->err_type) {
+ case CE:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ce_count++;
+ break;
+ case UE:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ue_count++;
+ break;
+ case UE_NF:
+ if (qdev->ce_count != UINT_MAX)
+ qdev->ue_nf_count++;
+ break;
+ default:
+ /* not possible */
+ break;
+ }
+}
+
+static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count);
+}
+
+static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count);
+}
+
+static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count);
+}
+
+static DEVICE_ATTR_RO(ce_count);
+static DEVICE_ATTR_RO(ue_count);
+static DEVICE_ATTR_RO(ue_nonfatal_count);
+
+static struct attribute *ras_attrs[] = {
+ &dev_attr_ce_count.attr,
+ &dev_attr_ue_count.attr,
+ &dev_attr_ue_nonfatal_count.attr,
+ NULL,
+};
+
+static struct attribute_group ras_group = {
+ .attrs = ras_attrs,
+};
+
+static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+ struct ras_data *resp;
+ int ret;
+
+ ret = mhi_prepare_for_transfer(mhi_dev);
+ if (ret)
+ return ret;
+
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ return -ENOMEM;
+ }
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT);
+ if (ret) {
+ kfree(resp);
+ mhi_unprepare_from_transfer(mhi_dev);
+ return ret;
+ }
+
+ ret = device_add_group(&qdev->pdev->dev, &ras_group);
+ if (ret) {
+ mhi_unprepare_from_transfer(mhi_dev);
+ pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret);
+ return ret;
+ }
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->ras_ch = mhi_dev;
+
+ return ret;
+}
+
+static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev)
+{
+ struct qaic_device *qdev;
+
+ qdev = dev_get_drvdata(&mhi_dev->dev);
+ qdev->ras_ch = NULL;
+ device_remove_group(&qdev->pdev->dev, &ras_group);
+ mhi_unprepare_from_transfer(mhi_dev);
+}
+
+static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {}
+
+static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct ras_data *msg = mhi_result->buf_addr;
+ int ret;
+
+ if (mhi_result->transaction_status) {
+ kfree(msg);
+ return;
+ }
+
+ ras_msg_to_cpu(msg);
+ decode_ras_msg(qdev, msg);
+
+ ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT);
+ if (ret) {
+ dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret);
+ kfree(msg);
+ }
+}
+
+static const struct mhi_device_id qaic_ras_mhi_match_table[] = {
+ { .chan = "QAIC_STATUS", },
+ {},
+};
+
+static struct mhi_driver qaic_ras_mhi_driver = {
+ .id_table = qaic_ras_mhi_match_table,
+ .remove = qaic_ras_mhi_remove,
+ .probe = qaic_ras_mhi_probe,
+ .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_ras",
+ },
+};
+
+int qaic_ras_register(void)
+{
+ return mhi_driver_register(&qaic_ras_mhi_driver);
+}
+
+void qaic_ras_unregister(void)
+{
+ mhi_driver_unregister(&qaic_ras_mhi_driver);
+}
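
The msg->len check in decode_ras_msg() relies on HDR_SZ matching the packed layout of struct ras_data: magic (2) + ver (2) + seq_num (4) + type (1) + id (1) + len (2) = 12 bytes, so the device-reported length must equal sizeof(struct ras_data) minus that header. A sketch of the accounting (not part of the driver):

	/* With __packed, the first post-header field lands at offset 12. */
	BUILD_BUG_ON(offsetof(struct ras_data, result) != HDR_SZ);

	if (msg->len != sizeof(struct ras_data) - HDR_SZ)
		return;		/* malformed message, drop it */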
diff --git a/drivers/accel/qaic/qaic_ras.h b/drivers/accel/qaic/qaic_ras.h
new file mode 100644
index 000000000000..d44a4eeeb060
--- /dev/null
+++ b/drivers/accel/qaic/qaic_ras.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */
+
+#ifndef __QAIC_RAS_H__
+#define __QAIC_RAS_H__
+
+int qaic_ras_register(void);
+void qaic_ras_unregister(void);
+
+#endif /* __QAIC_RAS_H__ */