diff options
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/atmel-ssc.c | 24 | ||||
-rw-r--r-- | drivers/misc/habanalabs/command_submission.c | 27 | ||||
-rw-r--r-- | drivers/misc/habanalabs/debugfs.c | 27 | ||||
-rw-r--r-- | drivers/misc/habanalabs/device.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/firmware_if.c | 10 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 160 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudiP.h | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 20 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 19 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/hwmon.c | 19 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/gaudi_packets.h | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/sysfs.c | 11 | ||||
-rw-r--r-- | drivers/misc/mei/bus.c | 3 | ||||
-rw-r--r-- | drivers/misc/mei/hw-me-regs.h | 3 | ||||
-rw-r--r-- | drivers/misc/mei/hw-me.c | 70 | ||||
-rw-r--r-- | drivers/misc/mei/hw-me.h | 17 | ||||
-rw-r--r-- | drivers/misc/mei/pci-me.c | 17 |
18 files changed, 306 insertions, 131 deletions
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index ab4144ea1f11..d6cd5537126c 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -10,7 +10,7 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/io.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/atmel-ssc.h> #include <linux/slab.h> #include <linux/module.h> @@ -20,7 +20,7 @@ #include "../../sound/soc/atmel/atmel_ssc_dai.h" /* Serialize access to ssc_list and user count */ -static DEFINE_SPINLOCK(user_lock); +static DEFINE_MUTEX(user_lock); static LIST_HEAD(ssc_list); struct ssc_device *ssc_request(unsigned int ssc_num) @@ -28,7 +28,7 @@ struct ssc_device *ssc_request(unsigned int ssc_num) int ssc_valid = 0; struct ssc_device *ssc; - spin_lock(&user_lock); + mutex_lock(&user_lock); list_for_each_entry(ssc, &ssc_list, list) { if (ssc->pdev->dev.of_node) { if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc") @@ -44,18 +44,18 @@ struct ssc_device *ssc_request(unsigned int ssc_num) } if (!ssc_valid) { - spin_unlock(&user_lock); + mutex_unlock(&user_lock); pr_err("ssc: ssc%d platform device is missing\n", ssc_num); return ERR_PTR(-ENODEV); } if (ssc->user) { - spin_unlock(&user_lock); + mutex_unlock(&user_lock); dev_dbg(&ssc->pdev->dev, "module busy\n"); return ERR_PTR(-EBUSY); } ssc->user++; - spin_unlock(&user_lock); + mutex_unlock(&user_lock); clk_prepare(ssc->clk); @@ -67,14 +67,14 @@ void ssc_free(struct ssc_device *ssc) { bool disable_clk = true; - spin_lock(&user_lock); + mutex_lock(&user_lock); if (ssc->user) ssc->user--; else { disable_clk = false; dev_dbg(&ssc->pdev->dev, "device already free\n"); } - spin_unlock(&user_lock); + mutex_unlock(&user_lock); if (disable_clk) clk_unprepare(ssc->clk); @@ -237,9 +237,9 @@ static int ssc_probe(struct platform_device *pdev) return -ENXIO; } - spin_lock(&user_lock); + mutex_lock(&user_lock); list_add_tail(&ssc->list, &ssc_list); - spin_unlock(&user_lock); + mutex_unlock(&user_lock); platform_set_drvdata(pdev, ssc); @@ -258,9 +258,9 @@ static int ssc_remove(struct platform_device *pdev) ssc_sound_dai_remove(ssc); - spin_lock(&user_lock); + mutex_lock(&user_lock); list_del(&ssc->list); - spin_unlock(&user_lock); + mutex_unlock(&user_lock); return 0; } diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index f82974a916c3..f3a8f113865d 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -62,6 +62,12 @@ static void hl_fence_release(struct dma_fence *fence) container_of(fence, struct hl_cs_compl, base_fence); struct hl_device *hdev = hl_cs_cmpl->hdev; + /* EBUSY means the CS was never submitted and hence we don't have + * an attached hw_sob object that we should handle here + */ + if (fence->error == -EBUSY) + goto free; + if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || (hl_cs_cmpl->type == CS_TYPE_WAIT)) { @@ -92,6 +98,7 @@ static void hl_fence_release(struct dma_fence *fence) kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); } +free: kfree_rcu(hl_cs_cmpl, base_fence.rcu); } @@ -328,10 +335,16 @@ static void cs_do_release(struct kref *ref) hl_ctx_put(cs->ctx); + /* We need to mark an error for not submitted because in that case + * the dma fence release flow is different. Mainly, we don't need + * to handle hw_sob for signal/wait + */ if (cs->timedout) dma_fence_set_error(cs->fence, -ETIMEDOUT); else if (cs->aborted) dma_fence_set_error(cs->fence, -EIO); + else if (!cs->submitted) + dma_fence_set_error(cs->fence, -EBUSY); dma_fence_signal(cs->fence); dma_fence_put(cs->fence); @@ -486,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev, struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= HL_MAX_QUEUES) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - if ((chunk->queue_index >= HL_MAX_QUEUES) || - (hw_queue_prop->type == QUEUE_TYPE_NA)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is not applicable\n", chunk->queue_index); return -EINVAL; } diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 3c8dcdfba20c..0bc036e01ee8 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, (long *) val); + 0, (long *) val); if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.value = cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); @@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.value = cpu_to_le64(state); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); @@ -480,7 +480,7 @@ out: return 0; } -static ssize_t mmu_write(struct file *file, const char __user *buf, +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, size_t count, loff_t *f_pos) { struct seq_file *s = file->private_data; @@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, if (*ppos) return 0; - sprintf(tmp_buf, "%d\n", hdev->clock_gating); + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, strlen(tmp_buf) + 1); @@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u32 value; + u64 value; ssize_t rc; if (atomic_read(&hdev->in_reset)) { @@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, return 0; } - rc = kstrtouint_from_user(buf, count, 10, &value); + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (value) { - hdev->clock_gating = 1; - if (hdev->asic_funcs->enable_clock_gating) - hdev->asic_funcs->enable_clock_gating(hdev); - } else { - if (hdev->asic_funcs->disable_clock_gating) - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->clock_gating = 0; - } + hdev->clock_gating_mask = value; + hdev->asic_funcs->set_clock_gating(hdev); return count; } @@ -1125,7 +1118,7 @@ static const struct hl_info_list hl_debugfs_list[] = { {"command_submission_jobs", command_submission_jobs_show, NULL}, {"userptr", userptr_show, NULL}, {"vm", vm_show, NULL}, - {"mmu", mmu_show, mmu_write}, + {"mmu", mmu_show, mmu_asid_va_write}, {"engines", engines_show, NULL} }; diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2b38a119704c..59608d1bac88 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) hdev->in_debug = 0; if (!hdev->hard_reset_pending) - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); goto out; } diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index baf790cf4b78..d27841cb5bcb 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); + sizeof(pkt), 0, NULL); } int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, @@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(test_pkt), 0, &result); if (!rc) { if (result != ARMCP_PACKET_FENCE_VAL) @@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(hb_pkt), 0, &result); if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) rc = -EIO; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 61f88e9884ce..637a9d608707 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -80,6 +80,7 @@ #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 @@ -96,7 +97,12 @@ #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 -#define GAUDI_ARB_WDT_TIMEOUT 0x400000 +#define GAUDI_ARB_WDT_TIMEOUT 0x1000000 + +#define GAUDI_CLK_GATE_DEBUGFS_MASK (\ + BIT(GAUDI_ENGINE_ID_MME_0) |\ + BIT(GAUDI_ENGINE_ID_MME_2) |\ + GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", @@ -106,14 +112,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { }; static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { - [GAUDI_PCI_DMA_1] = 0, - [GAUDI_PCI_DMA_2] = 1, - [GAUDI_PCI_DMA_3] = 5, - [GAUDI_HBM_DMA_1] = 2, - [GAUDI_HBM_DMA_2] = 3, - [GAUDI_HBM_DMA_3] = 4, - [GAUDI_HBM_DMA_4] = 6, - [GAUDI_HBM_DMA_5] = 7 + [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, + [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, + [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, + [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, + [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, + [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, + [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, + [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 }; static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { @@ -1819,7 +1825,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_rate_limiter(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; @@ -1893,6 +1899,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); + WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); + /* The following configuration is needed only once per QMAN */ if (qman_id == 0) { /* Configure RAZWI IRQ */ @@ -2529,46 +2537,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_enable_clock_gating(struct hl_device *hdev) +static void gaudi_set_clock_gating(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; - if (!hdev->clock_gating) - return; - - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) - return; - /* In case we are during debug session, don't enable the clock gate * as it may interfere */ if (hdev->in_debug) return; - for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_UPPER_CP_CGM_PWR_GATE_EN); } - for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_COMMON_CP_CGM_PWR_GATE_EN); } - WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME0_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } + + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) + continue; + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmTPC0_QM_CGM_CFG + qman_offset, @@ -2661,7 +2678,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); msleep(wait_timeout_ms); @@ -2725,6 +2742,12 @@ static int gaudi_mmu_init(struct hl_device *hdev) WREG32(mmSTLB_HOP_CONFIGURATION, hdev->mmu_huge_page_opt ? 0x30440 : 0x40440); + /* + * The H/W expects the first PI after init to be 1. After wraparound + * we'll write 0. + */ + gaudi->mmu_cache_inv_pi = 1; + gaudi->hw_cap_initialized |= HW_CAP_MMU; return 0; @@ -2995,7 +3018,7 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_tpc_qmans(hdev); - gaudi_enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); gaudi_enable_timestamp(hdev); @@ -3104,7 +3127,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER); + HW_CAP_HBM_SCRAMBLER | + HW_CAP_CLK_GATE); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); } @@ -3455,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, return 0; } + if (!timeout) + timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -3790,6 +3818,25 @@ static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, src_in_host); } +static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, + struct hl_cs_parser *parser, + struct packet_load_and_exe *user_pkt) +{ + u32 cfg; + + cfg = le32_to_cpu(user_pkt->cfg); + + if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { + dev_err(hdev->dev, + "User not allowed to use Load and Execute\n"); + return -EPERM; + } + + parser->patched_cb_size += sizeof(struct packet_load_and_exe); + + return 0; +} + static int gaudi_validate_cb(struct hl_device *hdev, struct hl_cs_parser *parser, bool is_mmu) { @@ -3838,6 +3885,17 @@ static int gaudi_validate_cb(struct hl_device *hdev, rc = -EPERM; break; + case PACKET_WREG_BULK: + dev_err(hdev->dev, + "User not allowed to use WREG_BULK\n"); + rc = -EPERM; + break; + + case PACKET_LOAD_AND_EXE: + rc = gaudi_validate_load_and_exe_pkt(hdev, parser, + (struct packet_load_and_exe *) user_pkt); + break; + case PACKET_LIN_DMA: parser->contains_dma_pkt = true; if (is_mmu) @@ -3848,14 +3906,12 @@ static int gaudi_validate_cb(struct hl_device *hdev, break; case PACKET_WREG_32: - case PACKET_WREG_BULK: case PACKET_MSG_LONG: case PACKET_MSG_SHORT: case PACKET_REPEAT: case PACKET_FENCE: case PACKET_NOP: case PACKET_ARB_POINT: - case PACKET_LOAD_AND_EXE: parser->patched_cb_size += pkt_size; break; @@ -4490,13 +4546,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; } else { *val = RREG32(addr - CFG_BASE); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4532,13 +4593,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; } else { WREG32(addr - CFG_BASE, val); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { writel(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4574,7 +4640,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4584,6 +4654,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) *val = (((u64) val_h) << 32) | val_l; } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4620,7 +4691,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4629,6 +4704,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4850,7 +4926,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } @@ -5231,7 +5307,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev, } if (disable_clock_gating) { - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } } @@ -5718,7 +5794,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -5994,6 +6070,8 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, mutex_lock(&hdev->mmu_cache_lock); /* L0 & L1 invalidation */ + WREG32(mmSTLB_INV_PS, 3); + WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); WREG32(mmSTLB_INV_PS, 2); rc = hl_poll_timeout( @@ -6232,7 +6310,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6333,7 +6411,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -6362,7 +6440,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); if (rc) { @@ -6703,7 +6781,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .enable_clock_gating = gaudi_enable_clock_gating, + .set_clock_gating = gaudi_set_clock_gating, .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index a46530d375fa..41a8d9bff6bf 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -229,6 +229,8 @@ struct gaudi_internal_qman_info { * @multi_msi_mode: whether we are working in multi MSI single MSI mode. * Multi MSI is possible only with IOMMU enabled. * @ext_queue_idx: helper index for external queues initialization. + * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an + * 8-bit value so use u8. */ struct gaudi_device { int (*armcp_info_get)(struct hl_device *hdev); @@ -248,6 +250,7 @@ struct gaudi_device { u32 hw_cap_initialized; u8 multi_msi_mode; u8 ext_queue_idx; + u8 mmu_cache_inv_pi; }; void gaudi_init_security(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58df..88460b2138d8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -88,6 +88,7 @@ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } + if (!timeout) + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -5028,14 +5032,14 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static void goya_disable_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5259,7 +5263,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_enable_clock_gating, + .set_clock_gating = goya_set_clock_gating, .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1ecdcf8b763a..194d83352696 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -578,8 +578,9 @@ enum hl_pll_frequency { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. + * @set_clock_gating: enable/disable clock gating per engine according to + * clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. @@ -587,7 +588,11 @@ enum hl_pll_frequency { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. + * @send_cpu_message: send message to F/W. If the message is timedout, the + * driver will eventually reset the device. The timeout can + * be determined by the calling function or it can be 0 and + * then the timeout is the default timeout for the specific + * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns @@ -680,7 +685,7 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); + void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, @@ -1398,6 +1403,9 @@ struct hl_device_idle_busy_ts { * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization + * @clock_gating_mask: is clock gating enabled. bitmask that represents the + * different engines. See debugfs-driver-habanalabs for + * details. * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @cs_active_cnt: number of active command submissions on this device (active @@ -1425,7 +1433,6 @@ struct hl_device_idle_busy_ts { * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device * @in_debug: is device under debug. This, together with fpriv_list, enforces @@ -1493,6 +1500,7 @@ struct hl_device { atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; + u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; int cs_active_cnt; @@ -1514,7 +1522,6 @@ struct hl_device { u8 dram_default_page_mapping; u8 pmmu_huge_range; u8 init_done; - u8 clock_gating; u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8652c7e5d7f1..22716da9f85f 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->fw_loading = 1; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; - hdev->clock_gating = 1; + hdev->clock_gating_mask = ULONG_MAX; hdev->reset_pcilink = 0; hdev->axi_drain = 0; diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 8c6cd77e6af6..b997336fa75f 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -10,7 +10,6 @@ #include <linux/pci.h> #include <linux/hwmon.h> -#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -400,7 +399,7 @@ int hl_get_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -502,7 +501,7 @@ int hl_set_voltage(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h index 9a5800b0086b..0f0cd067bb43 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h @@ -197,6 +197,9 @@ struct packet_wait { __le32 ctl; }; +#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_SHIFT 0 +#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK 0x00000001 + struct packet_load_and_exe { __le32 cfg; __le32 ctl; diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 5d78d5e1c782..70b6b1863c2e 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -9,9 +9,6 @@ #include <linux/pci.h> -#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ -#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ - long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct armcp_packet pkt; @@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, @@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev) ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); @@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 8d468e0a950a..f476dbc7252b 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -745,9 +745,8 @@ static int mei_cl_device_remove(struct device *dev) mei_cl_bus_module_put(cldev); module_put(THIS_MODULE); - dev->driver = NULL; - return ret; + return ret; } static ssize_t name_show(struct device *dev, struct device_attribute *a, diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 9392934e3a06..7becfc768bbc 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -94,6 +94,7 @@ #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ #define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ +#define MEI_DEV_ID_TGP_H 0x43E0 /* Tiger Lake Point H */ #define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ #define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ @@ -107,6 +108,8 @@ # define PCI_CFG_HFS_1_D0I3_MSK 0x80000000 #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 +# define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 +# define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060 #define PCI_CFG_HFS_4 0x64 #define PCI_CFG_HFS_5 0x68 #define PCI_CFG_HFS_6 0x6C diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index f620442addf5..7649710a2ab9 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1366,7 +1366,7 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev) #define MEI_CFG_FW_NM \ .quirk_probe = mei_me_fw_type_nm -static bool mei_me_fw_type_sps(struct pci_dev *pdev) +static bool mei_me_fw_type_sps_4(struct pci_dev *pdev) { u32 reg; unsigned int devfn; @@ -1382,7 +1382,36 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) return (reg & 0xf0000) == 0xf0000; } -#define MEI_CFG_FW_SPS \ +#define MEI_CFG_FW_SPS_4 \ + .quirk_probe = mei_me_fw_type_sps_4 + +/** + * mei_me_fw_sku_sps() - check for sps sku + * + * Read ME FW Status register to check for SPS Firmware. + * The SPS FW is only signaled in pci function 0 + * + * @pdev: pci device + * + * Return: true in case of SPS firmware + */ +static bool mei_me_fw_type_sps(struct pci_dev *pdev) +{ + u32 reg; + u32 fw_type; + unsigned int devfn; + + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_3, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_3", PCI_CFG_HFS_3, reg); + fw_type = (reg & PCI_CFG_HFS_3_FW_SKU_MSK); + + dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); + + return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; +} + +#define MEI_CFG_FW_SPS \ .quirk_probe = mei_me_fw_type_sps #define MEI_CFG_FW_VER_SUPP \ @@ -1452,10 +1481,17 @@ static const struct mei_cfg mei_me_pch8_cfg = { }; /* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ -static const struct mei_cfg mei_me_pch8_sps_cfg = { +static const struct mei_cfg mei_me_pch8_sps_4_cfg = { MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_4, +}; + +/* LBG with quirk for SPS (4.0) Firmware exclusion */ +static const struct mei_cfg mei_me_pch12_sps_4_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS_4, }; /* Cannon Lake and newer devices */ @@ -1465,10 +1501,20 @@ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_DMA_128, }; -/* LBG with quirk for SPS Firmware exclusion */ +/* Cannon Lake with quirk for SPS 5.0 and newer Firmware exclusion */ static const struct mei_cfg mei_me_pch12_sps_cfg = { MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_FW_SPS, +}; + +/* Cannon Lake with quirk for SPS 5.0 and newer Firmware exclusion + * w/o DMA support + */ +static const struct mei_cfg mei_me_pch12_nodma_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_FW_SPS, }; @@ -1480,6 +1526,15 @@ static const struct mei_cfg mei_me_pch15_cfg = { MEI_CFG_TRC, }; +/* Tiger Lake with quirk for SPS 5.0 and newer Firmware exclusion */ +static const struct mei_cfg mei_me_pch15_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, + MEI_CFG_FW_SPS, +}; + /* * mei_cfg_list - A list of platform platform specific configurations. * Note: has to be synchronized with enum mei_cfg_idx. @@ -1492,10 +1547,13 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg, [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg, [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, - [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, + [MEI_ME_PCH8_SPS_4_CFG] = &mei_me_pch8_sps_4_cfg, [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH12_SPS_4_CFG] = &mei_me_pch12_sps_4_cfg, [MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg, + [MEI_ME_PCH12_SPS_NODMA_CFG] = &mei_me_pch12_nodma_sps_cfg, [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, + [MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index b6b94e211464..6a8973649c49 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2020, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -76,14 +76,20 @@ struct mei_me_hw { * with quirk for Node Manager exclusion. * @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer * client platforms. - * @MEI_ME_PCH8_SPS_CFG: Platform Controller Hub Gen8 and newer + * @MEI_ME_PCH8_SPS_4_CFG: Platform Controller Hub Gen8 and newer * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer - * @MEI_ME_PCH12_SPS_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH12_SPS_4_CFG:Platform Controller Hub Gen12 up to 4.0 + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_PCH12_SPS_CFG: Platform Controller Hub Gen12 5.0 and newer * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer + * @MEI_ME_PCH15_SPS_CFG: Platform Controller Hub Gen15 and newer + * servers platforms with quirk for + * SPS firmware exclusion. * @MEI_ME_NUM_CFG: Upper Sentinel. */ enum mei_cfg_idx { @@ -94,10 +100,13 @@ enum mei_cfg_idx { MEI_ME_PCH7_CFG, MEI_ME_PCH_CPT_PBG_CFG, MEI_ME_PCH8_CFG, - MEI_ME_PCH8_SPS_CFG, + MEI_ME_PCH8_SPS_4_CFG, MEI_ME_PCH12_CFG, + MEI_ME_PCH12_SPS_4_CFG, MEI_ME_PCH12_SPS_CFG, + MEI_ME_PCH12_SPS_NODMA_CFG, MEI_ME_PCH15_CFG, + MEI_ME_PCH15_SPS_CFG, MEI_ME_NUM_CFG, }; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 71f795b510ce..2a3f2fd5df50 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -59,18 +59,18 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_4_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_4_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_4_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, @@ -84,8 +84,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH12_SPS_NODMA_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, @@ -96,6 +96,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_H, MEI_ME_PCH15_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, |