summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
author Ofir Bitton <obitton@habana.ai> 2021-01-28 16:30:25 +0200
committer Oded Gabbay <ogabbay@kernel.org> 2021-02-08 18:20:08 +0200
commit 5dbd7b4de6ef84321cc1378eccdd92d4730c2e56 (patch)
tree f356ec10cf6ca9f7a9ae591b44b243491a8c7a70 /drivers/misc/habanalabs/common
parent 6c1e3f92f9f1dfc7f14b43fd432c8ec95b1a188f (diff)
habanalabs: improve communication protocol with cpucp
Current messaging communication protocol with cpucp can get out of sync due to coherency issues. In order to improve the protocol reliability, we modify the protocol to expect a different acknowledgment for every packet sent to cpucp. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r-- drivers/misc/habanalabs/common/firmware_if.c 17
-rw-r--r-- drivers/misc/habanalabs/common/habanalabs.h 3
2 files changed, 18 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index ba6920f2b4ab..31b52a223f02 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result)
{
+ struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
- u32 tmp;
+ u32 tmp, expected_ack_val;
int rc = 0;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -115,14 +116,22 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
+ /* set fence to a non valid value */
+ pkt->fence = UINT_MAX;
+
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
if (rc) {
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
goto out;
}
+ if (hdev->asic_prop.fw_cpucp_ack_with_pi)
+ expected_ack_val = queue->pi;
+ else
+ expected_ack_val = CPUCP_PACKET_FENCE_VAL;
+
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
- (tmp == CPUCP_PACKET_FENCE_VAL), 1000,
+ (tmp == expected_ack_val), 1000,
timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -777,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
+ if (prop->fw_boot_cpu_security_map &
+ CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
+ prop->fw_cpucp_ack_with_pi = true;
+
dev_dbg(hdev->dev,
"Firmware boot CPU security status %#x\n",
prop->fw_boot_cpu_security_map);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 30f32f2edb8a..3c54010f7ab9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -419,6 +419,8 @@ struct hl_mmu_properties {
* from BOOT_DEV_STS0
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
+ * @fw_cpucp_ack_with_pi: true if cpucp is acking messages with the PQ PI
+ * instead of a magic number
* @num_functional_hbms: number of functional HBMs in each DCORE.
*/
struct asic_fixed_properties {
@@ -479,6 +481,7 @@ struct asic_fixed_properties {
u8 fw_security_status_valid;
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
+ u8 fw_cpucp_ack_with_pi;
u8 num_functional_hbms;
};