summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/goya/goya.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c251
1 files changed, 138 insertions, 113 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e0ad2a269779..755e08cf2ecc 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -87,6 +87,7 @@
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
+#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
@@ -354,7 +355,7 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-int goya_get_fixed_properties(struct hl_device *hdev)
+int goya_set_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
@@ -460,8 +461,10 @@ int goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
- prop->fw_security_status_valid = false;
+ prop->fw_cpu_boot_dev_sts0_valid = false;
+ prop->fw_cpu_boot_dev_sts1_valid = false;
prop->hard_reset_done_by_fw = false;
+ prop->gic_interrupts_enable = true;
return 0;
}
@@ -531,10 +534,8 @@ static int goya_init_iatu(struct hl_device *hdev)
struct hl_outbound_pci_region outbound_region;
int rc;
- if (hdev->asic_prop.iatu_done_by_fw) {
- hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+ if (hdev->asic_prop.iatu_done_by_fw)
return 0;
- }
/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
inbound_region.mode = PCI_BAR_MATCH_MODE;
@@ -586,7 +587,7 @@ static int goya_early_init(struct hl_device *hdev)
u32 fw_boot_status, val;
int rc;
- rc = goya_get_fixed_properties(hdev);
+ rc = goya_set_fixed_properties(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to get fixed properties\n");
return rc;
@@ -618,7 +619,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
- if (!hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
hdev->asic_prop.iatu_done_by_fw = true;
goto pci_init;
}
@@ -642,8 +643,10 @@ pci_init:
* version to determine whether we run with a security-enabled firmware
*/
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
- mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
- GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
+ mmCPU_BOOT_DEV_STS0,
+ mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
+ mmCPU_BOOT_ERR1,
+ GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true);
@@ -723,7 +726,15 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
int rc;
- if (hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
+ rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
+ pll_freq_arr);
+
+ if (rc)
+ return;
+
+ freq = pll_freq_arr[1];
+ } else {
div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
nr = RREG32(mmPSOC_PCI_PLL_NR);
@@ -750,14 +761,6 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
div_sel);
freq = 0;
}
- } else {
- rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
- pll_freq_arr);
-
- if (rc)
- return;
-
- freq = pll_freq_arr[1];
}
prop->psoc_timestamp_frequency = freq;
@@ -849,6 +852,39 @@ void goya_late_fini(struct hl_device *hdev)
hdev->hl_chip_info->info = NULL;
}
+static void goya_set_pci_memory_regions(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct pci_mem_region *region;
+
+ /* CFG */
+ region = &hdev->pci_mem_region[PCI_REGION_CFG];
+ region->region_base = CFG_BASE;
+ region->region_size = CFG_SIZE;
+ region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
+ region->bar_size = CFG_BAR_SIZE;
+ region->bar_id = SRAM_CFG_BAR_ID;
+ region->used = 1;
+
+ /* SRAM */
+ region = &hdev->pci_mem_region[PCI_REGION_SRAM];
+ region->region_base = SRAM_BASE_ADDR;
+ region->region_size = SRAM_SIZE;
+ region->offset_in_bar = 0;
+ region->bar_size = CFG_BAR_SIZE;
+ region->bar_id = SRAM_CFG_BAR_ID;
+ region->used = 1;
+
+ /* DRAM */
+ region = &hdev->pci_mem_region[PCI_REGION_DRAM];
+ region->region_base = DRAM_PHYS_BASE;
+ region->region_size = hdev->asic_prop.dram_size;
+ region->offset_in_bar = 0;
+ region->bar_size = prop->dram_pci_bar_size;
+ region->bar_id = DDR_BAR_ID;
+ region->used = 1;
+}
+
/*
* goya_sw_init - Goya software initialization code
*
@@ -918,6 +954,9 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->supports_soft_reset = true;
+ hdev->allow_external_soft_reset = true;
+
+ goya_set_pci_memory_regions(hdev);
return 0;
@@ -1263,8 +1302,11 @@ int goya_init_cpu_queues(struct hl_device *hdev)
}
/* update FW application security bits */
- if (prop->fw_security_status_valid)
- prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+ if (prop->fw_cpu_boot_dev_sts0_valid)
+ prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
+
+ if (prop->fw_cpu_boot_dev_sts1_valid)
+ prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
goya->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
@@ -2402,47 +2444,67 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev)
return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
}
-/*
- * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
- * The version string should be located by that offset.
- */
-static int goya_read_device_fw_version(struct hl_device *hdev,
- enum hl_fw_component fwc)
-{
- const char *name;
- u32 ver_off;
- char *dest;
-
- switch (fwc) {
- case FW_COMP_UBOOT:
- ver_off = RREG32(mmUBOOT_VER_OFFSET);
- dest = hdev->asic_prop.uboot_ver;
- name = "U-Boot";
- break;
- case FW_COMP_PREBOOT:
- ver_off = RREG32(mmPREBOOT_VER_OFFSET);
- dest = hdev->asic_prop.preboot_ver;
- name = "Preboot";
- break;
- default:
- dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
- return -EIO;
- }
+static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
+{
+ struct dynamic_fw_load_mgr *dynamic_loader;
+ struct cpu_dyn_regs *dyn_regs;
- ver_off &= ~((u32)SRAM_BASE_ADDR);
+ dynamic_loader = &hdev->fw_loader.dynamic_loader;
- if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
- memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
- VERSION_MAX_LEN);
- } else {
- dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
- name, ver_off);
- strcpy(dest, "unavailable");
+ /*
+ * here we update initial values for few specific dynamic regs (as
+ * before reading the first descriptor from FW those value has to be
+ * hard-coded) in later stages of the protocol those values will be
+ * updated automatically by reading the FW descriptor so data there
+ * will always be up-to-date
+ */
+ dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
+ dyn_regs->kmd_msg_to_cpu =
+ cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
+ dyn_regs->cpu_cmd_status_to_host =
+ cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
- return -EIO;
- }
+ dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
+}
- return 0;
+static void goya_init_static_firmware_loader(struct hl_device *hdev)
+{
+ struct static_fw_load_mgr *static_loader;
+
+ static_loader = &hdev->fw_loader.static_loader;
+
+ static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
+ static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
+ static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
+ static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
+ static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
+ static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
+ static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
+ static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
+ static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
+ static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
+ static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
+ static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
+}
+
+static void goya_init_firmware_loader(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct fw_load_mgr *fw_loader = &hdev->fw_loader;
+
+ /* fill common fields */
+ fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
+ fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
+ fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
+ fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
+ fw_loader->skip_bmc = false;
+ fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
+ fw_loader->dram_bar_id = DDR_BAR_ID;
+
+ if (prop->dynamic_fw_load)
+ goya_init_dynamic_firmware_loader(hdev);
+ else
+ goya_init_static_firmware_loader(hdev);
}
static int goya_init_cpu(struct hl_device *hdev)
@@ -2466,12 +2528,7 @@ static int goya_init_cpu(struct hl_device *hdev)
return -EIO;
}
- rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
- mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
- mmCPU_CMD_STATUS_TO_HOST,
- mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
- false, GOYA_CPU_TIMEOUT_USEC,
- GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
+ rc = hl_fw_init_cpu(hdev);
if (rc)
return rc;
@@ -2881,7 +2938,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
*dma_handle = hdev->asic_prop.sram_base_address;
- base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
+ base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
switch (queue_id) {
case GOYA_QUEUE_ID_MME:
@@ -3270,6 +3327,7 @@ already_pinned:
return 0;
unpin_memory:
+ list_del(&userptr->job_node);
hl_unpin_host_memory(hdev, userptr);
free_userptr:
kfree(userptr);
@@ -5169,54 +5227,13 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
}
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
- bool is_hard, u32 asid, u64 va, u64 size)
+ bool is_hard, u32 flags,
+ u32 asid, u64 va, u64 size)
{
- struct goya_device *goya = hdev->asic_specific;
- u32 status, timeout_usec, inv_data, pi;
- int rc;
-
- if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
- hdev->hard_reset_pending)
- return 0;
-
- /* no need in L1 only invalidation in Goya */
- if (!is_hard)
- return 0;
-
- if (hdev->pldm)
- timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
- else
- timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
- /*
- * TODO: currently invalidate entire L0 & L1 as in regular hard
- * invalidation. Need to apply invalidation of specific cache lines with
- * mask of ASID & VA & size.
- * Note that L1 with be flushed entirely in any case.
+ /* Treat as invalidate all because there is no range invalidation
+ * in Goya
*/
-
- /* L0 & L1 invalidation */
- inv_data = RREG32(mmSTLB_CACHE_INV);
- /* PI is 8 bit */
- pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
- WREG32(mmSTLB_CACHE_INV,
- (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
-
- rc = hl_poll_timeout(
- hdev,
- mmSTLB_INV_CONSUMER_INDEX,
- status,
- status == pi,
- 1000,
- timeout_usec);
-
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "MMU cache invalidation timeout\n");
- hl_device_reset(hdev, HL_RESET_HARD);
- }
-
- return rc;
+ return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
int goya_send_heartbeat(struct hl_device *hdev)
@@ -5239,7 +5256,9 @@ int goya_cpucp_info_get(struct hl_device *hdev)
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+ rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
+ mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
+ mmCPU_BOOT_ERR1);
if (rc)
return rc;
@@ -5385,6 +5404,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
return hl_fw_get_eeprom_data(hdev, data, max_size);
}
+static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
+{
+
+}
+
static int goya_ctx_init(struct hl_ctx *ctx)
{
if (ctx->asid != HL_KERNEL_ASID_ID)
@@ -5565,7 +5589,6 @@ static const struct hl_asic_funcs goya_funcs = {
.ctx_fini = goya_ctx_fini,
.get_clk_rate = goya_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
- .read_device_fw_version = goya_read_device_fw_version,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
.get_signal_cb_size = goya_get_signal_cb_size,
@@ -5584,7 +5607,9 @@ static const struct hl_asic_funcs goya_funcs = {
.get_hw_block_id = goya_get_hw_block_id,
.hw_block_mmap = goya_block_mmap,
.enable_events_from_fw = goya_enable_events_from_fw,
- .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx
+ .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
+ .init_firmware_loader = goya_init_firmware_loader,
+ .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
};
/*