diff options
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 216 |
1 files changed, 141 insertions, 75 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58df..85030759b2af 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -6,10 +6,10 @@ */ #include "goyaP.h" -#include "include/hw_ip/mmu/mmu_general.h" -#include "include/hw_ip/mmu/mmu_v1_0.h" -#include "include/goya/asic_reg/goya_masks.h" -#include "include/goya/goya_reg_map.h" +#include "../include/hw_ip/mmu/mmu_general.h" +#include "../include/hw_ip/mmu/mmu_v1_0.h" +#include "../include/goya/asic_reg/goya_masks.h" +#include "../include/goya/goya_reg_map.h" #include <linux/pci.h> #include <linux/genalloc.h> @@ -88,6 +88,7 @@ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -337,11 +338,19 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev); static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); -void goya_get_fixed_properties(struct hl_device *hdev) +int goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; int i; + prop->max_queues = GOYA_QUEUE_ID_SIZE; + prop->hw_queues_props = kcalloc(prop->max_queues, + sizeof(struct hw_queue_properties), + GFP_KERNEL); + + if (!prop->hw_queues_props) + return -ENOMEM; + for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].driver_only = 0; @@ -361,9 +370,6 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->hw_queues_props[i].requires_kernel_cb = 0; } - for (; i < HL_MAX_QUEUES; i++) - prop->hw_queues_props[i].type = QUEUE_TYPE_NA; - prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->dram_base_address = DRAM_PHYS_BASE; @@ -426,6 +432,10 @@ void goya_get_fixed_properties(struct hl_device *hdev) strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); + + prop->max_pending_cs = GOYA_MAX_PENDING_CS; + + return 0; } /* @@ -456,6 +466,7 @@ static int goya_pci_bars_map(struct hl_device *hdev) static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; + struct hl_inbound_pci_region pci_region; u64 old_addr = addr; int rc; @@ -463,7 +474,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) return old_addr; /* Inbound Region 1 - Bar 4 - Point to DDR */ - rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr); + pci_region.mode = PCI_BAR_MATCH_MODE; + pci_region.bar = DDR_BAR_ID; + pci_region.addr = addr; + rc = hl_pci_set_inbound_region(hdev, 1, &pci_region); if (rc) return U64_MAX; @@ -485,8 +499,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) */ static int goya_init_iatu(struct hl_device *hdev) { - return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE, - HOST_PHYS_BASE, HOST_PHYS_SIZE); + struct hl_inbound_pci_region inbound_region; + struct hl_outbound_pci_region outbound_region; + int rc; + + /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */ + inbound_region.mode = PCI_BAR_MATCH_MODE; + inbound_region.bar = SRAM_CFG_BAR_ID; + inbound_region.addr = SRAM_BASE_ADDR; + rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); + if (rc) + goto done; + + /* Inbound Region 1 - Bar 4 - Point to DDR */ + inbound_region.mode = PCI_BAR_MATCH_MODE; + inbound_region.bar = DDR_BAR_ID; + inbound_region.addr = DRAM_PHYS_BASE; + rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); + if (rc) + goto done; + + hdev->asic_funcs->set_dma_mask_from_fw(hdev); + + /* Outbound Region 0 - Point to Host */ + outbound_region.addr = HOST_PHYS_BASE; + outbound_region.size = HOST_PHYS_SIZE; + rc = hl_pci_set_outbound_region(hdev, &outbound_region); + +done: + return rc; } /* @@ -507,7 +548,11 @@ static int goya_early_init(struct hl_device *hdev) u32 val; int rc; - goya_get_fixed_properties(hdev); + rc = goya_get_fixed_properties(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to get fixed properties\n"); + return rc; + } /* Check BAR sizes */ if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) { @@ -517,7 +562,8 @@ static int goya_early_init(struct hl_device *hdev) (unsigned long long) pci_resource_len(pdev, SRAM_CFG_BAR_ID), CFG_BAR_SIZE); - return -ENODEV; + rc = -ENODEV; + goto free_queue_props; } if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) { @@ -527,14 +573,15 @@ static int goya_early_init(struct hl_device *hdev) (unsigned long long) pci_resource_len(pdev, MSIX_BAR_ID), MSIX_BAR_SIZE); - return -ENODEV; + rc = -ENODEV; + goto free_queue_props; } prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); rc = hl_pci_init(hdev); if (rc) - return rc; + goto free_queue_props; if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); @@ -544,6 +591,10 @@ static int goya_early_init(struct hl_device *hdev) } return 0; + +free_queue_props: + kfree(hdev->asic_prop.hw_queues_props); + return rc; } /* @@ -556,6 +607,7 @@ static int goya_early_init(struct hl_device *hdev) */ static int goya_early_fini(struct hl_device *hdev) { + kfree(hdev->asic_prop.hw_queues_props); hl_pci_fini(hdev); return 0; @@ -592,11 +644,36 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure) static void goya_fetch_psoc_frequency(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 trace_freq = 0; + u32 pll_clk = 0; + u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); + u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); + u32 nr = RREG32(mmPSOC_PCI_PLL_NR); + u32 nf = RREG32(mmPSOC_PCI_PLL_NF); + u32 od = RREG32(mmPSOC_PCI_PLL_OD); + + if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { + if (div_sel == DIV_SEL_REF_CLK) + trace_freq = PLL_REF_CLK; + else + trace_freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + trace_freq = pll_clk; + else + trace_freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", div_sel); + } - prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR); - prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF); - prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD); - prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); + prop->psoc_timestamp_frequency = trace_freq; + prop->psoc_pci_pll_nr = nr; + prop->psoc_pci_pll_nf = nf; + prop->psoc_pci_pll_od = od; + prop->psoc_pci_pll_div_factor = div_fctr; } int goya_late_init(struct hl_device *hdev) @@ -2164,29 +2241,15 @@ static void goya_disable_timestamp(struct hl_device *hdev) static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) { - u32 wait_timeout_ms, cpu_timeout_ms; + u32 wait_timeout_ms; dev_info(hdev->dev, "Halting compute engines and disabling interrupts\n"); - if (hdev->pldm) { + if (hdev->pldm) wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC; - cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC; - } else { + else wait_timeout_ms = GOYA_RESET_WAIT_MSEC; - cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC; - } - - if (hard_reset) { - /* - * I don't know what is the state of the CPU so make sure it is - * stopped in any means necessary - */ - WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE); - WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, - GOYA_ASYNC_EVENT_ID_HALT_MACHINE); - msleep(cpu_timeout_ms); - } goya_stop_external_queues(hdev); goya_stop_internal_queues(hdev); @@ -2491,14 +2554,26 @@ disable_queues: static void goya_hw_fini(struct hl_device *hdev, bool hard_reset) { struct goya_device *goya = hdev->asic_specific; - u32 reset_timeout_ms, status; + u32 reset_timeout_ms, cpu_timeout_ms, status; - if (hdev->pldm) + if (hdev->pldm) { reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC; - else + cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC; + } else { reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC; + cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC; + } if (hard_reset) { + /* I don't know what is the state of the CPU so make sure it is + * stopped in any means necessary + */ + WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE); + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, + GOYA_ASYNC_EVENT_ID_HALT_MACHINE); + + msleep(cpu_timeout_ms); + goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE); goya_disable_clk_rlx(hdev); goya_set_pll_refclk(hdev); @@ -2830,6 +2905,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } + if (!timeout) + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -3697,9 +3775,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, parser->patched_cb_size = parser->user_cb_size + sizeof(struct packet_msg_prot) * 2; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, - parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, + &patched_cb_handle, HL_KERNEL_ASID_ID, false); if (rc) { dev_err(hdev->dev, @@ -3771,9 +3848,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, - parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, + &patched_cb_handle, HL_KERNEL_ASID_ID, false); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); @@ -3942,8 +4018,7 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= DRAM_PHYS_BASE) && - (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) { + } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); @@ -3999,8 +4074,7 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= DRAM_PHYS_BASE) && - (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) { + } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); @@ -4044,9 +4118,8 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= DRAM_PHYS_BASE) && - (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + } else if (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); @@ -4088,9 +4161,8 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= DRAM_PHYS_BASE) && - (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + } else if (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); @@ -4431,8 +4503,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4464,8 +4536,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -4623,7 +4695,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G); cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) + sizeof(struct packet_msg_prot); - cb = hl_cb_kernel_create(hdev, cb_size); + cb = hl_cb_kernel_create(hdev, cb_size, false); if (!cb) return -ENOMEM; @@ -5028,14 +5100,14 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static void goya_disable_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5153,19 +5225,14 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) return RREG32(mmHW_STATE); } -u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) -{ - return cq_idx; -} - -static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx) +static int goya_ctx_init(struct hl_ctx *ctx) { - + return 0; } -static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx) +u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) { - + return cq_idx; } static u32 goya_get_signal_cb_size(struct hl_device *hdev) @@ -5259,7 +5326,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_enable_clock_gating, + .set_clock_gating = goya_set_clock_gating, .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, @@ -5276,13 +5343,12 @@ static const struct hl_asic_funcs goya_funcs = { .rreg = hl_rreg, .wreg = hl_wreg, .halt_coresight = goya_halt_coresight, + .ctx_init = goya_ctx_init, .get_clk_rate = goya_get_clk_rate, .get_queue_id_for_cq = goya_get_queue_id_for_cq, .read_device_fw_version = goya_read_device_fw_version, .load_firmware_to_device = goya_load_firmware_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device, - .ext_queue_init = goya_ext_queue_init, - .ext_queue_reset = goya_ext_queue_reset, .get_signal_cb_size = goya_get_signal_cb_size, .get_wait_cb_size = goya_get_wait_cb_size, .gen_signal_cb = goya_gen_signal_cb, |