From d724170160f800fa8dfd3c0cdebb8b093570b504 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 28 May 2019 23:03:54 +0300 Subject: habanalabs: fix bug in checking huge page optimization This patch fixes a bug in the MMU code that checks whether we can use huge page mappings for host pages. The code is supposed to enable huge page mappings only if ALL DMA addresses are aligned to 2MB AND the number of pages in each DMA chunk is a multiple of the number of pages in 2MB. However, the code ignored the first requirement for the first DMA chunk. This patch fixes that issue by making sure the requirement of address alignment is validated against all DMA chunks. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index d67d24c13efd..693877e37fd8 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -675,11 +675,6 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, total_npages += npages; - if (first) { - first = false; - dma_addr &= PAGE_MASK_2MB; - } - if ((npages % PGS_IN_2MB_PAGE) || (dma_addr & (PAGE_SIZE_2MB - 1))) is_huge_page_opt = false; @@ -704,7 +699,6 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->total_size = total_npages * page_size; j = 0; - first = true; for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { npages = get_sg_info(sg, &dma_addr); -- cgit From e4c814aa7105a6a8320f64ecc48f13bc089eea88 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 3 Jun 2019 11:25:04 +0000 Subject: habanalabs: Fix virtual address access via debugfs for 2MB pages The debugfs interface for accessing DRAM virtual addresses currently uses the 12 LSBs of a virtual address as an offset. However, it should use the 21 LSBs in case the device MMU page size is 2MB instead of 4KB. This patch fixes the offset calculation to be based on the page size. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 0ce5621c1324..ba418aaa404c 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -500,6 +500,7 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, { struct hl_ctx *ctx = hdev->user_ctx; u64 hop_addr, hop_pte_addr, hop_pte; + u64 offset_mask = HOP4_MASK | OFFSET_MASK; int rc = 0; if (!ctx) { @@ -542,12 +543,14 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, goto not_mapped; hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + offset_mask = OFFSET_MASK; } if (!(hop_pte & PAGE_PRESENT_MASK)) goto not_mapped; - *phys_addr = (hop_pte & PTE_PHYS_ADDR_MASK) | (virt_addr & OFFSET_MASK); + *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); goto out; -- cgit From 1f65105ffc472624b45aff8bedb819c10a85944d Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 4 Jun 2019 11:35:30 +0000 Subject: habanalabs: Read upper bits of trace buffer from RWPHI The trace buffer address is 40 bits wide. The end of the buffer is set in the RWP register (lower 32 bits), and in the RWPHI register (upper 8 bits). Currently only the lower 32 bits are read, and this patch fixes it and concatenates the upper 8 bits to the output address. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya_coresight.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 39f62ce72660..d7ec7ad84cc6 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -425,8 +425,18 @@ static int goya_config_etr(struct hl_device *hdev, WREG32(base_reg + 0x28, 0); WREG32(base_reg + 0x304, 0); - if (params->output_size >= sizeof(u32)) - *(u32 *) params->output = RREG32(base_reg + 0x18); + if (params->output_size >= sizeof(u64)) { + u32 rwp, rwphi; + + /* + * The trace buffer address is 40 bits wide. The end of + * the buffer is set in the RWP register (lower 32 + * bits), and in the RWPHI register (upper 8 bits). + */ + rwp = RREG32(base_reg + 0x18); + rwphi = RREG32(base_reg + 0x3c) & 0xff; + *(u64 *) params->output = ((u64) rwphi << 32) | rwp; + } } return 0; -- cgit