summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/debugfs.c90
-rw-r--r--drivers/misc/habanalabs/goya/goya.c17
-rw-r--r--drivers/misc/habanalabs/habanalabs.h114
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h1
-rw-r--r--drivers/misc/habanalabs/memory.c45
-rw-r--r--drivers/misc/habanalabs/mmu.c149
6 files changed, 268 insertions, 148 deletions
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 1e1fa619a225..1cf75010a379 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr)
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+ u64 virt_addr, u64 mask, u64 shift)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & HOP0_MASK) >> HOP0_SHIFT);
+ ((virt_addr & mask) >> shift);
}
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & HOP1_MASK) >> HOP1_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+ mmu_specs->hop0_shift);
}
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & HOP2_MASK) >> HOP2_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+ mmu_specs->hop1_shift);
}
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & HOP3_MASK) >> HOP3_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+ mmu_specs->hop2_shift);
}
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & HOP4_MASK) >> HOP4_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+ mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+ mmu_specs->hop4_shift);
}
static inline u64 get_next_hop_addr(u64 curr_pte)
@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
struct hl_ctx *ctx;
+ bool is_dram_addr;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->va_space_dram_start_address,
+ prop->va_space_dram_end_address);
+
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
mutex_lock(&ctx->mmu_lock);
/* the following lookup is copied from unmap() in mmu.c */
hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
hop1_addr = get_next_hop_addr(hop0_pte);
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
- hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
hop2_addr = get_next_hop_addr(hop1_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
- hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
hop3_addr = get_next_hop_addr(hop2_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
- hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
if (!(hop3_pte & LAST_MASK)) {
@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
- hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped;
@@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
struct hl_ctx *ctx = hdev->compute_ctx;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
u64 hop_addr, hop_pte_addr, hop_pte;
u64 offset_mask = HOP4_MASK | FLAGS_MASK;
int rc = 0;
+ bool is_dram_addr;
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
}
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->va_space_dram_start_address,
+ prop->va_space_dram_end_address);
+
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
mutex_lock(&ctx->mmu_lock);
/* hop 0 */
hop_addr = get_hop0_addr(ctx);
- hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 1 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
- hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 2 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
- hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 3 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
- hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
if (!(hop_pte & LAST_MASK)) {
@@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
- hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+ virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
offset_mask = FLAGS_MASK;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3c22fb96a26f..3294a6a92f75 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
+ prop->dmmu.hop0_shift = HOP0_SHIFT;
+ prop->dmmu.hop1_shift = HOP1_SHIFT;
+ prop->dmmu.hop2_shift = HOP2_SHIFT;
+ prop->dmmu.hop3_shift = HOP3_SHIFT;
+ prop->dmmu.hop4_shift = HOP4_SHIFT;
+ prop->dmmu.hop0_mask = HOP0_MASK;
+ prop->dmmu.hop1_mask = HOP1_MASK;
+ prop->dmmu.hop2_mask = HOP2_MASK;
+ prop->dmmu.hop3_mask = HOP3_MASK;
+ prop->dmmu.hop4_mask = HOP4_MASK;
+ prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+
+ /* No difference between PMMU and DMMU except for the page size */
+ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+ prop->dmmu.page_size = PAGE_SIZE_2MB;
+ prop->pmmu.page_size = PAGE_SIZE_4KB;
+
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 36d05c32f7ec..00c949f4ccd1 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -131,12 +131,44 @@ enum hl_device_hw_state {
};
/**
+ * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @hop0_shift: shift of hop 0 mask.
+ * @hop1_shift: shift of hop 1 mask.
+ * @hop2_shift: shift of hop 2 mask.
+ * @hop3_shift: shift of hop 3 mask.
+ * @hop4_shift: shift of hop 4 mask.
+ * @hop0_mask: mask to get the PTE address in hop 0.
+ * @hop1_mask: mask to get the PTE address in hop 1.
+ * @hop2_mask: mask to get the PTE address in hop 2.
+ * @hop3_mask: mask to get the PTE address in hop 3.
+ * @hop4_mask: mask to get the PTE address in hop 4.
+ * @page_size: default page size used to allocate memory.
+ * @huge_page_size: page size used to allocate memory with huge pages.
+ */
+struct hl_mmu_properties {
+ u64 hop0_shift;
+ u64 hop1_shift;
+ u64 hop2_shift;
+ u64 hop3_shift;
+ u64 hop4_shift;
+ u64 hop0_mask;
+ u64 hop1_mask;
+ u64 hop2_mask;
+ u64 hop3_mask;
+ u64 hop4_mask;
+ u32 page_size;
+ u32 huge_page_size;
+};
+
+/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
* @armcp_info: received various information from ArmCP regarding the H/W, e.g.
* available sensors.
* @uboot_ver: F/W U-boot version.
* @preboot_ver: F/W Preboot version.
+ * @dmmu: DRAM MMU address translation properties.
+ * @pmmu: PCI (host) MMU address translation properties.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
@@ -173,53 +205,55 @@ enum hl_device_hw_state {
* @psoc_pci_pll_nf: PCI PLL NF value.
* @psoc_pci_pll_od: PCI PLL OD value.
* @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
- * @completion_queues_count: number of completion queues.
* @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @tpc_enabled_mask: which TPCs are enabled.
+ * @completion_queues_count: number of completion queues.
*/
struct asic_fixed_properties {
struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
- struct armcp_info armcp_info;
- char uboot_ver[VERSION_MAX_LEN];
- char preboot_ver[VERSION_MAX_LEN];
- u64 sram_base_address;
- u64 sram_end_address;
- u64 sram_user_base_address;
- u64 dram_base_address;
- u64 dram_end_address;
- u64 dram_user_base_address;
- u64 dram_size;
- u64 dram_pci_bar_size;
- u64 max_power_default;
- u64 va_space_host_start_address;
- u64 va_space_host_end_address;
- u64 va_space_dram_start_address;
- u64 va_space_dram_end_address;
- u64 dram_size_for_default_page_mapping;
- u64 pcie_dbi_base_address;
- u64 pcie_aux_dbi_reg_addr;
- u64 mmu_pgt_addr;
- u64 mmu_dram_default_page_addr;
- u32 mmu_pgt_size;
- u32 mmu_pte_size;
- u32 mmu_hop_table_size;
- u32 mmu_hop0_tables_total_size;
- u32 dram_page_size;
- u32 cfg_size;
- u32 sram_size;
- u32 max_asid;
- u32 num_of_events;
- u32 psoc_pci_pll_nr;
- u32 psoc_pci_pll_nf;
- u32 psoc_pci_pll_od;
- u32 psoc_pci_pll_div_factor;
- u32 high_pll;
- u32 cb_pool_cb_cnt;
- u32 cb_pool_cb_size;
- u8 completion_queues_count;
- u8 tpc_enabled_mask;
+ struct armcp_info armcp_info;
+ char uboot_ver[VERSION_MAX_LEN];
+ char preboot_ver[VERSION_MAX_LEN];
+ struct hl_mmu_properties dmmu;
+ struct hl_mmu_properties pmmu;
+ u64 sram_base_address;
+ u64 sram_end_address;
+ u64 sram_user_base_address;
+ u64 dram_base_address;
+ u64 dram_end_address;
+ u64 dram_user_base_address;
+ u64 dram_size;
+ u64 dram_pci_bar_size;
+ u64 max_power_default;
+ u64 va_space_host_start_address;
+ u64 va_space_host_end_address;
+ u64 va_space_dram_start_address;
+ u64 va_space_dram_end_address;
+ u64 dram_size_for_default_page_mapping;
+ u64 pcie_dbi_base_address;
+ u64 pcie_aux_dbi_reg_addr;
+ u64 mmu_pgt_addr;
+ u64 mmu_dram_default_page_addr;
+ u32 mmu_pgt_size;
+ u32 mmu_pte_size;
+ u32 mmu_hop_table_size;
+ u32 mmu_hop0_tables_total_size;
+ u32 dram_page_size;
+ u32 cfg_size;
+ u32 sram_size;
+ u32 max_asid;
+ u32 num_of_events;
+ u32 psoc_pci_pll_nr;
+ u32 psoc_pci_pll_nf;
+ u32 psoc_pci_pll_od;
+ u32 psoc_pci_pll_div_factor;
+ u32 high_pll;
+ u32 cb_pool_cb_cnt;
+ u32 cb_pool_cb_size;
+ u8 tpc_enabled_mask;
+ u8 completion_queues_count;
};
/**
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index 74a5502b8c4e..a6851a9d3f03 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -12,7 +12,6 @@
#define PAGE_SHIFT_2MB 21
#define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB)
#define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB)
-#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1))
#define PAGE_PRESENT_MASK 0x0000000000001ull
#define SWAP_OUT_MASK 0x0000000000004ull
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 12db6609da27..cce6bdb6e655 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -13,7 +13,6 @@
#include <linux/slab.h>
#include <linux/genalloc.h>
-#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
#define HL_MMU_DEBUG 0
/*
@@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev,
* - Return the start address of the virtual block
*/
static u64 get_va_block(struct hl_device *hdev,
- struct hl_va_range *va_range, u64 size, u64 hint_addr,
- bool is_userptr)
+ struct hl_va_range *va_range, u64 size, u64 hint_addr,
+ bool is_userptr)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
u64 valid_start, valid_size, prev_start, prev_end, page_mask,
@@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev,
u32 page_size;
bool add_prev = false;
- if (is_userptr) {
+ if (is_userptr)
/*
* We cannot know if the user allocated memory with huge pages
* or not, hence we continue with the biggest possible
* granularity.
*/
- page_size = PAGE_SIZE_2MB;
- page_mask = PAGE_MASK_2MB;
- } else {
- page_size = hdev->asic_prop.dram_page_size;
- page_mask = ~((u64)page_size - 1);
- }
+ page_size = hdev->asic_prop.pmmu.huge_page_size;
+ else
+ page_size = hdev->asic_prop.dmmu.page_size;
+
+ page_mask = ~((u64)page_size - 1);
mutex_lock(&va_range->lock);
@@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev,
if (valid_size >= size &&
(!new_va_block || valid_size < res_valid_size)) {
-
new_va_block = va_block;
res_valid_start = valid_start;
res_valid_size = valid_size;
@@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
/*
* init_phys_pg_pack_from_userptr - initialize physical page pack from host
* memory
- * @asid: current context ASID
+ * @ctx: current context
* @userptr: userptr to initialize from
* @pphys_pg_pack: result pointer
*
@@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
* - Create a physical page pack from the physical pages related to the given
* virtual block
*/
-static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+ struct hl_userptr *userptr,
struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
+ struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct scatterlist *sg;
dma_addr_t dma_addr;
u64 page_mask, total_npages;
- u32 npages, page_size = PAGE_SIZE;
+ u32 npages, page_size = PAGE_SIZE,
+ huge_page_size = mmu_prop->huge_page_size;
bool first = true, is_huge_page_opt = true;
int rc, i, j;
+ u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
if (!phys_pg_pack)
@@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
phys_pg_pack->vm_type = userptr->vm_type;
phys_pg_pack->created_from_userptr = true;
- phys_pg_pack->asid = asid;
+ phys_pg_pack->asid = ctx->asid;
atomic_set(&phys_pg_pack->mapping_cnt, 1);
/* Only if all dma_addrs are aligned to 2MB and their
@@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
total_npages += npages;
- if ((npages % PGS_IN_2MB_PAGE) ||
- (dma_addr & (PAGE_SIZE_2MB - 1)))
+ if ((npages % pgs_in_huge_page) ||
+ (dma_addr & (huge_page_size - 1)))
is_huge_page_opt = false;
}
if (is_huge_page_opt) {
- page_size = PAGE_SIZE_2MB;
- total_npages /= PGS_IN_2MB_PAGE;
+ page_size = huge_page_size;
+ do_div(total_npages, pgs_in_huge_page);
}
page_mask = ~(((u64) page_size) - 1);
@@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
dma_addr += page_size;
if (is_huge_page_opt)
- npages -= PGS_IN_2MB_PAGE;
+ npages -= pgs_in_huge_page;
else
npages--;
}
@@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
}
- rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack);
if (rc) {
dev_err(hdev->dev,
@@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true;
userptr = hnode->ptr;
- rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack);
if (rc) {
dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 21b4e3281b3e..3a7f8ff19eb2 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -171,29 +171,44 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
((virt_addr & mask) >> shift);
}
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
+ mmu_prop->hop0_shift);
}
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
+ mmu_prop->hop1_shift);
}
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
+ mmu_prop->hop2_shift);
}
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
+ mmu_prop->hop3_shift);
}
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
+ mmu_prop->hop4_shift);
}
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
@@ -513,24 +528,23 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
mutex_destroy(&ctx->mmu_lock);
}
-static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0,
hop1_addr = 0, hop1_pte_addr = 0,
hop2_addr = 0, hop2_pte_addr = 0,
hop3_addr = 0, hop3_pte_addr = 0,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte;
- bool is_dram_addr, is_huge, clear_hop3 = true;
+ bool is_huge, clear_hop3 = true;
- is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
@@ -539,7 +553,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
- hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
@@ -548,7 +562,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
- hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
@@ -557,7 +571,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
- hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
@@ -575,7 +589,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
- hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
@@ -667,25 +682,36 @@ not_mapped:
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{
struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
int i, rc;
+ bool is_dram_addr;
if (!hdev->mmu_enable)
return 0;
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->va_space_dram_start_address,
+ prop->va_space_dram_end_address);
+
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
/*
- * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
- * is bigger, we break it to sub-pages and unmap them separately.
+ * The H/W handles mapping of specific page sizes. Hence if the page
+ * size is bigger, we break it to sub-pages and unmap them separately.
*/
- if ((page_size % PAGE_SIZE_2MB) == 0) {
- real_page_size = PAGE_SIZE_2MB;
- } else if ((page_size % PAGE_SIZE_4KB) == 0) {
- real_page_size = PAGE_SIZE_4KB;
+ if ((page_size % mmu_prop->huge_page_size) == 0) {
+ real_page_size = mmu_prop->huge_page_size;
+ } else if ((page_size % mmu_prop->page_size) == 0) {
+ real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
- page_size);
+ "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
+ page_size,
+ mmu_prop->page_size >> 10,
+ mmu_prop->huge_page_size >> 20);
return -EFAULT;
}
@@ -694,7 +720,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) {
- rc = _hl_mmu_unmap(ctx, real_virt_addr);
+ rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
if (rc)
return rc;
@@ -705,10 +731,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
}
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
- u32 page_size)
+ u32 page_size, bool is_dram_addr)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0,
hop1_addr = 0, hop1_pte_addr = 0,
hop2_addr = 0, hop2_pte_addr = 0,
@@ -716,21 +743,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte = 0;
bool hop1_new = false, hop2_new = false, hop3_new = false,
- hop4_new = false, is_huge, is_dram_addr;
+ hop4_new = false, is_huge;
int rc = -ENOMEM;
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
/*
- * This mapping function can map a 4KB/2MB page. For 2MB page there are
- * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
- * pages only but user memory could have been allocated with one of the
- * two page sizes. Since this is a common code for all the three cases,
- * we need this hugs page check.
+ * This mapping function can map a page or a huge page. For huge page
+ * there are only 3 hops rather than 4. Currently the DRAM allocation
+ * uses huge pages only but user memory could have been allocated with
+ * one of the two page sizes. Since this is a common code for all the
+ * three cases, we need this huge page check.
*/
- is_huge = page_size == PAGE_SIZE_2MB;
-
- is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
- prop->va_space_dram_start_address,
- prop->va_space_dram_end_address);
+ is_huge = page_size == mmu_prop->huge_page_size;
if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
@@ -738,28 +763,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
}
hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
if (hop1_addr == ULLONG_MAX)
goto err;
- hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
if (hop2_addr == ULLONG_MAX)
goto err;
- hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
if (hop3_addr == ULLONG_MAX)
goto err;
- hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
if (!is_huge) {
@@ -767,7 +792,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (hop4_addr == ULLONG_MAX)
goto err;
- hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
}
@@ -890,25 +916,36 @@ err:
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages;
int i, rc, mapped_cnt = 0;
+ bool is_dram_addr;
if (!hdev->mmu_enable)
return 0;
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->va_space_dram_start_address,
+ prop->va_space_dram_end_address);
+
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
/*
- * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
- * is bigger, we break it to sub-pages and map them separately.
+ * The H/W handles mapping of specific page sizes. Hence if the page
+ * size is bigger, we break it to sub-pages and map them separately.
*/
- if ((page_size % PAGE_SIZE_2MB) == 0) {
- real_page_size = PAGE_SIZE_2MB;
- } else if ((page_size % PAGE_SIZE_4KB) == 0) {
- real_page_size = PAGE_SIZE_4KB;
+ if ((page_size % mmu_prop->huge_page_size) == 0) {
+ real_page_size = mmu_prop->huge_page_size;
+ } else if ((page_size % mmu_prop->page_size) == 0) {
+ real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not 4KB nor 2MB aligned, can't map\n",
- page_size);
+ "page size of %u is not %uKB nor %uMB aligned, can't map\n",
+ page_size,
+ mmu_prop->page_size >> 10,
+ mmu_prop->huge_page_size >> 20);
return -EFAULT;
}
@@ -923,7 +960,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
- real_page_size);
+ real_page_size, is_dram_addr);
if (rc)
goto err;
@@ -937,7 +974,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
err:
real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) {
- if (_hl_mmu_unmap(ctx, real_virt_addr))
+ if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr);