summaryrefslogtreecommitdiff
path: root/drivers/accel/habanalabs/common/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/habanalabs/common/memory.c')
-rw-r--r--drivers/accel/habanalabs/common/memory.c308
1 files changed, 166 insertions, 142 deletions
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 4fc72a07d2f5..0b8689fe0b64 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
*p_userptr = userptr;
- rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+ rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
@@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
* physical pages
*
* This function does the following:
- * - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
* virtual block.
*/
@@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
}
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
- u64 page_size, u64 exported_size,
+ u64 page_size, u64 exported_size, u64 offset,
struct device *dev, enum dma_data_direction dir)
{
- u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
- struct asic_fixed_properties *prop;
- int rc, i, j, nents, cur_page;
+ u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
+ left_size_in_dma_seg, device_address, bar_address, start_page;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
+ unsigned int nents, i;
struct sg_table *sgt;
+ bool next_sg_entry;
+ int rc;
- prop = &hdev->asic_prop;
-
- dma_max_seg_size = dma_get_max_seg_size(dev);
-
- /* We would like to align the max segment size to PAGE_SIZE, so the
- * SGL will contain aligned addresses that can be easily mapped to
- * an MMU
- */
- dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+ /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
+ dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE) {
dev_err_ratelimited(hdev->dev,
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
if (!sgt)
return ERR_PTR(-ENOMEM);
- /* remove export size restrictions in case not explicitly defined */
- cur_size_to_export = exported_size ? exported_size : (npages * page_size);
-
- /* If the size of each page is larger than the dma max segment size,
- * then we can't combine pages and the number of entries in the SGL
- * will just be the
- * <number of pages> * <chunks of max segment size in each page>
- */
- if (page_size > dma_max_seg_size) {
- /* we should limit number of pages according to the exported size */
- cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
- nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
- } else {
- cur_npages = npages;
-
- /* Get number of non-contiguous chunks */
- for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
- if (pages[i - 1] + page_size != pages[i] ||
- chunk_size + page_size > dma_max_seg_size) {
- nents++;
- chunk_size = page_size;
- continue;
- }
+ /* Use the offset to move to the actual first page that is exported */
+ for (start_page = 0 ; start_page < npages ; ++start_page) {
+ if (offset < page_size)
+ break;
- chunk_size += page_size;
- }
+ /* The offset value was validated so there can't be an underflow */
+ offset -= page_size;
}
- rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
- if (rc)
- goto error_free;
+ /* Calculate the required number of entries for the SG table */
+ curr_page = start_page;
+ nents = 1;
+ left_size_to_export = exported_size;
+ left_size_in_page = page_size - offset;
+ left_size_in_dma_seg = dma_max_seg_size;
+ next_sg_entry = false;
- cur_page = 0;
+ while (true) {
+ size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+ left_size_to_export -= size;
+ left_size_in_page -= size;
+ left_size_in_dma_seg -= size;
- if (page_size > dma_max_seg_size) {
- u64 size_left, cur_device_address = 0;
+ if (!left_size_to_export)
+ break;
- size_left = page_size;
+ if (!left_size_in_page) {
+ /* left_size_to_export is not zero so there must be another page */
+ if (pages[curr_page] + page_size != pages[curr_page + 1])
+ next_sg_entry = true;
- /* Need to split each page into the number of chunks of
- * dma_max_seg_size
- */
- for_each_sgtable_dma_sg(sgt, sg, i) {
- if (size_left == page_size)
- cur_device_address =
- pages[cur_page] - prop->dram_base_address;
- else
- cur_device_address += dma_max_seg_size;
+ ++curr_page;
+ left_size_in_page = page_size;
+ }
- /* make sure not to export over exported size */
- chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
+ if (!left_size_in_dma_seg) {
+ next_sg_entry = true;
+ left_size_in_dma_seg = dma_max_seg_size;
+ }
- bar_address = hdev->dram_pci_bar_start + cur_device_address;
+ if (next_sg_entry) {
+ ++nents;
+ next_sg_entry = false;
+ }
+ }
- rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
- if (rc)
- goto error_unmap;
+ rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
+ if (rc)
+ goto err_free_sgt;
- cur_size_to_export -= chunk_size;
+ /* Prepare the SG table entries */
+ curr_page = start_page;
+ device_address = pages[curr_page] + offset;
+ left_size_to_export = exported_size;
+ left_size_in_page = page_size - offset;
+ left_size_in_dma_seg = dma_max_seg_size;
+ next_sg_entry = false;
- if (size_left > dma_max_seg_size) {
- size_left -= dma_max_seg_size;
- } else {
- cur_page++;
- size_left = page_size;
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
+ chunk_size = 0;
+
+ for ( ; curr_page < npages ; ++curr_page) {
+ size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+ chunk_size += size;
+ left_size_to_export -= size;
+ left_size_in_page -= size;
+ left_size_in_dma_seg -= size;
+
+ if (!left_size_to_export)
+ break;
+
+ if (!left_size_in_page) {
+ /* left_size_to_export is not zero so there must be another page */
+ if (pages[curr_page] + page_size != pages[curr_page + 1]) {
+ device_address = pages[curr_page + 1];
+ next_sg_entry = true;
+ }
+
+ left_size_in_page = page_size;
}
- }
- } else {
- /* Merge pages and put them into the scatterlist */
- for_each_sgtable_dma_sg(sgt, sg, i) {
- chunk_size = page_size;
- for (j = cur_page + 1 ; j < cur_npages ; j++) {
- if (pages[j - 1] + page_size != pages[j] ||
- chunk_size + page_size > dma_max_seg_size)
- break;
-
- chunk_size += page_size;
+
+ if (!left_size_in_dma_seg) {
+ /*
+ * Skip setting a new device address if already moving to a page
+ * which is not contiguous with the current page.
+ */
+ if (!next_sg_entry) {
+ device_address += chunk_size;
+ next_sg_entry = true;
+ }
+
+ left_size_in_dma_seg = dma_max_seg_size;
}
- bar_address = hdev->dram_pci_bar_start +
- (pages[cur_page] - prop->dram_base_address);
+ if (next_sg_entry) {
+ next_sg_entry = false;
+ break;
+ }
+ }
- /* make sure not to export over exported size */
- chunk_size = min(chunk_size, cur_size_to_export);
- rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
- if (rc)
- goto error_unmap;
+ rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+ if (rc)
+ goto err_unmap;
+ }
- cur_size_to_export -= chunk_size;
- cur_page = j;
- }
+ /* There should be nothing left to export exactly after looping over all SG elements */
+ if (left_size_to_export) {
+ dev_err(hdev->dev,
+ "left size to export %#llx after initializing %u SG elements\n",
+ left_size_to_export, sgt->nents);
+ rc = -ENOMEM;
+ goto err_unmap;
}
- /* Because we are not going to include a CPU list we want to have some
- * chance that other users will detect this by setting the orig_nents
- * to 0 and using only nents (length of DMA list) when going over the
- * sgl
+ /*
+ * Because we are not going to include a CPU list, we want to have some chance that other
+ * users will detect this when going over SG table, by setting the orig_nents to 0 and using
+ * only nents (length of DMA list).
*/
sgt->orig_nents = 0;
+ dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
+ nents, dev_name(dev));
+ for_each_sgtable_dma_sg(sgt, sg, i)
+ dev_dbg(hdev->dev,
+ "SG entry %d: address %#llx, length %#x\n",
+ i, sg_dma_address(sg), sg_dma_len(sg));
+
return sgt;
-error_unmap:
+err_unmap:
for_each_sgtable_dma_sg(sgt, sg, i) {
if (!sg_dma_len(sg))
continue;
- dma_unmap_resource(dev, sg_dma_address(sg),
- sg_dma_len(sg), dir,
+ dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
sg_free_table(sgt);
-error_free:
+err_free_sgt:
kfree(sgt);
return ERR_PTR(rc);
}
@@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
+ u64 *pages, npages, page_size, exported_size, offset;
struct dma_buf *dma_buf = attachment->dmabuf;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_dmabuf_priv *hl_dmabuf;
@@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
hl_dmabuf = dma_buf->priv;
hdev = hl_dmabuf->ctx->hdev;
- phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (!attachment->peer2peer) {
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
return ERR_PTR(-EPERM);
}
- if (phys_pg_pack)
- sgt = alloc_sgt_from_device_pages(hdev,
- phys_pg_pack->pages,
- phys_pg_pack->npages,
- phys_pg_pack->page_size,
- phys_pg_pack->exported_size,
- attachment->dev,
- dir);
- else
- sgt = alloc_sgt_from_device_pages(hdev,
- &hl_dmabuf->device_address,
- 1,
- hl_dmabuf->dmabuf->size,
- 0,
- attachment->dev,
- dir);
+ exported_size = hl_dmabuf->dmabuf->size;
+ offset = hl_dmabuf->offset;
+ phys_pg_pack = hl_dmabuf->phys_pg_pack;
+ if (phys_pg_pack) {
+ pages = phys_pg_pack->pages;
+ npages = phys_pg_pack->npages;
+ page_size = phys_pg_pack->page_size;
+ } else {
+ pages = &hl_dmabuf->device_phys_addr;
+ npages = 1;
+ page_size = hl_dmabuf->dmabuf->size;
+ }
+
+ sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
+ attachment->dev, dir);
if (IS_ERR(sgt))
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
@@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
hl_ctx_put(ctx);
/* Paired with get_file() in export_dmabuf() */
- fput(ctx->hpriv->filp);
+ fput(ctx->hpriv->file_priv->filp);
kfree(hl_dmabuf);
}
@@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
* released first and only then the compute device.
* Paired with fput() in hl_release_dmabuf().
*/
- get_file(ctx->hpriv->filp);
+ get_file(ctx->hpriv->file_priv->filp);
*dmabuf_fd = fd;
@@ -1876,22 +1898,29 @@ err_dma_buf_put:
return rc;
}
-static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
+static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
{
- if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+ if (!PAGE_ALIGNED(addr)) {
dev_dbg(hdev->dev,
- "exported device memory address 0x%llx should be aligned to 0x%lx\n",
- device_addr, PAGE_SIZE);
+ "exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
+ addr, PAGE_SIZE);
return -EINVAL;
}
- if (size < PAGE_SIZE) {
+ if (!size || !PAGE_ALIGNED(size)) {
dev_dbg(hdev->dev,
- "exported device memory size %llu should be equal to or greater than %lu\n",
+ "exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
size, PAGE_SIZE);
return -EINVAL;
}
+ if (!PAGE_ALIGNED(offset)) {
+ dev_dbg(hdev->dev,
+ "exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
+ offset, PAGE_SIZE);
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
u64 bar_address;
int rc;
- rc = validate_export_params_common(hdev, device_addr, size);
+ rc = validate_export_params_common(hdev, device_addr, size, 0);
if (rc)
return rc;
if (device_addr < prop->dram_user_base_address ||
- (device_addr + size) > prop->dram_end_address ||
- (device_addr + size) < device_addr) {
+ (device_addr + size) > prop->dram_end_address ||
+ (device_addr + size) < device_addr) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
device_addr, size);
@@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
u64 bar_address;
int i, rc;
- rc = validate_export_params_common(hdev, device_addr, size);
+ rc = validate_export_params_common(hdev, device_addr, size, offset);
if (rc)
return rc;
if ((offset + size) > phys_pg_pack->total_size) {
dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
- offset, size, phys_pg_pack->total_size);
+ offset, size, phys_pg_pack->total_size);
return -EINVAL;
}
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-
bar_address = hdev->dram_pci_bar_start +
- (phys_pg_pack->pages[i] - prop->dram_base_address);
+ (phys_pg_pack->pages[i] - prop->dram_base_address);
if ((bar_address + phys_pg_pack->page_size) >
(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
(bar_address + phys_pg_pack->page_size) < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
- phys_pg_pack->pages[i],
- phys_pg_pack->page_size);
-
+ phys_pg_pack->pages[i], phys_pg_pack->page_size);
return -EINVAL;
}
}
@@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
struct asic_fixed_properties *prop;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
- u64 export_addr;
int rc;
hdev = ctx->hdev;
@@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
return -EINVAL;
}
- export_addr = addr + offset;
-
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf)
return -ENOMEM;
@@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
rc = PTR_ERR(phys_pg_pack);
goto dec_memhash_export_cnt;
}
- rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
+ rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
if (rc)
goto dec_memhash_export_cnt;
- phys_pg_pack->exported_size = size;
hl_dmabuf->phys_pg_pack = phys_pg_pack;
hl_dmabuf->memhash_hnode = hnode;
+ hl_dmabuf->offset = offset;
} else {
- rc = validate_export_params_no_mmu(hdev, export_addr, size);
+ rc = validate_export_params_no_mmu(hdev, addr, size);
if (rc)
goto err_free_dmabuf_wrapper;
- }
- hl_dmabuf->device_address = export_addr;
+ hl_dmabuf->device_phys_addr = addr;
+ }
rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
if (rc)
@@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
return 0;
}
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
+ struct hl_fpriv *hpriv = file_priv->driver_priv;
enum hl_device_status status;
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
@@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
- hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
+ hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);