summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/context.c
diff options
context:
space:
mode:
author: Ofir Bitton <obitton@habana.ai> 2020-06-02 12:28:27 +0300
committer: Oded Gabbay <oded.gabbay@gmail.com> 2020-07-24 20:31:34 +0300
commit: c16d45f42b64e91895f4bc1cf19febeb5e0c52b6 (patch)
tree: 0e248646613ecbb8fa69681dfc7d835c59dfcb6d /drivers/misc/habanalabs/context.c
parent: 0b168c8f1d21f87003fb28b4c87c32335d7fc94b (diff)
habanalabs: Use pending CS amount per ASIC
Training schemes require many more concurrent command submissions than inference does. In addition, training command submissions can complete in a non-serialized manner. Hence, we add support for each ASIC to configure its own number of concurrent pending command submissions, rather than using a predefined amount. This change enhances performance by allowing the user to submit more concurrent work without waiting for previous work to complete. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs/context.c')
-rw-r--r-- drivers/misc/habanalabs/context.c | 14
1 files changed, 11 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index ec92b3506b1f..1b96fefa4a65 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
* to this function unless the ref count is 0
*/
- for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+ for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
dma_fence_put(ctx->cs_pending[i]);
+ kfree(ctx->cs_pending);
+
if (ctx->asid != HL_KERNEL_ASID_ID) {
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
@@ -126,6 +128,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
+ ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+ sizeof(struct dma_fence *),
+ GFP_KERNEL);
+ if (!ctx->cs_pending)
+ return -ENOMEM;
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
@@ -170,6 +177,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
+ struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
struct dma_fence *fence;
spin_lock(&ctx->cs_lock);
@@ -179,13 +187,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
return ERR_PTR(-EINVAL);
}
- if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+ if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
spin_unlock(&ctx->cs_lock);
return NULL;
}
fence = dma_fence_get(
- ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+ ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
spin_unlock(&ctx->cs_lock);
return fence;