habanalabs: Use pending CS amount per ASIC

Training schemes requires much more concurrent command submissions than inference does. In addition, training command submissions can be completed in a non serialized manner. Hence, we add support in which each ASIC will be able to configure the amount of concurrent pending command submissions, rather than use a predefined amount. This change will enhance performance by allowing the user to add more concurrent work without waiting for the previous work to be completed. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
author: Ofir Bitton <obitton@habana.ai> 2020-06-02 12:28:27 +0300
committer: Oded Gabbay <oded.gabbay@gmail.com> 2020-07-24 20:31:34 +0300
commit: c16d45f42b64e91895f4bc1cf19febeb5e0c52b6 (patch)
tree: 0e248646613ecbb8fa69681dfc7d835c59dfcb6d /drivers/misc/habanalabs/context.c
parent: 0b168c8f1d21f87003fb28b4c87c32335d7fc94b (diff)
1 files changed, 11 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index ec92b3506b1f..1b96fefa4a65 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 * to this function unless the ref count is 0
 	 */
 
-	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
+	kfree(ctx->cs_pending);
+
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -126,6 +128,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
+	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+				sizeof(struct dma_fence *),
+				GFP_KERNEL);
+	if (!ctx->cs_pending)
+		return -ENOMEM;
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
@@ -170,6 +177,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
 
 struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
+	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -179,13 +187,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
 		spin_unlock(&ctx->cs_lock);
 		return NULL;
 	}
 
 	fence = dma_fence_get(
-			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
author	Ofir Bitton <obitton@habana.ai>	2020-06-02 12:28:27 +0300
committer	Oded Gabbay <oded.gabbay@gmail.com>	2020-07-24 20:31:34 +0300
commit	c16d45f42b64e91895f4bc1cf19febeb5e0c52b6 (patch)
tree	0e248646613ecbb8fa69681dfc7d835c59dfcb6d /drivers/misc/habanalabs/context.c
parent	0b168c8f1d21f87003fb28b4c87c32335d7fc94b (diff)