diff options
| -rw-r--r-- | drivers/misc/habanalabs/common/command_buffer.c | 82 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 13 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/common/context.c | 8 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 18 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 20 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 18 | 
6 files changed, 106 insertions, 53 deletions
| diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 02d13f71b1df..7c38c4f7f9c0 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -10,12 +10,18 @@  #include <linux/mm.h>  #include <linux/slab.h> +#include <linux/genalloc.h>  static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)  { -	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, -			(void *) (uintptr_t) cb->kernel_address, -			cb->bus_address); +	if (cb->is_internal) +		gen_pool_free(hdev->internal_cb_pool, +				cb->kernel_address, cb->size); +	else +		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, +				(void *) (uintptr_t) cb->kernel_address, +				cb->bus_address); +  	kfree(cb);  } @@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)  }  static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, -					int ctx_id) +					int ctx_id, bool internal_cb)  {  	struct hl_cb *cb; +	u32 cb_offset;  	void *p;  	/* @@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,  	if (!cb)  		return NULL; -	if (ctx_id == HL_KERNEL_ASID_ID) +	if (internal_cb) { +		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size); +		if (!p) { +			kfree(cb); +			return NULL; +		} + +		cb_offset = p - hdev->internal_cb_pool_virt_addr; +		cb->is_internal = true; +		cb->bus_address =  hdev->internal_cb_va_base + cb_offset; +	} else if (ctx_id == HL_KERNEL_ASID_ID) {  		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,  						&cb->bus_address, GFP_ATOMIC); -	else +	} else {  		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,  						&cb->bus_address,  						GFP_USER | __GFP_ZERO); +	} +  	if (!p) {  		dev_err(hdev->dev,  			"failed to allocate %d of dma memory for CB\n", @@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,  }  int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, -			u32 cb_size, u64 *handle, int ctx_id) +			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)  {  	struct hl_cb *cb;  	bool alloc_new_cb = true; @@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,  		goto out_err;  	} -	/* Minimum allocation must be PAGE SIZE */ -	if (cb_size < PAGE_SIZE) -		cb_size = PAGE_SIZE; - -	if (ctx_id == HL_KERNEL_ASID_ID && -			cb_size <= hdev->asic_prop.cb_pool_cb_size) { - -		spin_lock(&hdev->cb_pool_lock); -		if (!list_empty(&hdev->cb_pool)) { -			cb = list_first_entry(&hdev->cb_pool, typeof(*cb), -					pool_list); -			list_del(&cb->pool_list); -			spin_unlock(&hdev->cb_pool_lock); -			alloc_new_cb = false; -		} else { -			spin_unlock(&hdev->cb_pool_lock); -			dev_dbg(hdev->dev, "CB pool is empty\n"); +	if (!internal_cb) { +		/* Minimum allocation must be PAGE SIZE */ +		if (cb_size < PAGE_SIZE) +			cb_size = PAGE_SIZE; + +		if (ctx_id == HL_KERNEL_ASID_ID && +				cb_size <= hdev->asic_prop.cb_pool_cb_size) { + +			spin_lock(&hdev->cb_pool_lock); +			if (!list_empty(&hdev->cb_pool)) { +				cb = list_first_entry(&hdev->cb_pool, +						typeof(*cb), pool_list); +				list_del(&cb->pool_list); +				spin_unlock(&hdev->cb_pool_lock); +				alloc_new_cb = false; +			} else { +				spin_unlock(&hdev->cb_pool_lock); +				dev_dbg(hdev->dev, "CB pool is empty\n"); +			}  		}  	}  	if (alloc_new_cb) { -		cb = hl_cb_alloc(hdev, cb_size, ctx_id); +		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);  		if (!cb) {  			rc = -ENOMEM;  			goto out_err; @@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)  			rc = -EINVAL;  		} else {  			rc = hl_cb_create(hdev, &hpriv->cb_mgr, -						args->in.cb_size, &handle, -						hpriv->ctx->asid); +					args->in.cb_size, &handle, +					hpriv->ctx->asid, false);  		}  		memset(args, 0, sizeof(*args)); @@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)  	idr_destroy(&mgr->cb_handles);  } -struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size) +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, +					bool internal_cb)  {  	u64 cb_handle;  	struct hl_cb *cb;  	int rc;  	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, -			HL_KERNEL_ASID_ID); +			HL_KERNEL_ASID_ID, internal_cb);  	if (rc) {  		dev_err(hdev->dev,  			"Failed to allocate CB for the kernel driver %d\n", rc); @@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)  	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {  		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size, -				HL_KERNEL_ASID_ID); +				HL_KERNEL_ASID_ID, false);  		if (cb) {  			cb->is_pool = true;  			list_add(&cb->pool_list, &hdev->cb_pool); diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index c605be89f764..e096532c0e48 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -919,7 +919,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,  		goto put_cs;  	} -	cb = hl_cb_kernel_create(hdev, PAGE_SIZE); +	if (cs->type == CS_TYPE_WAIT) +		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); +	else +		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); + +	cb = hl_cb_kernel_create(hdev, cb_size, +				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);  	if (!cb) {  		ctx->cs_counters.out_of_mem_drop_cnt++;  		kfree(job); @@ -927,11 +933,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,  		goto put_cs;  	} -	if (cs->type == CS_TYPE_WAIT) -		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); -	else -		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); -  	job->id = 0;  	job->cs = cs;  	job->user_cb = cb; diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 1e3e5b19ecd9..b75a20364fad 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -153,10 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)  			rc = -ENOMEM;  			goto mem_ctx_err;  		} + +		rc = hdev->asic_funcs->ctx_init(ctx); +		if (rc) { +			dev_err(hdev->dev, "ctx_init failed\n"); +			goto ctx_init_err; +		}  	}  	return 0; +ctx_init_err: +	hl_vm_ctx_fini(ctx);  mem_ctx_err:  	if (ctx->asid != HL_KERNEL_ASID_ID)  		hl_asid_free(hdev, ctx->asid); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 82532f1f94cb..bf9abfa47b7a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -392,6 +392,7 @@ struct hl_cb_mgr {   * @ctx_id: holds the ID of the owner's context.   * @mmap: true if the CB is currently mmaped to user.   * @is_pool: true if CB was acquired from the pool, false otherwise. + * @is_internal: internaly allocated   */  struct hl_cb {  	struct kref		refcount; @@ -408,6 +409,7 @@ struct hl_cb {  	u32			ctx_id;  	u8			mmap;  	u8			is_pool; +	u8			is_internal;  }; @@ -643,6 +645,7 @@ enum div_select_defs {   * @rreg: Read a register. Needed for simulator support.   * @wreg: Write a register. Needed for simulator support.   * @halt_coresight: stop the ETF and ETR traces. + * @ctx_init: context dependent initialization.   * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz   * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.   * @read_device_fw_version: read the device's firmware versions that are @@ -745,6 +748,7 @@ struct hl_asic_funcs {  	u32 (*rreg)(struct hl_device *hdev, u32 reg);  	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);  	void (*halt_coresight)(struct hl_device *hdev); +	int (*ctx_init)(struct hl_ctx *ctx);  	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);  	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);  	void (*read_device_fw_version)(struct hl_device *hdev, @@ -1432,6 +1436,10 @@ struct hl_device_idle_busy_ts {   * @hl_debugfs: device's debugfs manager.   * @cb_pool: list of preallocated CBs.   * @cb_pool_lock: protects the CB pool. + * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. + * @internal_cb_pool_dma_addr: internal command buffer pool dma address. + * @internal_cb_pool: internal command buffer memory pool. + * @internal_cb_va_base: internal cb pool mmu virtual address base   * @fpriv_list: list of file private data structures. Each structure is created   *              when a user opens the device   * @fpriv_list_lock: protects the fpriv_list @@ -1531,6 +1539,11 @@ struct hl_device {  	struct list_head		cb_pool;  	spinlock_t			cb_pool_lock; +	void				*internal_cb_pool_virt_addr; +	dma_addr_t			internal_cb_pool_dma_addr; +	struct gen_pool			*internal_cb_pool; +	u64				internal_cb_va_base; +  	struct list_head		fpriv_list;  	struct mutex			fpriv_list_lock; @@ -1741,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);  void hl_hwmon_fini(struct hl_device *hdev);  int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size, -		u64 *handle, int ctx_id); +		u64 *handle, int ctx_id, bool internal_cb);  int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);  int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);  struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr, @@ -1749,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,  void hl_cb_put(struct hl_cb *cb);  void hl_cb_mgr_init(struct hl_cb_mgr *mgr);  void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); -struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size); +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, +					bool internal_cb);  int hl_cb_pool_init(struct hl_device *hdev);  int hl_cb_pool_fini(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 57b2b9392cb2..86cfaf73ad74 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -635,7 +635,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,  	u8 tpc_id;  	int rc; -	cb = hl_cb_kernel_create(hdev, PAGE_SIZE); +	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);  	if (!cb)  		return -EFAULT; @@ -4048,9 +4048,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,  	parser->patched_cb_size = parser->user_cb_size +  			sizeof(struct packet_msg_prot) * 2; -	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, -				parser->patched_cb_size, -				&patched_cb_handle, HL_KERNEL_ASID_ID); +	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, +			&patched_cb_handle, HL_KERNEL_ASID_ID, false);  	if (rc) {  		dev_err(hdev->dev, @@ -4122,9 +4121,8 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,  	if (rc)  		goto free_userptr; -	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, -				parser->patched_cb_size, -				&patched_cb_handle, HL_KERNEL_ASID_ID); +	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, +			&patched_cb_handle, HL_KERNEL_ASID_ID, false);  	if (rc) {  		dev_err(hdev->dev,  			"Failed to allocate patched CB for DMA CS %d\n", rc); @@ -4257,7 +4255,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,  	struct hl_cb *cb;  	int rc; -	cb = hl_cb_kernel_create(hdev, PAGE_SIZE); +	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);  	if (!cb)  		return -EFAULT; @@ -6229,6 +6227,11 @@ static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)  	return RREG32(mmHW_STATE);  } +int gaudi_ctx_init(struct hl_ctx *ctx) +{ +	return 0; +} +  static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)  {  	return gaudi_cq_assignment[cq_idx]; @@ -6532,6 +6535,7 @@ static const struct hl_asic_funcs gaudi_funcs = {  	.rreg = hl_rreg,  	.wreg = hl_wreg,  	.halt_coresight = gaudi_halt_coresight, +	.ctx_init = gaudi_ctx_init,  	.get_clk_rate = gaudi_get_clk_rate,  	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,  	.read_device_fw_version = gaudi_read_device_fw_version, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 2b0937d950c1..4473ded313d6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3771,9 +3771,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,  	parser->patched_cb_size = parser->user_cb_size +  			sizeof(struct packet_msg_prot) * 2; -	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, -				parser->patched_cb_size, -				&patched_cb_handle, HL_KERNEL_ASID_ID); +	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, +			&patched_cb_handle, HL_KERNEL_ASID_ID, false);  	if (rc) {  		dev_err(hdev->dev, @@ -3845,9 +3844,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,  	if (rc)  		goto free_userptr; -	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, -				parser->patched_cb_size, -				&patched_cb_handle, HL_KERNEL_ASID_ID); +	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, +			&patched_cb_handle, HL_KERNEL_ASID_ID, false);  	if (rc) {  		dev_err(hdev->dev,  			"Failed to allocate patched CB for DMA CS %d\n", rc); @@ -4693,7 +4691,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,  	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);  	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +  						sizeof(struct packet_msg_prot); -	cb = hl_cb_kernel_create(hdev, cb_size); +	cb = hl_cb_kernel_create(hdev, cb_size, false);  	if (!cb)  		return -ENOMEM; @@ -5223,6 +5221,11 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)  	return RREG32(mmHW_STATE);  } +int goya_ctx_init(struct hl_ctx *ctx) +{ +	return 0; +} +  u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)  {  	return cq_idx; @@ -5336,6 +5339,7 @@ static const struct hl_asic_funcs goya_funcs = {  	.rreg = hl_rreg,  	.wreg = hl_wreg,  	.halt_coresight = goya_halt_coresight, +	.ctx_init = goya_ctx_init,  	.get_clk_rate = goya_get_clk_rate,  	.get_queue_id_for_cq = goya_get_queue_id_for_cq,  	.read_device_fw_version = goya_read_device_fw_version, | 
