diff options
author | Mukul Joshi <mukul.joshi@amd.com> | 2022-05-09 22:25:25 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-06-09 09:42:50 -0400 |
commit | 7fe51e6fd2368b358441a1f6e0c94f4cd7e0720f (patch) | |
tree | f9300381fd879df2f4725bcfca85d2759b15aac1 /drivers/gpu | |
parent | f87f686482c6d2d4465245356854710b01f312c1 (diff) |
drm/amdkfd: Update context save handling for multi XCC setup (v2)
Context save handling needs to be updated for a multi XCC
setup:
- On a multi XCC setup, KFD needs to report context save base
address and size for each XCC in MQD.
- Thunk will allocate a large context save area covering all
XCCs which will be equal to: num_of_xccs in a partition * size
of context save area for 1 XCC. However, it will report only the
size of context save area for 1 XCC only in the ioctl call.
- Driver then setups the MQD correctly using the size passed from
Thunk and information about number of XCCs in a partition.
- Update get_wave_state function to return context save area
for all XCCs in the partition.
v2: update the get_wave_state function for mqd manager v11 (Morris)
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 62 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 |
6 files changed, 67 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 33a9394f9e58..01ea038cab6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2095,8 +2095,8 @@ static int get_wave_state(struct device_queue_manager *dqm, * and the queue should be protected against destruction by the process * lock. */ - return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, - ctl_stack_used_size, save_area_used_size); + return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, + ctl_stack, ctl_stack_used_size, save_area_used_size); } static void get_queue_checkpoint_info(struct device_queue_manager *dqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index eb18be74f559..23158db7da03 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -97,6 +97,7 @@ struct mqd_manager { uint32_t queue_id); int (*get_wave_state)(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index d54c6fdebbb6..772c09b5821b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -227,6 +227,7 @@ static uint32_t read_doorbell_id(void *mqd) } static int get_wave_state(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 338d639c1898..632344b95d90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -260,6 +260,7 @@ static uint32_t read_doorbell_id(void *mqd) } static int get_wave_state(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index b53cd8f9620b..5c9b3392758e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -295,6 +295,7 @@ static uint32_t read_doorbell_id(void *mqd) } static int get_wave_state(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size) @@ -561,6 +562,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, int xcc = 0; struct kfd_mem_obj xcc_mqd_mem_obj; uint64_t xcc_gart_addr = 0; + uint64_t xcc_ctx_save_restore_area_address; uint64_t offset = mm->mqd_stride(mm, q); memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj)); @@ -570,6 +572,23 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q); m->cp_mqd_stride_size = offset; + + /* + * Update the CWSR address for each XCC if CWSR is enabled + * and CWSR area is allocated in thunk + */ + if (mm->dev->kfd->cwsr_enabled && + q->ctx_save_restore_area_address) { + xcc_ctx_save_restore_area_address = + q->ctx_save_restore_area_address + + (xcc * q->ctx_save_restore_area_size); + + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(xcc_ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(xcc_ctx_save_restore_area_address); + } + if (q->format == KFD_QUEUE_FORMAT_AQL) { m->compute_tg_chunk_size = 1; @@ -689,6 +708,46 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, return err; } +static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + int xcc, err = 0; + void *xcc_mqd; + void __user *xcc_ctl_stack; + uint64_t mqd_stride_size = mm->mqd_stride(mm, q); + u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0; + + for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) { + xcc_mqd = mqd + mqd_stride_size * xcc; + xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack + + q->ctx_save_restore_area_size * xcc); + + err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack, + &tmp_ctl_stack_used_size, + &tmp_save_area_used_size); + if (err) + break; + + /* + * Set the ctl_stack_used_size and save_area_used_size to + * ctl_stack_used_size and save_area_used_size of XCC 0 when + * passing the info the user-space. + * For multi XCC, user-space would have to look at the header + * info of each Control stack area to determine the control + * stack size and save area used. + */ + if (xcc == 0) { + *ctl_stack_used_size = tmp_ctl_stack_used_size; + *save_area_used_size = tmp_save_area_used_size; + } + } + + return err; +} + #if defined(CONFIG_DEBUG_FS) static int debugfs_show_mqd(struct seq_file *m, void *data) @@ -726,7 +785,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_mqd; mqd->free_mqd = kfd_free_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; - mqd->get_wave_state = get_wave_state; mqd->get_checkpoint_info = get_checkpoint_info; mqd->checkpoint_mqd = checkpoint_mqd; mqd->restore_mqd = restore_mqd; @@ -740,11 +798,13 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; mqd->destroy_mqd = destroy_mqd_v9_4_3; + mqd->get_wave_state = get_wave_state_v9_4_3; } else { mqd->init_mqd = init_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->get_wave_state = get_wave_state; } break; case KFD_MQD_TYPE_HIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index ebf963f42b51..fe69492b1bb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -261,6 +261,7 @@ static void update_mqd_tonga(struct mqd_manager *mm, void *mqd, } static int get_wave_state(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, void __user *ctl_stack, u32 *ctl_stack_used_size, u32 *save_area_used_size) |