diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 76 |
1 files changed, 57 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 7f2ae0d15d4a..4a7180b46b71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -28,6 +28,7 @@ #include "kfd_topology.h" #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_xgmi.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. @@ -1422,6 +1423,7 @@ err: static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + bool cache_line_size_missing, struct kfd_gpu_cache_info *pcache_info) { struct amdgpu_device *adev = kdev->adev; @@ -1434,7 +1436,10 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1446,6 +1451,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1456,6 +1464,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 64; i++; } /* GL1 Data Cache per SA */ @@ -1468,6 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + if (cache_line_size_missing) + pcache_info[i].cache_line_size = 128; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1478,6 +1491,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* L3 Data Cache per GPU */ @@ -1488,6 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 64; i++; } return i; @@ -1503,6 +1520,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_tcp_size_per_cu) { pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; pcache_info[i].cache_level = 1; + /* Cacheline size not available in IP discovery for gc943,gc944 */ + pcache_info[i].cache_line_size = 128; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1514,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, pcache_info[i].cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; pcache_info[i].cache_level = 1; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1524,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; pcache_info[i].cache_level = 1; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1534,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gfx.config.gc_tcc_size) { pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; pcache_info[i].cache_level = 2; + pcache_info[i].cache_line_size = 128; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1544,6 +1566,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, if (adev->gmc.mall_size) { pcache_info[i].cache_size = adev->gmc.mall_size / 1024; pcache_info[i].cache_level = 3; + pcache_info[i].cache_line_size = 64; pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); @@ -1556,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; + bool cache_line_size_missing = false; switch (kdev->adev->asic_type) { case CHIP_KAVERI: @@ -1614,6 +1638,8 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): num_of_cache_types = kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, *pcache_info); @@ -1677,8 +1703,19 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + /* Cacheline size not available in IP discovery for gc11. + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it + */ + cache_line_size_missing = true; + fallthrough; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, + cache_line_size_missing, + *pcache_info); break; default: *pcache_info = dummy_cache_info; @@ -2096,9 +2133,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT : KFD_CRAT_INTRA_SOCKET_WEIGHT; - uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( - kdev->adev, NULL, true) : mem_bw; - /* * with host gpu xgmi link, host can access gpu memory whether * or not pcie bar type is large, so always create bidirectional @@ -2107,8 +2141,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->weight_xgmi = weight; - sub_type_hdr->minimum_bandwidth_mbs = bandwidth; - sub_type_hdr->maximum_bandwidth_mbs = bandwidth; + if (ext_cpu) { + amdgpu_xgmi_get_bandwidth(kdev->adev, NULL, + AMDGPU_XGMI_BW_MODE_PER_LINK, + AMDGPU_XGMI_BW_UNIT_MBYTES, + &sub_type_hdr->minimum_bandwidth_mbs, + &sub_type_hdr->maximum_bandwidth_mbs); + } else { + sub_type_hdr->minimum_bandwidth_mbs = mem_bw; + sub_type_hdr->maximum_bandwidth_mbs = mem_bw; + } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->minimum_bandwidth_mbs = @@ -2161,12 +2203,12 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, if (use_ta_info) { sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * - amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); - sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, - peer_kdev->adev, false); - sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; + amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev); + amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev, + AMDGPU_XGMI_BW_MODE_PER_PEER, + AMDGPU_XGMI_BW_UNIT_MBYTES, + &sub_type_hdr->minimum_bandwidth_mbs, + &sub_type_hdr->maximum_bandwidth_mbs); } else { bool is_single_hop = kdev->kfd == peer_kdev->kfd; int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT : @@ -2210,9 +2252,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * Modify length and total_entries as subunits are added. */ avail_size -= sizeof(struct crat_header); - if (avail_size < 0) - return -ENOMEM; - memset(crat_table, 0, sizeof(struct crat_header)); memcpy(&crat_table->signature, CRAT_SIGNATURE, @@ -2226,9 +2265,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * First fill in the sub type header and then sub type data */ avail_size -= sizeof(struct crat_subtype_computeunit); - if (avail_size < 0) - return -ENOMEM; - sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); @@ -2325,6 +2361,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, continue; if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) continue; + if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev)) + continue; sub_type_hdr = (typeof(sub_type_hdr))( (char *)sub_type_hdr + sizeof(struct crat_subtype_iolink)); |