Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 88
1 file changed, 58 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 475e47027354..3dcd8f8bc98e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -30,6 +30,9 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
+#define DUMMY_VRAM_SIZE 31138512896
+
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
* The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
props->heap_type = heap_type;
props->flags = flags;
+ if (size_in_bytes == 0)
+ size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
props->size_in_bytes = size_in_bytes;
props->width = width;
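
For bringup, kfd_parse_subtype_mem() now substitutes DUMMY_VRAM_SIZE whenever the CRAT memory subtype reports a zero size; note the constant works out to exactly 29 GiB (31138512896 = 29 * 2^30), a bit under the "32GB" named in the Fixme. A minimal standalone sketch of the same substitution, as userspace C with the surrounding kernel structures omitted:

	#include <stdint.h>
	#include <stdio.h>

	/* Bringup placeholder from the hunk above; 31138512896 bytes = 29 GiB. */
	#define DUMMY_VRAM_SIZE 31138512896ULL

	int main(void)
	{
		uint64_t size_in_bytes = 0;	/* pretend firmware reported no VRAM */

		/* Same fallback as kfd_parse_subtype_mem(): never report 0 bytes. */
		if (size_in_bytes == 0)
			size_in_bytes = DUMMY_VRAM_SIZE;

		printf("size_in_bytes = %llu\n", (unsigned long long)size_in_bytes);
		return 0;
	}
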
@@ -1166,7 +1171,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
- props->weight = 15 * iolink->num_hops_xgmi;
+ props->weight = iolink->weight_xgmi;
else
props->weight = node_distance(id_from, id_to);
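
With weight_xgmi now carried in the CRAT subtype, the parser no longer derives the xGMI weight as 15 * num_hops_xgmi; it copies whatever the creator side computed (see the later hunks below). A sketch of the resulting selection, where node_distance() is a hypothetical stand-in for the kernel's NUMA distance helper:

	#include <stdint.h>
	#include <stdio.h>

	enum iolink_type { IOLINK_PCIEXPRESS, IOLINK_XGMI, IOLINK_OTHER };

	/* Hypothetical stand-in for the kernel's NUMA distance helper. */
	static uint32_t node_distance(int id_from, int id_to)
	{
		return 40;
	}

	static uint32_t iolink_weight(enum iolink_type type, uint32_t weight_xgmi,
				      int id_from, int id_to)
	{
		if (type == IOLINK_PCIEXPRESS)
			return 20;		/* fixed PCIe weight */
		else if (type == IOLINK_XGMI)
			return weight_xgmi;	/* taken as-is from the CRAT entry */
		return node_distance(id_from, id_to);
	}

	int main(void)
	{
		printf("xgmi weight = %u\n", iolink_weight(IOLINK_XGMI, 15, 0, 1));
		printf("pcie weight = %u\n", iolink_weight(IOLINK_PCIEXPRESS, 0, 0, 1));
		return 0;
	}
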
@@ -1405,7 +1410,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
}
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
@@ -1524,7 +1529,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -1858,7 +1863,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
}
static int kfd_fill_gpu_memory_affinity(int *avail_size,
- struct kfd_dev *kdev, uint8_t type, uint64_t size,
+ struct kfd_node *kdev, uint8_t type, uint64_t size,
struct crat_subtype_memory *sub_type_hdr,
uint32_t proximity_domain,
const struct kfd_local_mem_info *local_mem_info)
@@ -1887,7 +1892,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
}
#ifdef CONFIG_ACPI_NUMA
-static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
@@ -1972,6 +1977,9 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
}
#endif
+#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13
+#define KFD_CRAT_XGMI_WEIGHT 15
+
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
* to its NUMA node
* @avail_size: Available size in the memory
@@ -1982,7 +1990,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
* Return 0 if successful else return -ve value
*/
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
- struct kfd_dev *kdev,
+ struct kfd_node *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
@@ -2002,7 +2010,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (kdev->adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu ||
+ (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
+ kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
+ AMDGPU_PKG_TYPE_APU)) {
+ bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
+ int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
+ KFD_CRAT_INTRA_SOCKET_WEIGHT;
+ uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
+ kdev->adev, NULL, true) : mem_bw;
+
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -2010,14 +2027,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
*/
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
- sub_type_hdr->num_hops_xgmi = 1;
- if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
- sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->adev, NULL, true);
- sub_type_hdr->maximum_bandwidth_mbs =
- sub_type_hdr->minimum_bandwidth_mbs;
- }
+ sub_type_hdr->weight_xgmi = weight;
+ sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
+ sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2029,7 +2041,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain;
#ifdef CONFIG_ACPI_NUMA
- if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
+ if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
+ num_possible_nodes() > 1)
kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
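
Taken together, the hunks above mean the GPU-to-CPU link is no longer hard-coded to one xGMI hop: an external CPU reached over xGMI gets KFD_CRAT_XGMI_WEIGHT and the driver-reported link bandwidth, while an on-package CPU (the GFX 9.4.3 APU case) gets KFD_CRAT_INTRA_SOCKET_WEIGHT and a fixed 819200 MB/s memory bandwidth; in both cases minimum and maximum bandwidth are set to the same value. A standalone sketch of that selection, where xgmi_bandwidth_mbytes() is a hypothetical stand-in for amdgpu_amdkfd_get_xgmi_bandwidth_mbytes():

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13
	#define KFD_CRAT_XGMI_WEIGHT         15

	/* Hypothetical stand-in for amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(). */
	static uint32_t xgmi_bandwidth_mbytes(void)
	{
		return 25000;
	}

	static void fill_cpu_link(bool ext_cpu, int *weight, uint32_t *bandwidth)
	{
		int mem_bw = 819200;	/* fixed memory bandwidth, MB/s */

		*weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
			  KFD_CRAT_INTRA_SOCKET_WEIGHT;
		*bandwidth = ext_cpu ? xgmi_bandwidth_mbytes() : (uint32_t)mem_bw;
	}

	int main(void)
	{
		int weight;
		uint32_t bw;

		fill_cpu_link(false, &weight, &bw);	/* on-package APU CPU */
		printf("APU: weight=%d bw=%u\n", weight, bw);
		fill_cpu_link(true, &weight, &bw);	/* external CPU over xGMI */
		printf("ext: weight=%d bw=%u\n", weight, bw);
		return 0;
	}
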
@@ -2044,12 +2057,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
}
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
- struct kfd_dev *kdev,
- struct kfd_dev *peer_kdev,
+ struct kfd_node *kdev,
+ struct kfd_node *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
{
+ bool use_ta_info = kdev->kfd->num_nodes == 1;
+
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -2064,12 +2079,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
- sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+
+ if (use_ta_info) {
+ sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
+ sub_type_hdr->maximum_bandwidth_mbs =
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
+ peer_kdev->adev, false);
+ sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+ } else {
+ bool is_single_hop = kdev->kfd == peer_kdev->kfd;
+ int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
+ (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
+ int mem_bw = 819200;
+
+ sub_type_hdr->weight_xgmi = weight;
+ sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ }
return 0;
}
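
For multi-node (partitioned) devices, where the xGMI TA information is not used, the GPU-to-GPU link parameters become fixed: partitions on the same socket (same kfd) get the intra-socket weight and the 819200 MB/s memory bandwidth, while partitions on different sockets pay an intra-socket hop on each side plus one xGMI hop (2 * 13 + 15 = 41) and report no bandwidth. A sketch of that branch:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13
	#define KFD_CRAT_XGMI_WEIGHT         15

	/* Mirrors the !use_ta_info branch above for partitioned devices. */
	static void fill_gpu_xgmi_link(bool is_single_hop, int *weight,
				       uint32_t *min_bw, uint32_t *max_bw)
	{
		int mem_bw = 819200;	/* fixed memory bandwidth, MB/s */

		*weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
			  (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
		*max_bw = is_single_hop ? mem_bw : 0;
		*min_bw = is_single_hop ? mem_bw : 0;
	}

	int main(void)
	{
		int weight;
		uint32_t min_bw, max_bw;

		fill_gpu_xgmi_link(true, &weight, &min_bw, &max_bw);
		printf("same socket:  weight=%d bw=%u\n", weight, max_bw);
		fill_gpu_xgmi_link(false, &weight, &min_bw, &max_bw);
		printf("cross socket: weight=%d bw=%u\n", weight, max_bw);
		return 0;
	}
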
@@ -2081,7 +2109,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
* [OUT] actual size of data filled in crat_image
*/
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
- size_t *size, struct kfd_dev *kdev,
+ size_t *size, struct kfd_node *kdev,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
@@ -2153,7 +2181,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
- if (!kfd_iommu_check_device(kdev))
+ if (!kfd_iommu_check_device(kdev->kfd))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
crat_table->length += sub_type_hdr->length;
@@ -2216,12 +2244,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (from other GPU to this GPU) will be added
* in kfd_parse_subtype_iolink.
*/
- if (kdev->hive_id) {
+ if (kdev->kfd->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
- if (peer_dev->gpu->hive_id != kdev->hive_id)
+ if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
@@ -2255,12 +2283,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
* -- this option is not currently implemented.
* The assumption is that all AMD APUs will have CRAT
- * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
*
* Return 0 if successful else return -ve value
*/
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
- int flags, struct kfd_dev *kdev,
+ int flags, struct kfd_node *kdev,
uint32_t proximity_domain)
{
void *pcrat_image = NULL;