diff options
author | Shaoyun Liu <Shaoyun.Liu@amd.com> | 2018-08-13 14:04:11 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2018-09-10 23:49:00 -0400 |
commit | ae9a25aea7f33573f56a422818bfead12aa8bfd6 (patch) | |
tree | 46c1668002c7676cc897ccab8727a156aea8ce73 | |
parent | aa64ca38ed8253e293b5ce24b40f31f39426e232 (diff) |
drm/amdkfd: Generate xGMI direct iolink
Generate xGMI io links between GPUs in the same hive so that upper layers can use them.
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 78 |
1 files changed, 68 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index ee4996029a86..130db4dc115f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c | |||
@@ -346,7 +346,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, | |||
346 | struct list_head *device_list) | 346 | struct list_head *device_list) |
347 | { | 347 | { |
348 | struct kfd_iolink_properties *props = NULL, *props2; | 348 | struct kfd_iolink_properties *props = NULL, *props2; |
349 | struct kfd_topology_device *dev, *cpu_dev; | 349 | struct kfd_topology_device *dev, *to_dev; |
350 | uint32_t id_from; | 350 | uint32_t id_from; |
351 | uint32_t id_to; | 351 | uint32_t id_to; |
352 | 352 | ||
@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, | |||
369 | 369 | ||
370 | if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) | 370 | if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) |
371 | props->weight = 20; | 371 | props->weight = 20; |
372 | else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) | ||
373 | props->weight = 15; | ||
372 | else | 374 | else |
373 | props->weight = node_distance(id_from, id_to); | 375 | props->weight = node_distance(id_from, id_to); |
374 | 376 | ||
@@ -390,19 +392,22 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, | |||
390 | * links are not built at that time. If a PCIe type is discovered, it | 392 | * links are not built at that time. If a PCIe type is discovered, it |
391 | * means a GPU is detected and we are adding GPU->CPU to the topology. | 393 | * means a GPU is detected and we are adding GPU->CPU to the topology. |
392 | * At this time, also add the corresponding CPU->GPU link. | 394 | * At this time, also add the corresponding CPU->GPU link. |
395 | * For xGMI, only one direction of the link was added to the CRAT | ||
396 | * table, so add the corresponding reverse-direction link now. | ||
393 | */ | 397 | */ |
394 | if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) { | 398 | if (props && (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || |
395 | cpu_dev = kfd_topology_device_by_proximity_domain(id_to); | 399 | props->iolink_type == CRAT_IOLINK_TYPE_XGMI)) { |
396 | if (!cpu_dev) | 400 | to_dev = kfd_topology_device_by_proximity_domain(id_to); |
401 | if (!to_dev) | ||
397 | return -ENODEV; | 402 | return -ENODEV; |
398 | /* same everything but the other direction */ | 403 | /* same everything but the other direction */ |
399 | props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); | 404 | props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); |
400 | props2->node_from = id_to; | 405 | props2->node_from = id_to; |
401 | props2->node_to = id_from; | 406 | props2->node_to = id_from; |
402 | props2->kobj = NULL; | 407 | props2->kobj = NULL; |
403 | cpu_dev->io_link_count++; | 408 | to_dev->io_link_count++; |
404 | cpu_dev->node_props.io_links_count++; | 409 | to_dev->node_props.io_links_count++; |
405 | list_add_tail(&props2->list, &cpu_dev->io_link_props); | 410 | list_add_tail(&props2->list, &to_dev->io_link_props); |
406 | } | 411 | } |
407 | 412 | ||
408 | return 0; | 413 | return 0; |
@@ -1037,7 +1042,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, | |||
1037 | * | 1042 | * |
1038 | * Return 0 if successful else return -ve value | 1043 | * Return 0 if successful else return -ve value |
1039 | */ | 1044 | */ |
1040 | static int kfd_fill_gpu_direct_io_link(int *avail_size, | 1045 | static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, |
1041 | struct kfd_dev *kdev, | 1046 | struct kfd_dev *kdev, |
1042 | struct crat_subtype_iolink *sub_type_hdr, | 1047 | struct crat_subtype_iolink *sub_type_hdr, |
1043 | uint32_t proximity_domain) | 1048 | uint32_t proximity_domain) |
@@ -1069,6 +1074,28 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, | |||
1069 | return 0; | 1074 | return 0; |
1070 | } | 1075 | } |
1071 | 1076 | ||
1077 | static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, | ||
1078 | struct kfd_dev *kdev, | ||
1079 | struct crat_subtype_iolink *sub_type_hdr, | ||
1080 | uint32_t proximity_domain_from, | ||
1081 | uint32_t proximity_domain_to) | ||
1082 | { | ||
1083 | *avail_size -= sizeof(struct crat_subtype_iolink); | ||
1084 | if (*avail_size < 0) | ||
1085 | return -ENOMEM; | ||
1086 | |||
1087 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | ||
1088 | |||
1089 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | ||
1090 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | ||
1091 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; | ||
1092 | |||
1093 | sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; | ||
1094 | sub_type_hdr->proximity_domain_from = proximity_domain_from; | ||
1095 | sub_type_hdr->proximity_domain_to = proximity_domain_to; | ||
1096 | return 0; | ||
1097 | } | ||
1098 | |||
1072 | /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU | 1099 | /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU |
1073 | * | 1100 | * |
1074 | * @pcrat_image: Fill in VCRAT for GPU | 1101 | * @pcrat_image: Fill in VCRAT for GPU |
@@ -1081,14 +1108,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
1081 | { | 1108 | { |
1082 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; | 1109 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; |
1083 | struct crat_subtype_generic *sub_type_hdr; | 1110 | struct crat_subtype_generic *sub_type_hdr; |
1111 | struct kfd_local_mem_info local_mem_info; | ||
1112 | struct kfd_topology_device *peer_dev; | ||
1084 | struct crat_subtype_computeunit *cu; | 1113 | struct crat_subtype_computeunit *cu; |
1085 | struct kfd_cu_info cu_info; | 1114 | struct kfd_cu_info cu_info; |
1086 | int avail_size = *size; | 1115 | int avail_size = *size; |
1087 | uint32_t total_num_of_cu; | 1116 | uint32_t total_num_of_cu; |
1088 | int num_of_cache_entries = 0; | 1117 | int num_of_cache_entries = 0; |
1089 | int cache_mem_filled = 0; | 1118 | int cache_mem_filled = 0; |
1119 | uint32_t nid = 0; | ||
1090 | int ret = 0; | 1120 | int ret = 0; |
1091 | struct kfd_local_mem_info local_mem_info; | ||
1092 | 1121 | ||
1093 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) | 1122 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) |
1094 | return -EINVAL; | 1123 | return -EINVAL; |
@@ -1212,7 +1241,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
1212 | */ | 1241 | */ |
1213 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | 1242 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
1214 | cache_mem_filled); | 1243 | cache_mem_filled); |
1215 | ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev, | 1244 | ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, |
1216 | (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); | 1245 | (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); |
1217 | 1246 | ||
1218 | if (ret < 0) | 1247 | if (ret < 0) |
@@ -1221,6 +1250,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
1221 | crat_table->length += sub_type_hdr->length; | 1250 | crat_table->length += sub_type_hdr->length; |
1222 | crat_table->total_entries++; | 1251 | crat_table->total_entries++; |
1223 | 1252 | ||
1253 | |||
1254 | /* Fill in Subtype: IO_LINKS | ||
1255 | * Direct links from GPU to other GPUs through xGMI. | ||
1256 | * Loop over the GPUs that have already been processed (those with a | ||
1257 | * lower proximity_domain) and add a link from this GPU to each | ||
1258 | * one that has the same hive id. The reverse io link | ||
1259 | * (from the other GPU to this GPU) will be added | ||
1260 | * in kfd_parse_subtype_iolink. | ||
1261 | */ | ||
1262 | if (kdev->hive_id) { | ||
1263 | for (nid = 0; nid < proximity_domain; ++nid) { | ||
1264 | peer_dev = kfd_topology_device_by_proximity_domain(nid); | ||
1265 | if (!peer_dev->gpu) | ||
1266 | continue; | ||
1267 | if (peer_dev->gpu->hive_id != kdev->hive_id) | ||
1268 | continue; | ||
1269 | sub_type_hdr = (typeof(sub_type_hdr))( | ||
1270 | (char *)sub_type_hdr + | ||
1271 | sizeof(struct crat_subtype_iolink)); | ||
1272 | ret = kfd_fill_gpu_xgmi_link_to_gpu( | ||
1273 | &avail_size, kdev, | ||
1274 | (struct crat_subtype_iolink *)sub_type_hdr, | ||
1275 | proximity_domain, nid); | ||
1276 | if (ret < 0) | ||
1277 | return ret; | ||
1278 | crat_table->length += sub_type_hdr->length; | ||
1279 | crat_table->total_entries++; | ||
1280 | } | ||
1281 | } | ||
1224 | *size = crat_table->length; | 1282 | *size = crat_table->length; |
1225 | pr_info("Virtual CRAT table created for GPU\n"); | 1283 | pr_info("Virtual CRAT table created for GPU\n"); |
1226 | 1284 | ||