aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShaoyun Liu <Shaoyun.Liu@amd.com>2018-08-13 14:04:11 -0400
committerAlex Deucher <alexander.deucher@amd.com>2018-09-10 23:49:00 -0400
commitae9a25aea7f33573f56a422818bfead12aa8bfd6 (patch)
tree46c1668002c7676cc897ccab8727a156aea8ce73
parentaa64ca38ed8253e293b5ce24b40f31f39426e232 (diff)
drm/amdkfd: Generate xGMI direct iolink
Generate xGMI iolinks for upper-level usage.

Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c78
1 files changed, 68 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index ee4996029a86..130db4dc115f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -346,7 +346,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
346 struct list_head *device_list) 346 struct list_head *device_list)
347{ 347{
348 struct kfd_iolink_properties *props = NULL, *props2; 348 struct kfd_iolink_properties *props = NULL, *props2;
349 struct kfd_topology_device *dev, *cpu_dev; 349 struct kfd_topology_device *dev, *to_dev;
350 uint32_t id_from; 350 uint32_t id_from;
351 uint32_t id_to; 351 uint32_t id_to;
352 352
@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
369 369
370 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) 370 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
371 props->weight = 20; 371 props->weight = 20;
372 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
373 props->weight = 15;
372 else 374 else
373 props->weight = node_distance(id_from, id_to); 375 props->weight = node_distance(id_from, id_to);
374 376
@@ -390,19 +392,22 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
390 * links are not built at that time. If a PCIe type is discovered, it 392 * links are not built at that time. If a PCIe type is discovered, it
391 * means a GPU is detected and we are adding GPU->CPU to the topology. 393 * means a GPU is detected and we are adding GPU->CPU to the topology.
392 * At this time, also add the corresponding CPU->GPU link. 394 * At this time, also add the corresponding CPU->GPU link.
395 * For xGMI, only one direction of the link is recorded in the CRAT
396 * table, so add the corresponding reverse-direction link now.
393 */ 397 */
394 if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) { 398 if (props && (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
395 cpu_dev = kfd_topology_device_by_proximity_domain(id_to); 399 props->iolink_type == CRAT_IOLINK_TYPE_XGMI)) {
396 if (!cpu_dev) 400 to_dev = kfd_topology_device_by_proximity_domain(id_to);
401 if (!to_dev)
397 return -ENODEV; 402 return -ENODEV;
398 /* same everything but the other direction */ 403 /* same everything but the other direction */
399 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); 404 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
400 props2->node_from = id_to; 405 props2->node_from = id_to;
401 props2->node_to = id_from; 406 props2->node_to = id_from;
402 props2->kobj = NULL; 407 props2->kobj = NULL;
403 cpu_dev->io_link_count++; 408 to_dev->io_link_count++;
404 cpu_dev->node_props.io_links_count++; 409 to_dev->node_props.io_links_count++;
405 list_add_tail(&props2->list, &cpu_dev->io_link_props); 410 list_add_tail(&props2->list, &to_dev->io_link_props);
406 } 411 }
407 412
408 return 0; 413 return 0;
@@ -1037,7 +1042,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
1037 * 1042 *
1038 * Return 0 if successful else return -ve value 1043 * Return 0 if successful else return -ve value
1039 */ 1044 */
1040static int kfd_fill_gpu_direct_io_link(int *avail_size, 1045static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
1041 struct kfd_dev *kdev, 1046 struct kfd_dev *kdev,
1042 struct crat_subtype_iolink *sub_type_hdr, 1047 struct crat_subtype_iolink *sub_type_hdr,
1043 uint32_t proximity_domain) 1048 uint32_t proximity_domain)
@@ -1069,6 +1074,28 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
1069 return 0; 1074 return 0;
1070} 1075}
1071 1076
1077static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
1078 struct kfd_dev *kdev,
1079 struct crat_subtype_iolink *sub_type_hdr,
1080 uint32_t proximity_domain_from,
1081 uint32_t proximity_domain_to)
1082{
1083 *avail_size -= sizeof(struct crat_subtype_iolink);
1084 if (*avail_size < 0)
1085 return -ENOMEM;
1086
1087 memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
1088
1089 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
1090 sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
1091 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1092
1093 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
1094 sub_type_hdr->proximity_domain_from = proximity_domain_from;
1095 sub_type_hdr->proximity_domain_to = proximity_domain_to;
1096 return 0;
1097}
1098
1072/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU 1099/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
1073 * 1100 *
1074 * @pcrat_image: Fill in VCRAT for GPU 1101 * @pcrat_image: Fill in VCRAT for GPU
@@ -1081,14 +1108,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1081{ 1108{
1082 struct crat_header *crat_table = (struct crat_header *)pcrat_image; 1109 struct crat_header *crat_table = (struct crat_header *)pcrat_image;
1083 struct crat_subtype_generic *sub_type_hdr; 1110 struct crat_subtype_generic *sub_type_hdr;
1111 struct kfd_local_mem_info local_mem_info;
1112 struct kfd_topology_device *peer_dev;
1084 struct crat_subtype_computeunit *cu; 1113 struct crat_subtype_computeunit *cu;
1085 struct kfd_cu_info cu_info; 1114 struct kfd_cu_info cu_info;
1086 int avail_size = *size; 1115 int avail_size = *size;
1087 uint32_t total_num_of_cu; 1116 uint32_t total_num_of_cu;
1088 int num_of_cache_entries = 0; 1117 int num_of_cache_entries = 0;
1089 int cache_mem_filled = 0; 1118 int cache_mem_filled = 0;
1119 uint32_t nid = 0;
1090 int ret = 0; 1120 int ret = 0;
1091 struct kfd_local_mem_info local_mem_info;
1092 1121
1093 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) 1122 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
1094 return -EINVAL; 1123 return -EINVAL;
@@ -1212,7 +1241,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1212 */ 1241 */
1213 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1242 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1214 cache_mem_filled); 1243 cache_mem_filled);
1215 ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev, 1244 ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
1216 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); 1245 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
1217 1246
1218 if (ret < 0) 1247 if (ret < 0)
@@ -1221,6 +1250,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1221 crat_table->length += sub_type_hdr->length; 1250 crat_table->length += sub_type_hdr->length;
1222 crat_table->total_entries++; 1251 crat_table->total_entries++;
1223 1252
1253
1254 /* Fill in Subtype: IO_LINKS
1255 * Direct links from GPU to other GPUs through xGMI.
1256 * Loop over the GPUs that have already been processed (those with
1257 * a lower proximity_domain value) and add a link from this GPU to
1258 * each GPU with the same hive id. The reverse iolink (from the
1259 * other GPU to this GPU) will be added in
1260 * kfd_parse_subtype_iolink.
1261 */
1262 if (kdev->hive_id) {
1263 for (nid = 0; nid < proximity_domain; ++nid) {
1264 peer_dev = kfd_topology_device_by_proximity_domain(nid);
1265 if (!peer_dev->gpu)
1266 continue;
1267 if (peer_dev->gpu->hive_id != kdev->hive_id)
1268 continue;
1269 sub_type_hdr = (typeof(sub_type_hdr))(
1270 (char *)sub_type_hdr +
1271 sizeof(struct crat_subtype_iolink));
1272 ret = kfd_fill_gpu_xgmi_link_to_gpu(
1273 &avail_size, kdev,
1274 (struct crat_subtype_iolink *)sub_type_hdr,
1275 proximity_domain, nid);
1276 if (ret < 0)
1277 return ret;
1278 crat_table->length += sub_type_hdr->length;
1279 crat_table->total_entries++;
1280 }
1281 }
1224 *size = crat_table->length; 1282 *size = crat_table->length;
1225 pr_info("Virtual CRAT table created for GPU\n"); 1283 pr_info("Virtual CRAT table created for GPU\n");
1226 1284