aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2017-12-08 23:08:58 -0500
committerOded Gabbay <oded.gabbay@gmail.com>2017-12-08 23:08:58 -0500
commit520b8fb755ccfb07d8d743da5753cff1fcb74b9f (patch)
tree20d8d603e2560c04160c2449d8b8e90274d50c49 /drivers
parentbc0c75a36722be4537a9266940ddcd4f826234c4 (diff)
drm/amdkfd: Add topology support for CPUs
Currently, the KFD topology information is generated by parsing the CRAT (ACPI) table. However, at present the CRAT table is available only for AMD APUs. To support CPUs on systems without a CRAT table, the KFD driver will create a Virtual CRAT (VCRAT) table, and the existing code will then parse that table to generate the topology.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c321
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c190
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h3
5 files changed, 489 insertions, 35 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index e62493b39041..c8afbf8015f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -21,10 +21,9 @@
21 */ 21 */
22#include <linux/acpi.h> 22#include <linux/acpi.h>
23#include "kfd_crat.h" 23#include "kfd_crat.h"
24#include "kfd_priv.h"
24#include "kfd_topology.h" 25#include "kfd_topology.h"
25 26
26extern struct kfd_system_properties sys_props;
27
28static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, 27static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
29 struct crat_subtype_computeunit *cu) 28 struct crat_subtype_computeunit *cu)
30{ 29{
@@ -281,7 +280,7 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
281int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, 280int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
282 uint32_t proximity_domain) 281 uint32_t proximity_domain)
283{ 282{
284 struct kfd_topology_device *top_dev; 283 struct kfd_topology_device *top_dev = NULL;
285 struct crat_subtype_generic *sub_type_hdr; 284 struct crat_subtype_generic *sub_type_hdr;
286 uint16_t node_id; 285 uint16_t node_id;
287 int ret = 0; 286 int ret = 0;
@@ -314,10 +313,10 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
314 goto err; 313 goto err;
315 } 314 }
316 315
317 sys_props.platform_id = 316 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
318 (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; 317 memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
319 sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); 318 CRAT_OEMTABLEID_LENGTH);
320 sys_props.platform_rev = crat_table->revision; 319 top_dev->oem_revision = crat_table->oem_revision;
321 320
322 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 321 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
323 while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < 322 while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
@@ -385,8 +384,312 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
385 return 0; 384 return 0;
386} 385}
387 386
388/* 387/* Memory required to create Virtual CRAT.
389 * kfd_destroy_crat_image 388 * Since there is no easy way to predict the amount of memory required, the
389 * following amounts are allocated for CPU and GPU Virtual CRAT. This is
390 * expected to cover all known conditions. But to be safe additional check
391 * is put in the code to ensure we don't overwrite.
392 */
393#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
394#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE)
395
396/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
397 *
398 * @numa_node_id: CPU NUMA node id
399 * @avail_size: Available size in the memory
400 * @sub_type_hdr: Memory into which compute info will be filled in
401 *
402 * Return 0 if successful else return -ve value
403 */
404static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
405 int proximity_domain,
406 struct crat_subtype_computeunit *sub_type_hdr)
407{
408 const struct cpumask *cpumask;
409
410 *avail_size -= sizeof(struct crat_subtype_computeunit);
411 if (*avail_size < 0)
412 return -ENOMEM;
413
414 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
415
416 /* Fill in subtype header data */
417 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
418 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
419 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
420
421 cpumask = cpumask_of_node(numa_node_id);
422
423 /* Fill in CU data */
424 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
425 sub_type_hdr->proximity_domain = proximity_domain;
426 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
427 if (sub_type_hdr->processor_id_low == -1)
428 return -EINVAL;
429
430 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
431
432 return 0;
433}
434
435/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
436 *
437 * @numa_node_id: CPU NUMA node id
438 * @avail_size: Available size in the memory
439 * @sub_type_hdr: Memory into which compute info will be filled in
440 *
441 * Return 0 if successful else return -ve value
442 */
443static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
444 int proximity_domain,
445 struct crat_subtype_memory *sub_type_hdr)
446{
447 uint64_t mem_in_bytes = 0;
448 pg_data_t *pgdat;
449 int zone_type;
450
451 *avail_size -= sizeof(struct crat_subtype_memory);
452 if (*avail_size < 0)
453 return -ENOMEM;
454
455 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
456
457 /* Fill in subtype header data */
458 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
459 sub_type_hdr->length = sizeof(struct crat_subtype_memory);
460 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
461
462 /* Fill in Memory Subunit data */
463
464 /* Unlike si_meminfo, si_meminfo_node is not exported. So
465 * the following lines are duplicated from si_meminfo_node
466 * function
467 */
468 pgdat = NODE_DATA(numa_node_id);
469 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
470 mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
471 mem_in_bytes <<= PAGE_SHIFT;
472
473 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
474 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
475 sub_type_hdr->proximity_domain = proximity_domain;
476
477 return 0;
478}
479
480static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
481 uint32_t *num_entries,
482 struct crat_subtype_iolink *sub_type_hdr)
483{
484 int nid;
485 struct cpuinfo_x86 *c = &cpu_data(0);
486 uint8_t link_type;
487
488 if (c->x86_vendor == X86_VENDOR_AMD)
489 link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
490 else
491 link_type = CRAT_IOLINK_TYPE_QPI_1_1;
492
493 *num_entries = 0;
494
495 /* Create IO links from this node to other CPU nodes */
496 for_each_online_node(nid) {
497 if (nid == numa_node_id) /* node itself */
498 continue;
499
500 *avail_size -= sizeof(struct crat_subtype_iolink);
501 if (*avail_size < 0)
502 return -ENOMEM;
503
504 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
505
506 /* Fill in subtype header data */
507 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
508 sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
509 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
510
511 /* Fill in IO link data */
512 sub_type_hdr->proximity_domain_from = numa_node_id;
513 sub_type_hdr->proximity_domain_to = nid;
514 sub_type_hdr->io_interface_type = link_type;
515
516 (*num_entries)++;
517 sub_type_hdr++;
518 }
519
520 return 0;
521}
522
523/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
524 *
525 * @pcrat_image: Fill in VCRAT for CPU
526 * @size: [IN] allocated size of crat_image.
527 * [OUT] actual size of data filled in crat_image
528 */
529static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
530{
531 struct crat_header *crat_table = (struct crat_header *)pcrat_image;
532 struct acpi_table_header *acpi_table;
533 acpi_status status;
534 struct crat_subtype_generic *sub_type_hdr;
535 int avail_size = *size;
536 int numa_node_id;
537 uint32_t entries = 0;
538 int ret = 0;
539
540 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
541 return -EINVAL;
542
543 /* Fill in CRAT Header.
544 * Modify length and total_entries as subunits are added.
545 */
546 avail_size -= sizeof(struct crat_header);
547 if (avail_size < 0)
548 return -ENOMEM;
549
550 memset(crat_table, 0, sizeof(struct crat_header));
551 memcpy(&crat_table->signature, CRAT_SIGNATURE,
552 sizeof(crat_table->signature));
553 crat_table->length = sizeof(struct crat_header);
554
555 status = acpi_get_table("DSDT", 0, &acpi_table);
556 if (status == AE_NOT_FOUND)
557 pr_warn("DSDT table not found for OEM information\n");
558 else {
559 crat_table->oem_revision = acpi_table->revision;
560 memcpy(crat_table->oem_id, acpi_table->oem_id,
561 CRAT_OEMID_LENGTH);
562 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
563 CRAT_OEMTABLEID_LENGTH);
564 }
565 crat_table->total_entries = 0;
566 crat_table->num_domains = 0;
567
568 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
569
570 for_each_online_node(numa_node_id) {
571 if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
572 continue;
573
574 /* Fill in Subtype: Compute Unit */
575 ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
576 crat_table->num_domains,
577 (struct crat_subtype_computeunit *)sub_type_hdr);
578 if (ret < 0)
579 return ret;
580 crat_table->length += sub_type_hdr->length;
581 crat_table->total_entries++;
582
583 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
584 sub_type_hdr->length);
585
586 /* Fill in Subtype: Memory */
587 ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
588 crat_table->num_domains,
589 (struct crat_subtype_memory *)sub_type_hdr);
590 if (ret < 0)
591 return ret;
592 crat_table->length += sub_type_hdr->length;
593 crat_table->total_entries++;
594
595 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
596 sub_type_hdr->length);
597
598 /* Fill in Subtype: IO Link */
599 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
600 &entries,
601 (struct crat_subtype_iolink *)sub_type_hdr);
602 if (ret < 0)
603 return ret;
604 crat_table->length += (sub_type_hdr->length * entries);
605 crat_table->total_entries += entries;
606
607 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
608 sub_type_hdr->length * entries);
609
610 crat_table->num_domains++;
611 }
612
613 /* TODO: Add cache Subtype for CPU.
614 * Currently, CPU cache information is available in function
615 * detect_cache_attributes(cpu) defined in the file
616 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
617 * exported and to get the same information the code needs to be
618 * duplicated.
619 */
620
621 *size = crat_table->length;
622 pr_info("Virtual CRAT table created for CPU\n");
623
624 return 0;
625}
626
627/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
628 * creates a Virtual CRAT (VCRAT) image
629 *
630 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
631 *
632 * @crat_image: VCRAT image created because ACPI does not have a
633 * CRAT for this device
634 * @size: [OUT] size of virtual crat_image
635 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
636 * COMPUTE_UNIT_GPU - Create VCRAT for GPU
637 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
638 * -- this option is not currently implemented.
639 * The assumption is that all AMD APUs will have CRAT
640 * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
641 *
642 * Return 0 if successful else return -ve value
643 */
644int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
645 int flags, struct kfd_dev *kdev,
646 uint32_t proximity_domain)
647{
648 void *pcrat_image = NULL;
649 int ret = 0;
650
651 if (!crat_image)
652 return -EINVAL;
653
654 *crat_image = NULL;
655
656 /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
657 * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
658 * all the current conditions. A check is put not to overwrite beyond
659 * allocated size
660 */
661 switch (flags) {
662 case COMPUTE_UNIT_CPU:
663 pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
664 if (!pcrat_image)
665 return -ENOMEM;
666 *size = VCRAT_SIZE_FOR_CPU;
667 ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
668 break;
669 case COMPUTE_UNIT_GPU:
670 /* TODO: */
671 ret = -EINVAL;
672 pr_err("VCRAT not implemented for dGPU\n");
673 break;
674 case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
675 /* TODO: */
676 ret = -EINVAL;
677 pr_err("VCRAT not implemented for APU\n");
678 break;
679 default:
680 ret = -EINVAL;
681 }
682
683 if (!ret)
684 *crat_image = pcrat_image;
685 else
686 kfree(pcrat_image);
687
688 return ret;
689}
690
691
692/* kfd_destroy_crat_image
390 * 693 *
391 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) 694 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
392 * 695 *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index c15adbd1461e..1711ab664ec6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -44,6 +44,10 @@
44 44
45#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) 45#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
46 46
47/* Compute Unit flags */
48#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
49#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
50
47struct crat_header { 51struct crat_header {
48 uint32_t signature; 52 uint32_t signature;
49 uint32_t length; 53 uint32_t length;
@@ -302,9 +306,14 @@ struct cdit_header {
302 306
303#pragma pack() 307#pragma pack()
304 308
309struct kfd_dev;
310
305int kfd_create_crat_image_acpi(void **crat_image, size_t *size); 311int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
306void kfd_destroy_crat_image(void *crat_image); 312void kfd_destroy_crat_image(void *crat_image);
307int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, 313int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
308 uint32_t proximity_domain); 314 uint32_t proximity_domain);
315int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
316 int flags, struct kfd_dev *kdev,
317 uint32_t proximity_domain);
309 318
310#endif /* KFD_CRAT_H_INCLUDED */ 319#endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 69a6206b67ba..aeee9d42171b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -671,6 +671,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu);
671struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 671struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
672struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 672struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
673int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); 673int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
674int kfd_numa_node_to_apic_id(int numa_node_id);
674 675
675/* Interrupts */ 676/* Interrupts */
676int kfd_interrupt_init(struct kfd_dev *dev); 677int kfd_interrupt_init(struct kfd_dev *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 622fedaa5b39..9aa600477fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -28,6 +28,8 @@
28#include <linux/hash.h> 28#include <linux/hash.h>
29#include <linux/cpufreq.h> 29#include <linux/cpufreq.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/dmi.h>
32#include <linux/atomic.h>
31 33
32#include "kfd_priv.h" 34#include "kfd_priv.h"
33#include "kfd_crat.h" 35#include "kfd_crat.h"
@@ -36,9 +38,10 @@
36 38
37/* topology_device_list - Master list of all topology devices */ 39/* topology_device_list - Master list of all topology devices */
38static struct list_head topology_device_list; 40static struct list_head topology_device_list;
39struct kfd_system_properties sys_props; 41static struct kfd_system_properties sys_props;
40 42
41static DECLARE_RWSEM(topology_lock); 43static DECLARE_RWSEM(topology_lock);
44static atomic_t topology_crat_proximity_domain;
42 45
43struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) 46struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
44{ 47{
@@ -691,12 +694,92 @@ static void kfd_topology_update_device_list(struct list_head *temp_list,
691 } 694 }
692} 695}
693 696
697static void kfd_debug_print_topology(void)
698{
699 struct kfd_topology_device *dev;
700
701 down_read(&topology_lock);
702
703 dev = list_last_entry(&topology_device_list,
704 struct kfd_topology_device, list);
705 if (dev) {
706 if (dev->node_props.cpu_cores_count &&
707 dev->node_props.simd_count) {
708 pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
709 dev->node_props.device_id,
710 dev->node_props.vendor_id);
711 } else if (dev->node_props.cpu_cores_count)
712 pr_info("Topology: Add CPU node\n");
713 else if (dev->node_props.simd_count)
714 pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
715 dev->node_props.device_id,
716 dev->node_props.vendor_id);
717 }
718 up_read(&topology_lock);
719}
720
721/* Helper function for intializing platform_xx members of
722 * kfd_system_properties. Uses OEM info from the last CPU/APU node.
723 */
724static void kfd_update_system_properties(void)
725{
726 struct kfd_topology_device *dev;
727
728 down_read(&topology_lock);
729 dev = list_last_entry(&topology_device_list,
730 struct kfd_topology_device, list);
731 if (dev) {
732 sys_props.platform_id =
733 (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
734 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
735 sys_props.platform_rev = dev->oem_revision;
736 }
737 up_read(&topology_lock);
738}
739
740static void find_system_memory(const struct dmi_header *dm,
741 void *private)
742{
743 struct kfd_mem_properties *mem;
744 u16 mem_width, mem_clock;
745 struct kfd_topology_device *kdev =
746 (struct kfd_topology_device *)private;
747 const u8 *dmi_data = (const u8 *)(dm + 1);
748
749 if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
750 mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
751 mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
752 list_for_each_entry(mem, &kdev->mem_props, list) {
753 if (mem_width != 0xFFFF && mem_width != 0)
754 mem->width = mem_width;
755 if (mem_clock != 0)
756 mem->mem_clk_max = mem_clock;
757 }
758 }
759}
760/* kfd_add_non_crat_information - Add information that is not currently
761 * defined in CRAT but is necessary for KFD topology
762 * @dev - topology device to which addition info is added
763 */
764static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
765{
766 /* Check if CPU only node. */
767 if (!kdev->gpu) {
768 /* Add system memory information */
769 dmi_walk(find_system_memory, kdev);
770 }
771 /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
772}
773
694int kfd_topology_init(void) 774int kfd_topology_init(void)
695{ 775{
696 void *crat_image = NULL; 776 void *crat_image = NULL;
697 size_t image_size = 0; 777 size_t image_size = 0;
698 int ret; 778 int ret;
699 struct list_head temp_topology_device_list; 779 struct list_head temp_topology_device_list;
780 int cpu_only_node = 0;
781 struct kfd_topology_device *kdev;
782 int proximity_domain;
700 783
701 /* topology_device_list - Master list of all topology devices 784 /* topology_device_list - Master list of all topology devices
702 * temp_topology_device_list - temporary list created while parsing CRAT 785 * temp_topology_device_list - temporary list created while parsing CRAT
@@ -711,36 +794,78 @@ int kfd_topology_init(void)
711 794
712 memset(&sys_props, 0, sizeof(sys_props)); 795 memset(&sys_props, 0, sizeof(sys_props));
713 796
797 /* Proximity domains in ACPI CRAT tables start counting at
798 * 0. The same should be true for virtual CRAT tables created
799 * at this stage. GPUs added later in kfd_topology_add_device
800 * use a counter.
801 */
802 proximity_domain = 0;
803
714 /* 804 /*
715 * Get the CRAT image from the ACPI 805 * Get the CRAT image from the ACPI. If ACPI doesn't have one
806 * create a virtual CRAT.
807 * NOTE: The current implementation expects all AMD APUs to have
808 * CRAT. If no CRAT is available, it is assumed to be a CPU
716 */ 809 */
717 ret = kfd_create_crat_image_acpi(&crat_image, &image_size); 810 ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
718 if (!ret) { 811 if (!ret) {
719 ret = kfd_parse_crat_table(crat_image, 812 ret = kfd_parse_crat_table(crat_image,
720 &temp_topology_device_list, 0); 813 &temp_topology_device_list,
721 if (ret) 814 proximity_domain);
815 if (ret) {
816 kfd_release_topology_device_list(
817 &temp_topology_device_list);
818 kfd_destroy_crat_image(crat_image);
819 crat_image = NULL;
820 }
821 }
822
823 if (!crat_image) {
824 ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
825 COMPUTE_UNIT_CPU, NULL,
826 proximity_domain);
827 cpu_only_node = 1;
828 if (ret) {
829 pr_err("Error creating VCRAT table for CPU\n");
830 return ret;
831 }
832
833 ret = kfd_parse_crat_table(crat_image,
834 &temp_topology_device_list,
835 proximity_domain);
836 if (ret) {
837 pr_err("Error parsing VCRAT table for CPU\n");
722 goto err; 838 goto err;
723 } else if (ret == -ENODATA) { 839 }
724 /* TODO: Create fake CRAT table */
725 ret = 0;
726 goto err;
727 } else {
728 pr_err("Couldn't get CRAT table size from ACPI\n");
729 goto err;
730 } 840 }
731 841
732 down_write(&topology_lock); 842 down_write(&topology_lock);
733 kfd_topology_update_device_list(&temp_topology_device_list, 843 kfd_topology_update_device_list(&temp_topology_device_list,
734 &topology_device_list); 844 &topology_device_list);
845 atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
735 ret = kfd_topology_update_sysfs(); 846 ret = kfd_topology_update_sysfs();
736 up_write(&topology_lock); 847 up_write(&topology_lock);
737 848
738 if (!ret) { 849 if (!ret) {
739 sys_props.generation_count++; 850 sys_props.generation_count++;
851 kfd_update_system_properties();
852 kfd_debug_print_topology();
740 pr_info("Finished initializing topology\n"); 853 pr_info("Finished initializing topology\n");
741 } else 854 } else
742 pr_err("Failed to update topology in sysfs ret=%d\n", ret); 855 pr_err("Failed to update topology in sysfs ret=%d\n", ret);
743 856
857 /* For nodes with GPU, this information gets added
858 * when GPU is detected (kfd_topology_add_device).
859 */
860 if (cpu_only_node) {
861 /* Add additional information to CPU only node created above */
862 down_write(&topology_lock);
863 kdev = list_first_entry(&topology_device_list,
864 struct kfd_topology_device, list);
865 up_write(&topology_lock);
866 kfd_add_non_crat_information(kdev);
867 }
868
744err: 869err:
745 kfd_destroy_crat_image(crat_image); 870 kfd_destroy_crat_image(crat_image);
746 return ret; 871 return ret;
@@ -754,21 +879,6 @@ void kfd_topology_shutdown(void)
754 up_write(&topology_lock); 879 up_write(&topology_lock);
755} 880}
756 881
757static void kfd_debug_print_topology(void)
758{
759 struct kfd_topology_device *dev;
760 uint32_t i = 0;
761
762 pr_info("DEBUG PRINT OF TOPOLOGY:");
763 list_for_each_entry(dev, &topology_device_list, list) {
764 pr_info("Node: %d\n", i);
765 pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
766 pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
767 pr_info("\tSIMD count: %d\n", dev->node_props.simd_count);
768 i++;
769 }
770}
771
772static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) 882static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
773{ 883{
774 uint32_t hashout; 884 uint32_t hashout;
@@ -954,6 +1064,34 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
954 1064
955} 1065}
956 1066
1067static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1068{
1069 const struct cpuinfo_x86 *cpuinfo;
1070 int first_cpu_of_numa_node;
1071
1072 if (!cpumask || cpumask == cpu_none_mask)
1073 return -1;
1074 first_cpu_of_numa_node = cpumask_first(cpumask);
1075 if (first_cpu_of_numa_node >= nr_cpu_ids)
1076 return -1;
1077 cpuinfo = &cpu_data(first_cpu_of_numa_node);
1078
1079 return cpuinfo->apicid;
1080}
1081
1082/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1083 * of the given NUMA node (numa_node_id)
1084 * Return -1 on failure
1085 */
1086int kfd_numa_node_to_apic_id(int numa_node_id)
1087{
1088 if (numa_node_id == -1) {
1089 pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1090 return kfd_cpumask_to_apic_id(cpu_online_mask);
1091 }
1092 return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1093}
1094
957#if defined(CONFIG_DEBUG_FS) 1095#if defined(CONFIG_DEBUG_FS)
958 1096
959int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) 1097int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 50a741ba51e0..866818964a9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -148,6 +148,9 @@ struct kfd_topology_device {
148 struct attribute attr_gpuid; 148 struct attribute attr_gpuid;
149 struct attribute attr_name; 149 struct attribute attr_name;
150 struct attribute attr_props; 150 struct attribute attr_props;
151 uint8_t oem_id[CRAT_OEMID_LENGTH];
152 uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
153 uint32_t oem_revision;
151}; 154};
152 155
153struct kfd_system_properties { 156struct kfd_system_properties {