diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-07-20 17:05:21 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-07-20 17:05:21 -0400 |
commit | cbe68f5bc31b0927401c938e84f3160060a6c5e8 (patch) | |
tree | eccdc7f5ab91a13d9dbda06a7ec9e6f62a048edd | |
parent | a1598f27a124cb0b5263276f7098ae3a68460b61 (diff) |
Improve copy engine register documentation in nvdebug.h + cleanup
-rw-r--r-- | device_info_procfs.c | 2 | ||||
-rw-r--r-- | mmu.c | 1 | ||||
-rw-r--r-- | nvdebug.h | 162 | ||||
-rw-r--r-- | stubs.h | 1 |
4 files changed, 109 insertions, 57 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index 1fc0586..5fc417f 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
@@ -57,7 +57,7 @@ static void* device_info_file_seq_next(struct seq_file *s, void *idx, | |||
57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { | 57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { |
58 | ptop_device_info_t curr_info; | 58 | ptop_device_info_t curr_info; |
59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
60 | 60 | ||
61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); | 61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); |
62 | // Check for read errors | 62 | // Check for read errors |
63 | if (curr_info.raw == -1) | 63 | if (curr_info.raw == -1) |
@@ -198,7 +198,6 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, | |||
198 | // If we find a matching PTE, return its virtual address | 198 | // If we find a matching PTE, return its virtual address |
199 | if ((uint64_t)pte.addr << 12 == addr_to_find) | 199 | if ((uint64_t)pte.addr << 12 == addr_to_find) |
200 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; | 200 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; |
201 | |||
202 | } | 201 | } |
203 | } while (++i < NV_MMU_PT_V1_SZ[0]); | 202 | } while (++i < NV_MMU_PT_V1_SZ[0]); |
204 | return 0; | 203 | return 0; |
@@ -1,5 +1,12 @@ | |||
1 | /* Copyright 2021 Joshua Bakita | 1 | /* Copyright 2021 Joshua Bakita |
2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
3 | * | ||
4 | * File outline: | ||
5 | * - Runlist, preemption, and channel control (FIFO) | ||
6 | * - Basic GPU information (MC) | ||
7 | * - Detailed GPU information (PTOP, FUSE, and CE) | ||
8 | * - PRAMIN, BAR1/2, and page table status | ||
9 | * - Helper functions for nvdebug | ||
3 | */ | 10 | */ |
4 | 11 | ||
5 | // TODO(jbakita): Don't depend on these. | 12 | // TODO(jbakita): Don't depend on these. |
@@ -448,53 +455,6 @@ typedef union { | |||
448 | } __attribute__((packed)); | 455 | } __attribute__((packed)); |
449 | } mc_boot_0_t; | 456 | } mc_boot_0_t; |
450 | 457 | ||
451 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
452 | enum ENGINE_TYPES { | ||
453 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
454 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
455 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
456 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
457 | |||
458 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
459 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
460 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
461 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
462 | ENGINE_VIC = 12, // Video Image Compositor | ||
463 | ENGINE_SEC = 13, // SEquenCer [?] | ||
464 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
465 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
466 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
467 | |||
468 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
469 | ENGINE_LCE = 19, // Logical Copy Engine | ||
470 | ENGINE_GSP = 20, // Gpu System Processor | ||
471 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
472 | }; | ||
473 | #define ENGINE_TYPES_LEN 22 | ||
474 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
475 | "Graphics/Compute", | ||
476 | "COPY0", | ||
477 | "COPY1", | ||
478 | "COPY2", | ||
479 | "Unknown Engine ID#4", | ||
480 | "Unknown Engine ID#5", | ||
481 | "Unknown Engine ID#6", | ||
482 | "Unknown Engine ID#7", | ||
483 | "MSPDEC: Picture Decoder", | ||
484 | "MSPPP: Post Processing", | ||
485 | "MSVLD: Variable Length Decoder", | ||
486 | "MSENC: Encoder", | ||
487 | "VIC: Video Image Compositor", | ||
488 | "SEC: Sequencer", | ||
489 | "NVENC0: NVIDIA Video Encoder #0", | ||
490 | "NVENC1: NVIDIA Video Encoder #1", | ||
491 | "NVDEC: NVIDIA Video Decoder", | ||
492 | "Unknown Engine ID#17", | ||
493 | "IOCTRL: I/O Controller", | ||
494 | "LCE: Logical Copy Engine", | ||
495 | "GSP: GPU System Processor", | ||
496 | "NVJPG: NVIDIA JPEG Decoder", | ||
497 | }; | ||
498 | 458 | ||
499 | /* GPU engine information and control register offsets | 459 | /* GPU engine information and control register offsets |
500 | Each engine is described by one or more entries (terminated by an entry with | 460 | Each engine is described by one or more entries (terminated by an entry with |
@@ -553,6 +513,54 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
553 | */ | 513 | */ |
554 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | 514 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) |
555 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | 515 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 |
516 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
517 | enum ENGINE_TYPES { | ||
518 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
519 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
520 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
521 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
522 | |||
523 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
524 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
525 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
526 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
527 | ENGINE_VIC = 12, // Video Image Compositor | ||
528 | ENGINE_SEC = 13, // SEquenCer [?] | ||
529 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
530 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
531 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
532 | |||
533 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
534 | ENGINE_LCE = 19, // Logical Copy Engine | ||
535 | ENGINE_GSP = 20, // Gpu System Processor | ||
536 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
537 | }; | ||
538 | #define ENGINE_TYPES_LEN 22 | ||
539 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
540 | "Graphics/Compute", | ||
541 | "COPY0", | ||
542 | "COPY1", | ||
543 | "COPY2", | ||
544 | "Unknown Engine ID#4", | ||
545 | "Unknown Engine ID#5", | ||
546 | "Unknown Engine ID#6", | ||
547 | "Unknown Engine ID#7", | ||
548 | "MSPDEC: Picture Decoder", | ||
549 | "MSPPP: Post Processing", | ||
550 | "MSVLD: Variable Length Decoder", | ||
551 | "MSENC: Encoder", | ||
552 | "VIC: Video Image Compositor", | ||
553 | "SEC: Sequencer", | ||
554 | "NVENC0: NVIDIA Video Encoder #0", | ||
555 | "NVENC1: NVIDIA Video Encoder #1", | ||
556 | "NVDEC: NVIDIA Video Decoder", | ||
557 | "Unknown Engine ID#17", | ||
558 | "IOCTRL: I/O Controller", | ||
559 | "LCE: Logical Copy Engine", | ||
560 | "GSP: GPU System Processor", | ||
561 | "NVJPG: NVIDIA JPEG Decoder", | ||
562 | }; | ||
563 | |||
556 | typedef union { | 564 | typedef union { |
557 | // DATA type fields | 565 | // DATA type fields |
558 | struct { | 566 | struct { |
@@ -592,16 +600,60 @@ typedef union { | |||
592 | uint32_t raw; | 600 | uint32_t raw; |
593 | } ptop_device_info_t; | 601 | } ptop_device_info_t; |
594 | 602 | ||
603 | /* Graphics Processing Cluster (GPC) information | ||
604 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | ||
605 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | ||
606 | |||
607 | Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain | ||
608 | Streaming Multiprocessors (SMs). | ||
609 | |||
610 | |||
611 | */ | ||
612 | // Support: Fermi through Blackwell | ||
613 | // Get the number of GPCs **on die** | ||
595 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | 614 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 |
615 | // Get the number of TPCs per GPC **on die** | ||
596 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | 616 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 |
597 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
598 | // PCE_MAP is Volta+ only | ||
599 | #define NV_CE_PCE_MAP 0x00104028 | ||
600 | |||
601 | // GPC and TPC masks | 617 | // GPC and TPC masks |
602 | // Support: Maxwell+ | 618 | // Support: Maxwell, Pascal, Volta, Turing |
619 | // Bitmask of which GPCs **are enabled** out of the max on die | ||
603 | #define NV_FUSE_GPC 0x00021c1c | 620 | #define NV_FUSE_GPC 0x00021c1c |
621 | // Bitmask of which TPCs **are enabled** on each GPC | ||
604 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | 622 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) |
623 | // Support: Ampere, Ada, Hopper, Blackwell | ||
624 | //#define NV_FUSE_GPC 0x00820c1c | ||
625 | //#define NV_FUSE_TPC_FOR_GPC(i) (0x00820c38+(i)*4) | ||
626 | |||
627 | /* Logical Copy Engine (LCE) Information | ||
628 | Every GPU has some number of copy engines which can process transfers to, | ||
629 | from, or within a GPU. Up until Maxwell, the hardware engines were directly | ||
630 | accessible, and this register exposes how many there are. | ||
631 | |||
632 | Starting with Pascal, an additional layer of indirection was added---logical | ||
633 | copy engines. Only logical copy engines can be directly dispatched to, and | ||
634 | there are normally more logical copy engines than there are physical ones. On | ||
635 | Pascal+ this register stores the number of logical copy engines. | ||
636 | |||
637 | SCAL_NUM_CES : Number of externally accessible copy engines | ||
638 | |||
639 | Support: Kepler through (at least) Blackwell | ||
640 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | ||
641 | */ | ||
642 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
643 | |||
644 | /* Physical Copy Engine (PCE) information | ||
645 | On Pascal GPUs or newer, this register complements the above information by | ||
646 | exposing which, and how many, physical copy engines are enabled on the GPU. | ||
647 | |||
648 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index | ||
649 | is enabled (not floorswept) on this GPU. Count the number of set | ||
650 | bits to get the number of PCEs. | ||
651 | |||
652 | Support: Pascal through (at least) Blackwell | ||
653 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | ||
654 | */ | ||
655 | #define NV_CE_PCE_MAP 0x00104028 | ||
656 | |||
605 | 657 | ||
606 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 658 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |
607 | Support: Fermi+ (?), Pascal | 659 | Support: Fermi+ (?), Pascal |
@@ -665,7 +717,7 @@ typedef union { | |||
665 | 717 | ||
666 | Note: Format changed with Pascal (how?) | 718 | Note: Format changed with Pascal (how?) |
667 | 719 | ||
668 | Support: Pascal, Volta, Turing, Ampere | 720 | Support: Pascal, Volta, Turing, Ampere, Ada |
669 | */ | 721 | */ |
670 | // FIXME: PDE/PTEs are actually 64 bits =S | 722 | // FIXME: PDE/PTEs are actually 64 bits =S |
671 | // Important: Aperture keys are different with PDEs | 723 | // Important: Aperture keys are different with PDEs |
@@ -704,7 +756,9 @@ static inline char* pd_target_to_text(enum PD_TARGET t) { | |||
704 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | 756 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry |
705 | // is a PTE or not, this combines them into a single target field to | 757 | // is a PTE or not, this combines them into a single target field to |
706 | // simplify comparisons. | 758 | // simplify comparisons. |
707 | // Support: Pascal, Turing, Ampere | 759 | // Support: Pascal, Volta, Turing, Ampere, Ada |
760 | // | ||
761 | // V3 introduced with Hopper, but Hopper and Blackwell also support V2 | ||
708 | typedef union { | 762 | typedef union { |
709 | // Page Directory Entry (PDE) | 763 | // Page Directory Entry (PDE) |
710 | struct { | 764 | struct { |
@@ -77,4 +77,3 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, | |||
77 | pci_dev_put(from); | 77 | pci_dev_put(from); |
78 | return pdev; | 78 | return pdev; |
79 | } | 79 | } |
80 | |||