diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-07-20 17:05:21 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-07-20 17:05:21 -0400 |
| commit | cbe68f5bc31b0927401c938e84f3160060a6c5e8 (patch) | |
| tree | eccdc7f5ab91a13d9dbda06a7ec9e6f62a048edd | |
| parent | a1598f27a124cb0b5263276f7098ae3a68460b61 (diff) | |
Improve copy engine register documentation in nvdebug.h + cleanup
| -rw-r--r-- | device_info_procfs.c | 2 | ||||
| -rw-r--r-- | mmu.c | 1 | ||||
| -rw-r--r-- | nvdebug.h | 162 | ||||
| -rw-r--r-- | stubs.h | 1 |
4 files changed, 109 insertions, 57 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index 1fc0586..5fc417f 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
| @@ -57,7 +57,7 @@ static void* device_info_file_seq_next(struct seq_file *s, void *idx, | |||
| 57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { | 57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { |
| 58 | ptop_device_info_t curr_info; | 58 | ptop_device_info_t curr_info; |
| 59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
| 60 | 60 | ||
| 61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); | 61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); |
| 62 | // Check for read errors | 62 | // Check for read errors |
| 63 | if (curr_info.raw == -1) | 63 | if (curr_info.raw == -1) |
| @@ -198,7 +198,6 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, | |||
| 198 | // If we find a matching PTE, return its virtual address | 198 | // If we find a matching PTE, return its virtual address |
| 199 | if ((uint64_t)pte.addr << 12 == addr_to_find) | 199 | if ((uint64_t)pte.addr << 12 == addr_to_find) |
| 200 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; | 200 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; |
| 201 | |||
| 202 | } | 201 | } |
| 203 | } while (++i < NV_MMU_PT_V1_SZ[0]); | 202 | } while (++i < NV_MMU_PT_V1_SZ[0]); |
| 204 | return 0; | 203 | return 0; |
| @@ -1,5 +1,12 @@ | |||
| 1 | /* Copyright 2021 Joshua Bakita | 1 | /* Copyright 2021 Joshua Bakita |
| 2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
| 3 | * | ||
| 4 | * File outline: | ||
| 5 | * - Runlist, preemption, and channel control (FIFO) | ||
| 6 | * - Basic GPU information (MC) | ||
| 7 | * - Detailed GPU information (PTOP, FUSE, and CE) | ||
| 8 | * - PRAMIN, BAR1/2, and page table status | ||
| 9 | * - Helper functions for nvdebug | ||
| 3 | */ | 10 | */ |
| 4 | 11 | ||
| 5 | // TODO(jbakita): Don't depend on these. | 12 | // TODO(jbakita): Don't depend on these. |
| @@ -448,53 +455,6 @@ typedef union { | |||
| 448 | } __attribute__((packed)); | 455 | } __attribute__((packed)); |
| 449 | } mc_boot_0_t; | 456 | } mc_boot_0_t; |
| 450 | 457 | ||
| 451 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
| 452 | enum ENGINE_TYPES { | ||
| 453 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
| 454 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
| 455 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
| 456 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
| 457 | |||
| 458 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
| 459 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
| 460 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
| 461 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
| 462 | ENGINE_VIC = 12, // Video Image Compositor | ||
| 463 | ENGINE_SEC = 13, // SEquenCer [?] | ||
| 464 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
| 465 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
| 466 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
| 467 | |||
| 468 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
| 469 | ENGINE_LCE = 19, // Logical Copy Engine | ||
| 470 | ENGINE_GSP = 20, // Gpu System Processor | ||
| 471 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
| 472 | }; | ||
| 473 | #define ENGINE_TYPES_LEN 22 | ||
| 474 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
| 475 | "Graphics/Compute", | ||
| 476 | "COPY0", | ||
| 477 | "COPY1", | ||
| 478 | "COPY2", | ||
| 479 | "Unknown Engine ID#4", | ||
| 480 | "Unknown Engine ID#5", | ||
| 481 | "Unknown Engine ID#6", | ||
| 482 | "Unknown Engine ID#7", | ||
| 483 | "MSPDEC: Picture Decoder", | ||
| 484 | "MSPPP: Post Processing", | ||
| 485 | "MSVLD: Variable Length Decoder", | ||
| 486 | "MSENC: Encoder", | ||
| 487 | "VIC: Video Image Compositor", | ||
| 488 | "SEC: Sequencer", | ||
| 489 | "NVENC0: NVIDIA Video Encoder #0", | ||
| 490 | "NVENC1: NVIDIA Video Encoder #1", | ||
| 491 | "NVDEC: NVIDIA Video Decoder", | ||
| 492 | "Unknown Engine ID#17", | ||
| 493 | "IOCTRL: I/O Controller", | ||
| 494 | "LCE: Logical Copy Engine", | ||
| 495 | "GSP: GPU System Processor", | ||
| 496 | "NVJPG: NVIDIA JPEG Decoder", | ||
| 497 | }; | ||
| 498 | 458 | ||
| 499 | /* GPU engine information and control register offsets | 459 | /* GPU engine information and control register offsets |
| 500 | Each engine is described by one or more entries (terminated by an entry with | 460 | Each engine is described by one or more entries (terminated by an entry with |
| @@ -553,6 +513,54 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 553 | */ | 513 | */ |
| 554 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | 514 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) |
| 555 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | 515 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 |
| 516 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
| 517 | enum ENGINE_TYPES { | ||
| 518 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
| 519 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
| 520 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
| 521 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
| 522 | |||
| 523 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
| 524 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
| 525 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
| 526 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
| 527 | ENGINE_VIC = 12, // Video Image Compositor | ||
| 528 | ENGINE_SEC = 13, // SEquenCer [?] | ||
| 529 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
| 530 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
| 531 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
| 532 | |||
| 533 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
| 534 | ENGINE_LCE = 19, // Logical Copy Engine | ||
| 535 | ENGINE_GSP = 20, // Gpu System Processor | ||
| 536 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
| 537 | }; | ||
| 538 | #define ENGINE_TYPES_LEN 22 | ||
| 539 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
| 540 | "Graphics/Compute", | ||
| 541 | "COPY0", | ||
| 542 | "COPY1", | ||
| 543 | "COPY2", | ||
| 544 | "Unknown Engine ID#4", | ||
| 545 | "Unknown Engine ID#5", | ||
| 546 | "Unknown Engine ID#6", | ||
| 547 | "Unknown Engine ID#7", | ||
| 548 | "MSPDEC: Picture Decoder", | ||
| 549 | "MSPPP: Post Processing", | ||
| 550 | "MSVLD: Variable Length Decoder", | ||
| 551 | "MSENC: Encoder", | ||
| 552 | "VIC: Video Image Compositor", | ||
| 553 | "SEC: Sequencer", | ||
| 554 | "NVENC0: NVIDIA Video Encoder #0", | ||
| 555 | "NVENC1: NVIDIA Video Encoder #1", | ||
| 556 | "NVDEC: NVIDIA Video Decoder", | ||
| 557 | "Unknown Engine ID#17", | ||
| 558 | "IOCTRL: I/O Controller", | ||
| 559 | "LCE: Logical Copy Engine", | ||
| 560 | "GSP: GPU System Processor", | ||
| 561 | "NVJPG: NVIDIA JPEG Decoder", | ||
| 562 | }; | ||
| 563 | |||
| 556 | typedef union { | 564 | typedef union { |
| 557 | // DATA type fields | 565 | // DATA type fields |
| 558 | struct { | 566 | struct { |
| @@ -592,16 +600,60 @@ typedef union { | |||
| 592 | uint32_t raw; | 600 | uint32_t raw; |
| 593 | } ptop_device_info_t; | 601 | } ptop_device_info_t; |
| 594 | 602 | ||
| 603 | /* Graphics Processing Cluster (GPC) information | ||
| 604 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | ||
| 605 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | ||
| 606 | |||
| 607 | Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain | ||
| 608 | Streaming Multiprocessors (SMs). | ||
| 609 | |||
| 610 | |||
| 611 | */ | ||
| 612 | // Support: Fermi through Blackwell | ||
| 613 | // Get the number of GPCs **on die** | ||
| 595 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | 614 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 |
| 615 | // Get the number of TPCs per GPC **on die** | ||
| 596 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | 616 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 |
| 597 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
| 598 | // PCE_MAP is Volta+ only | ||
| 599 | #define NV_CE_PCE_MAP 0x00104028 | ||
| 600 | |||
| 601 | // GPC and TPC masks | 617 | // GPC and TPC masks |
| 602 | // Support: Maxwell+ | 618 | // Support: Maxwell, Pascal, Volta, Turing |
| 619 | // Bitmask of which GPCs **are enabled**, out of the max on die | ||
| 603 | #define NV_FUSE_GPC 0x00021c1c | 620 | #define NV_FUSE_GPC 0x00021c1c |
| 621 | // Bitmask of which TPCs **are enabled** on each GPC | ||
| 604 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | 622 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) |
| 623 | // Support: Ampere, Ada, Hopper, Blackwell | ||
| 624 | //#define NV_FUSE_GPC 0x00820c1c | ||
| 625 | //#define NV_FUSE_TPC_FOR_GPC(i) (0x00820c38+(i)*4) | ||
| 626 | |||
| 627 | /* Logical Copy Engine (LCE) Information | ||
| 628 | Every GPU has some number of copy engines which can process transfers to, | ||
| 629 | from, or within a GPU. Through Maxwell (inclusive), the hardware engines were | ||
| 630 | accessible, and this register exposes how many there are. | ||
| 631 | |||
| 632 | Starting with Pascal, an additional layer of indirection was added---logical | ||
| 633 | copy engines. Only logical copy engines can be directly dispatched to, and | ||
| 634 | there are normally more logical copy engines than there are physical ones. On | ||
| 635 | Pascal+ this register stores the number of logical copy engines. | ||
| 636 | |||
| 637 | SCAL_NUM_CES : Number of externally accessible copy engines | ||
| 638 | |||
| 639 | Support: Kepler through (at least) Blackwell | ||
| 640 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | ||
| 641 | */ | ||
| 642 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
| 643 | |||
| 644 | /* Physical Copy Engine (PCE) information | ||
| 645 | On Pascal GPUs or newer, this register complements the above information by | ||
| 646 | exposing which, and how many, physical copy engines are enabled on the GPU. | ||
| 647 | |||
| 648 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index | ||
| 649 | is enabled (not floorswept) on this GPU. Count the number of set | ||
| 650 | bits to get the number of PCEs. | ||
| 651 | |||
| 652 | Support: Pascal through (at least) Blackwell | ||
| 653 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | ||
| 654 | */ | ||
| 655 | #define NV_CE_PCE_MAP 0x00104028 | ||
| 656 | |||
| 605 | 657 | ||
| 606 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 658 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |
| 607 | Support: Fermi+ (?), Pascal | 659 | Support: Fermi+ (?), Pascal |
| @@ -665,7 +717,7 @@ typedef union { | |||
| 665 | 717 | ||
| 666 | Note: Format changed with Pascal (how?) | 718 | Note: Format changed with Pascal (how?) |
| 667 | 719 | ||
| 668 | Support: Pascal, Volta, Turing, Ampere | 720 | Support: Pascal, Volta, Turing, Ampere, Ada |
| 669 | */ | 721 | */ |
| 670 | // FIXME: PDE/PTEs are actually 64 bits =S | 722 | // FIXME: PDE/PTEs are actually 64 bits =S |
| 671 | // Important: Aperture keys are different with PDEs | 723 | // Important: Aperture keys are different with PDEs |
| @@ -704,7 +756,9 @@ static inline char* pd_target_to_text(enum PD_TARGET t) { | |||
| 704 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | 756 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry |
| 705 | // is a PTE or not, this combines them into a single target field to | 757 | // is a PTE or not, this combines them into a single target field to |
| 706 | // simplify comparisons. | 758 | // simplify comparisons. |
| 707 | // Support: Pascal, Turing, Ampere | 759 | // Support: Pascal, Volta, Turing, Ampere, Ada |
| 760 | // | ||
| 761 | // V3 introduced with Hopper, but Hopper and Blackwell also support V2 | ||
| 708 | typedef union { | 762 | typedef union { |
| 709 | // Page Directory Entry (PDE) | 763 | // Page Directory Entry (PDE) |
| 710 | struct { | 764 | struct { |
| @@ -77,4 +77,3 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, | |||
| 77 | pci_dev_put(from); | 77 | pci_dev_put(from); |
| 78 | return pdev; | 78 | return pdev; |
| 79 | } | 79 | } |
| 80 | |||
