aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--device_info_procfs.c2
-rw-r--r--mmu.c1
-rw-r--r--nvdebug.h162
-rw-r--r--stubs.h1
4 files changed, 109 insertions, 57 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c
index 1fc0586..5fc417f 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -57,7 +57,7 @@ static void* device_info_file_seq_next(struct seq_file *s, void *idx,
57static int device_info_file_seq_show(struct seq_file *s, void *idx) { 57static int device_info_file_seq_show(struct seq_file *s, void *idx) {
58 ptop_device_info_t curr_info; 58 ptop_device_info_t curr_info;
59 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; 59 struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
60 60
61 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); 61 curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx));
62 // Check for read errors 62 // Check for read errors
63 if (curr_info.raw == -1) 63 if (curr_info.raw == -1)
diff --git a/mmu.c b/mmu.c
index 26c7af5..4881f66 100644
--- a/mmu.c
+++ b/mmu.c
@@ -198,7 +198,6 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g,
198 // If we find a matching PTE, return its virtual address 198 // If we find a matching PTE, return its virtual address
199 if ((uint64_t)pte.addr << 12 == addr_to_find) 199 if ((uint64_t)pte.addr << 12 == addr_to_find)
200 return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; 200 return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
201
202 } 201 }
203 } while (++i < NV_MMU_PT_V1_SZ[0]); 202 } while (++i < NV_MMU_PT_V1_SZ[0]);
204 return 0; 203 return 0;
diff --git a/nvdebug.h b/nvdebug.h
index 8db07ee..630d40d 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1,5 +1,12 @@
1/* Copyright 2021 Joshua Bakita 1/* Copyright 2021 Joshua Bakita
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 *
4 * File outline:
5 * - Runlist, preemption, and channel control (FIFO)
6 * - Basic GPU information (MC)
7 * - Detailed GPU information (PTOP, FUSE, and CE)
8 * - PRAMIN, BAR1/2, and page table status
9 * - Helper functions for nvdebug
3 */ 10 */
4 11
5// TODO(jbakita): Don't depend on these. 12// TODO(jbakita): Don't depend on these.
@@ -448,53 +455,6 @@ typedef union {
448 } __attribute__((packed)); 455 } __attribute__((packed));
449} mc_boot_0_t; 456} mc_boot_0_t;
450 457
451enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
452enum ENGINE_TYPES {
453 ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]
454 ENGINE_COPY0 = 1, // [raw/physical] COPY #0
455 ENGINE_COPY1 = 2, // [raw/physical] COPY #1
456 ENGINE_COPY2 = 3, // [raw/physical] COPY #2
457
458 ENGINE_MSPDEC = 8, // Picture DECoder
459 ENGINE_MSPPP = 9, // [Video] Post Processing
460 ENGINE_MSVLD = 10, // [Video] Variable Length Decoder
461 ENGINE_MSENC = 11, // [Video] ENCoding
462 ENGINE_VIC = 12, // Video Image Compositor
463 ENGINE_SEC = 13, // SEquenCer [?]
464 ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0
465 ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1
466 ENGINE_NVDEC = 16, // Nvidia Video DECoder
467
468 ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]
469 ENGINE_LCE = 19, // Logical Copy Engine
470 ENGINE_GSP = 20, // Gpu System Processor
471 ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+)
472};
473#define ENGINE_TYPES_LEN 22
474static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
475 "Graphics/Compute",
476 "COPY0",
477 "COPY1",
478 "COPY2",
479 "Unknown Engine ID#4",
480 "Unknown Engine ID#5",
481 "Unknown Engine ID#6",
482 "Unknown Engine ID#7",
483 "MSPDEC: Picture Decoder",
484 "MSPPP: Post Processing",
485 "MSVLD: Variable Length Decoder",
486 "MSENC: Encoder",
487 "VIC: Video Image Compositor",
488 "SEC: Sequencer",
489 "NVENC0: NVIDIA Video Encoder #0",
490 "NVENC1: NVIDIA Video Encoder #1",
491 "NVDEC: NVIDIA Video Decoder",
492 "Unknown Engine ID#17",
493 "IOCTRL: I/O Controller",
494 "LCE: Logical Copy Engine",
495 "GSP: GPU System Processor",
496 "NVJPG: NVIDIA JPEG Decoder",
497};
498 458
499/* GPU engine information and control register offsets 459/* GPU engine information and control register offsets
500 Each engine is described by one or more entries (terminated by an entry with 460 Each engine is described by one or more entries (terminated by an entry with
@@ -553,6 +513,54 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
553*/ 513*/
554#define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) 514#define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4)
555#define NV_PTOP_DEVICE_INFO__SIZE_1 64 515#define NV_PTOP_DEVICE_INFO__SIZE_1 64
516enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
517enum ENGINE_TYPES {
518 ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]
519 ENGINE_COPY0 = 1, // [raw/physical] COPY #0
520 ENGINE_COPY1 = 2, // [raw/physical] COPY #1
521 ENGINE_COPY2 = 3, // [raw/physical] COPY #2
522
523 ENGINE_MSPDEC = 8, // Picture DECoder
524 ENGINE_MSPPP = 9, // [Video] Post Processing
525 ENGINE_MSVLD = 10, // [Video] Variable Length Decoder
526 ENGINE_MSENC = 11, // [Video] ENCoding
527 ENGINE_VIC = 12, // Video Image Compositor
528 ENGINE_SEC = 13, // SEquenCer [?]
529 ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0
530 ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1
531 ENGINE_NVDEC = 16, // Nvidia Video DECoder
532
533 ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]
534 ENGINE_LCE = 19, // Logical Copy Engine
535 ENGINE_GSP = 20, // Gpu System Processor
536 ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+)
537};
538#define ENGINE_TYPES_LEN 22
539static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
540 "Graphics/Compute",
541 "COPY0",
542 "COPY1",
543 "COPY2",
544 "Unknown Engine ID#4",
545 "Unknown Engine ID#5",
546 "Unknown Engine ID#6",
547 "Unknown Engine ID#7",
548 "MSPDEC: Picture Decoder",
549 "MSPPP: Post Processing",
550 "MSVLD: Variable Length Decoder",
551 "MSENC: Encoder",
552 "VIC: Video Image Compositor",
553 "SEC: Sequencer",
554 "NVENC0: NVIDIA Video Encoder #0",
555 "NVENC1: NVIDIA Video Encoder #1",
556 "NVDEC: NVIDIA Video Decoder",
557 "Unknown Engine ID#17",
558 "IOCTRL: I/O Controller",
559 "LCE: Logical Copy Engine",
560 "GSP: GPU System Processor",
561 "NVJPG: NVIDIA JPEG Decoder",
562};
563
556typedef union { 564typedef union {
557 // DATA type fields 565 // DATA type fields
558 struct { 566 struct {
@@ -592,16 +600,60 @@ typedef union {
592 uint32_t raw; 600 uint32_t raw;
593} ptop_device_info_t; 601} ptop_device_info_t;
594 602
603/* Graphics Processing Cluster (GPC) information
604 The GPU's Compute/Graphics engine is subdivided into Graphics Processing
605 Clusters (also known as GPU Processing Clusters, starting with Ampere).
606
607 Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain
608 Streaming Multiprocessors (SMs).
609
610
611*/
612// Support: Fermi through Blackwell
613// Get the number of GPCs **on die**
595#define NV_PTOP_SCAL_NUM_GPCS 0x00022430 614#define NV_PTOP_SCAL_NUM_GPCS 0x00022430
615// Get the number of TPCs per GPC **on die**
596#define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 616#define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434
597#define NV_PTOP_SCAL_NUM_CES 0x00022444
598// PCE_MAP is Volta+ only
599#define NV_CE_PCE_MAP 0x00104028
600
601// GPC and TPC masks 617// GPC and TPC masks
602// Support: Maxwell+ 618// Support: Maxwell, Pascal, Volta, Turing
619// Bitmask of which GPCs **are enabled**, out of the maximum number on die
603#define NV_FUSE_GPC 0x00021c1c 620#define NV_FUSE_GPC 0x00021c1c
621// Bitmask of which TPCs **are enabled** on each GPC
604#define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) 622#define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4)
623// Support: Ampere, Ada, Hopper, Blackwell
624//#define NV_FUSE_GPC 0x00820c1c
625//#define NV_FUSE_TPC_FOR_GPC(i) (0x00820c38+(i)*4)
626
627/* Logical Copy Engine (LCE) Information
628 Every GPU has some number of copy engines which can process transfers to,
629 from, or within a GPU. Up until Maxwell, the hardware engines were directly
630 accessible, and this register exposes how many there are.
631
632 Starting with Pascal, an additional layer of indirection was added---logical
633 copy engines. Only logical copy engines can be directly dispatched to, and
634 there are normally more logical copy engines than there are physical ones. On
635 Pascal+ this register stores the number of logical copy engines.
636
637 SCAL_NUM_CES : Number of externally accessible copy engines
638
639 Support: Kepler through (at least) Blackwell
640 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
641*/
642#define NV_PTOP_SCAL_NUM_CES 0x00022444
643
644/* Physical Copy Engine (PCE) information
645 On Pascal GPUs or newer, this register complements the above information by
646 exposing which, and how many, physical copy engines are enabled on the GPU.
647
648 CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index
649 is enabled (not floorswept) on this GPU. Count the number of set
650 bits to get the number of PCEs.
651
652 Support: Pascal through (at least) Blackwell
653 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
654*/
655#define NV_CE_PCE_MAP 0x00104028
656
605 657
606/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 658/* Location of the 1Kb instance block with page tables for BAR1 and BAR2.
607 Support: Fermi+ (?), Pascal 659 Support: Fermi+ (?), Pascal
@@ -665,7 +717,7 @@ typedef union {
665 717
666 Note: Format changed with Pascal (how?) 718 Note: Format changed with Pascal (how?)
667 719
668 Support: Pascal, Volta, Turing, Ampere 720 Support: Pascal, Volta, Turing, Ampere, Ada
669*/ 721*/
670// FIXME: PDE/PTEs are actually 64 bits =S 722// FIXME: PDE/PTEs are actually 64 bits =S
671// Important: Aperture keys are different with PDEs 723// Important: Aperture keys are different with PDEs
@@ -704,7 +756,9 @@ static inline char* pd_target_to_text(enum PD_TARGET t) {
704// Note: As the meaning of target (bits 2:1) changes depending on if the entry 756// Note: As the meaning of target (bits 2:1) changes depending on if the entry
705// is a PTE or not, this combines them into a single target field to 757// is a PTE or not, this combines them into a single target field to
706// simplify comparisons. 758// simplify comparisons.
707// Support: Pascal, Turing, Ampere 759// Support: Pascal, Volta, Turing, Ampere, Ada
760//
761// V3 introduced with Hopper, but Hopper and Blackwell also support V2
708typedef union { 762typedef union {
709 // Page Directory Entry (PDE) 763 // Page Directory Entry (PDE)
710 struct { 764 struct {
diff --git a/stubs.h b/stubs.h
index bfcc0d7..b909587 100644
--- a/stubs.h
+++ b/stubs.h
@@ -77,4 +77,3 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id,
77 pci_dev_put(from); 77 pci_dev_put(from);
78 return pdev; 78 return pdev;
79} 79}
80