From cbe68f5bc31b0927401c938e84f3160060a6c5e8 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Thu, 20 Jul 2023 17:05:21 -0400 Subject: Improve copy engine register documentation in nvdebug.h + cleanup --- device_info_procfs.c | 2 +- mmu.c | 1 - nvdebug.h | 162 ++++++++++++++++++++++++++++++++++----------------- stubs.h | 1 - 4 files changed, 109 insertions(+), 57 deletions(-) diff --git a/device_info_procfs.c b/device_info_procfs.c index 1fc0586..5fc417f 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c @@ -57,7 +57,7 @@ static void* device_info_file_seq_next(struct seq_file *s, void *idx, static int device_info_file_seq_show(struct seq_file *s, void *idx) { ptop_device_info_t curr_info; struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; - + curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); // Check for read errors if (curr_info.raw == -1) diff --git a/mmu.c b/mmu.c index 26c7af5..4881f66 100644 --- a/mmu.c +++ b/mmu.c @@ -198,7 +198,6 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, // If we find a matching PTE, return its virtual address if ((uint64_t)pte.addr << 12 == addr_to_find) return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; - } } while (++i < NV_MMU_PT_V1_SZ[0]); return 0; diff --git a/nvdebug.h b/nvdebug.h index 8db07ee..630d40d 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -1,5 +1,12 @@ /* Copyright 2021 Joshua Bakita * SPDX-License-Identifier: MIT + * + * File outline: + * - Runlist, preemption, and channel control (FIFO) + * - Basic GPU information (MC) + * - Detailed GPU information (PTOP, FUSE, and CE) + * - PRAMIN, BAR1/2, and page table status + * - Helper functions for nvdebug */ // TODO(jbakita): Don't depend on these. 
@@ -448,53 +455,6 @@ typedef union { } __attribute__((packed)); } mc_boot_0_t; -enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; -enum ENGINE_TYPES { - ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] - ENGINE_COPY0 = 1, // [raw/physical] COPY #0 - ENGINE_COPY1 = 2, // [raw/physical] COPY #1 - ENGINE_COPY2 = 3, // [raw/physical] COPY #2 - - ENGINE_MSPDEC = 8, // Picture DECoder - ENGINE_MSPPP = 9, // [Video] Post Processing - ENGINE_MSVLD = 10, // [Video] Variable Length Decoder - ENGINE_MSENC = 11, // [Video] ENCoding - ENGINE_VIC = 12, // Video Image Compositor - ENGINE_SEC = 13, // SEquenCer [?] - ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 - ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 - ENGINE_NVDEC = 16, // Nvidia Video DECoder - - ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] - ENGINE_LCE = 19, // Logical Copy Engine - ENGINE_GSP = 20, // Gpu System Processor - ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) -}; -#define ENGINE_TYPES_LEN 22 -static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { - "Graphics/Compute", - "COPY0", - "COPY1", - "COPY2", - "Unknown Engine ID#4", - "Unknown Engine ID#5", - "Unknown Engine ID#6", - "Unknown Engine ID#7", - "MSPDEC: Picture Decoder", - "MSPPP: Post Processing", - "MSVLD: Variable Length Decoder", - "MSENC: Encoder", - "VIC: Video Image Compositor", - "SEC: Sequencer", - "NVENC0: NVIDIA Video Encoder #0", - "NVENC1: NVIDIA Video Encoder #1", - "NVDEC: NVIDIA Video Decoder", - "Unknown Engine ID#17", - "IOCTRL: I/O Controller", - "LCE: Logical Copy Engine", - "GSP: GPU System Processor", - "NVJPG: NVIDIA JPEG Decoder", -}; /* GPU engine information and control register offsets Each engine is described by one or more entries (terminated by an entry with @@ -553,6 +513,54 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { */ #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) #define 
NV_PTOP_DEVICE_INFO__SIZE_1 64 +enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; +enum ENGINE_TYPES { + ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] + ENGINE_COPY0 = 1, // [raw/physical] COPY #0 + ENGINE_COPY1 = 2, // [raw/physical] COPY #1 + ENGINE_COPY2 = 3, // [raw/physical] COPY #2 + + ENGINE_MSPDEC = 8, // Picture DECoder + ENGINE_MSPPP = 9, // [Video] Post Processing + ENGINE_MSVLD = 10, // [Video] Variable Length Decoder + ENGINE_MSENC = 11, // [Video] ENCoding + ENGINE_VIC = 12, // Video Image Compositor + ENGINE_SEC = 13, // SEquenCer [?] + ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 + ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 + ENGINE_NVDEC = 16, // Nvidia Video DECoder + + ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] + ENGINE_LCE = 19, // Logical Copy Engine + ENGINE_GSP = 20, // Gpu System Processor + ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) +}; +#define ENGINE_TYPES_LEN 22 +static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { + "Graphics/Compute", + "COPY0", + "COPY1", + "COPY2", + "Unknown Engine ID#4", + "Unknown Engine ID#5", + "Unknown Engine ID#6", + "Unknown Engine ID#7", + "MSPDEC: Picture Decoder", + "MSPPP: Post Processing", + "MSVLD: Variable Length Decoder", + "MSENC: Encoder", + "VIC: Video Image Compositor", + "SEC: Sequencer", + "NVENC0: NVIDIA Video Encoder #0", + "NVENC1: NVIDIA Video Encoder #1", + "NVDEC: NVIDIA Video Decoder", + "Unknown Engine ID#17", + "IOCTRL: I/O Controller", + "LCE: Logical Copy Engine", + "GSP: GPU System Processor", + "NVJPG: NVIDIA JPEG Decoder", +}; + typedef union { // DATA type fields struct { @@ -592,16 +600,60 @@ typedef union { uint32_t raw; } ptop_device_info_t; +/* Graphics Processing Cluster (GPC) information + The GPU's Compute/Graphics engine is subdivided into Graphics Processing + Clusters (also known as GPU Processing Clusters, starting with Ampere). 
+ + Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain + Streaming Multiprocessors (SMs). + + +*/ +// Support: Fermi through Blackwell +// Get the number of GPCs **on die** #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 +// Get the number of TPCs per GPC **on die** #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 -#define NV_PTOP_SCAL_NUM_CES 0x00022444 -// PCE_MAP is Volta+ only -#define NV_CE_PCE_MAP 0x00104028 - // GPC and TPC masks -// Support: Maxwell+ +// Support: Maxwell, Pascal, Volta, Turing +// Bitmask of which GPCs **are enabled** of the max on die #define NV_FUSE_GPC 0x00021c1c +// Bitmask of which TPCs **are enabled** on each GPC #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) +// Support: Ampere, Ada, Hopper, Blackwell +//#define NV_FUSE_GPC 0x00820c1c +//#define NV_FUSE_TPC_FOR_GPC(i) (0x00820c38+(i)*4) + +/* Logical Copy Engine (LCE) Information + Every GPU has some number of copy engines which can process transfers to, + from, or within a GPU. Up until Maxwell, the hardware engines were directly + accessible, and this register exposes how many there are. + + Starting with Pascal, an additional layer of indirection was added---logical + copy engines. Only logical copy engines can be directly dispatched to, and + there are normally more logical copy engines than there are physical ones. On + Pascal+ this register stores the number of logical copy engines. + + SCAL_NUM_CES : Number of externally accessible copy engines + + Support: Kepler through (at least) Blackwell + Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. +*/ +#define NV_PTOP_SCAL_NUM_CES 0x00022444 + +/* Physical Copy Engine (PCE) information + On Pascal GPUs or newer, this register complements the above information by + exposing which, and how many, physical copy engines are enabled on the GPU. + + CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index + is enabled (not floorswept) on this GPU.
Count the number of set + bits to get the number of PCEs. + + Support: Pascal through (at least) Blackwell + Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. +*/ +#define NV_CE_PCE_MAP 0x00104028 + /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. Support: Fermi+ (?), Pascal @@ -665,7 +717,7 @@ typedef union { Note: Format changed with Pascal (how?) - Support: Pascal, Volta, Turing, Ampere + Support: Pascal, Volta, Turing, Ampere, Ada */ // FIXME: PDE/PTEs are actually 64 bits =S // Important: Aperture keys are different with PDEs @@ -704,7 +756,9 @@ static inline char* pd_target_to_text(enum PD_TARGET t) { // Note: As the meaning of target (bits 2:1) changes depending on if the entry // is a PTE or not, this combines them into a single target field to // simplify comparisons. -// Support: Pascal, Turing, Ampere +// Support: Pascal, Volta, Turing, Ampere, Ada +// +// V3 introduced with Hopper, but Hopper and Blackwell also support V2 typedef union { // Page Directory Entry (PDE) struct { diff --git a/stubs.h b/stubs.h index bfcc0d7..b909587 100644 --- a/stubs.h +++ b/stubs.h @@ -77,4 +77,3 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, pci_dev_put(from); return pdev; } - -- cgit v1.2.2