aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2024-04-09 13:36:49 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2024-04-09 13:36:49 -0400
commit8f9ed4c3b1f0e438107035147b5aa43fdcd66165 (patch)
treea28f2237f85ac9e85cf9837644160e0630deaf18 /nvdebug.h
parent4768fe31f114c5ad788012db5518ce8e37f79c7a (diff)
Fix an off-by-one error in V2 reverse page table lookups
This would occasionally manifest as an inability to find the runlist page in BAR2, as only part of the page table was being traversed. Also includes non-functional changes to documentation, scoping, and structure layout.
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h57
1 files changed, 46 insertions, 11 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 39b2e6e..fcc6cff 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -856,13 +856,46 @@ typedef union {
856 uint64_t raw; 856 uint64_t raw;
857} page_dir_config_t; 857} page_dir_config_t;
858 858
859/* Page directory entry 859/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
860 860 straight-forward starting with Pascal ("page table version 2"), except for a
861 Note: Format changed with Pascal (how?) 861 few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
862 862
863 Support: Pascal, Volta, Turing, Ampere, Ada 863 All you really need to know is that any given Page Directory Entry (PDE)
864 contains a pointer to the start of a 4k page densely filled with PDEs or Page
865 Table Entries (PTEs).
866
867 == Page Table Refresher ==
868 Page tables convert virtual addresses to physical addresses, and they do this
869 via a tree structure. Leaves (PTEs) contain a physical address, and the path
870 from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
871 When descending, the virtual address is sliced into pieces, and one slice is
872 used at each level (as an index) to select the next-visited node (in level+1).
873
874 V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
875 PTEs. How the virtual address is sliced to yield an index into each level and
876 a page offset is shown by Fig 1.
877
878 == Figure 1 ==
879 Page Offset (12 bits) <---------------------------------------+
880 Page Table Entry (PTE) (9 bits) <--------------------+ |
881 Page Directory Entry (PDE) 0 (8 bits) <-----+ | |
882 PDE1 (9 bits) <--------------------+ | | |
883 PDE2 (9 bits) <-----------+ | | | |
884 PDE3 (2 bits) <--+ | | | | |
885 ^ ^ ^ ^ ^ ^
886 Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
887
888 The following arrays merely represent different projections of Fig. 1, and
889 only one is strictly needed to reconstruct all the others. However, due to
890 the complexity of page tables, we include all of these to aid in readability.
864*/ 891*/
865// FIXME: PDE/PTEs are actually 64 bits =S 892// How many nodes/entries per level in V2 of NVIDIA's page table format
893static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
894// Size in bytes of an entry at a particular level
895static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
896// Which bit index is the least significant in indexing each page level
897static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
898
866// Important: Aperture keys are different with PDEs 899// Important: Aperture keys are different with PDEs
867enum PD_TARGET { 900enum PD_TARGET {
868 PD_AND_TARGET_INVALID = 0, // b000 901 PD_AND_TARGET_INVALID = 0, // b000
@@ -894,25 +927,27 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) {
894 } 927 }
895} 928}
896 929
897// PDE/PTE V2 type 930// Page Directory Entry/Page Table Entry V2 type
898// Note: As the meaning of target (bits 2:1) changes depending on if the entry 931// Note: As the meaning of target (bits 2:1) changes depending on if the entry
899// is a PTE or not, this combines them into a single target field to 932// is a PTE or not, this combines them into a single target field to
900// simplify comparisons. 933// simplify comparisons.
901// Support: Pascal, Volta, Turing, Ampere, Ada 934// Support: Pascal, Volta, Turing, Ampere, Ada
902// 935//
903// V3 introduced with Hopper, but Hopper and Blackwell also support V2 936// V3 introduced with Hopper, but Hopper and Blackwell also support V2
937//
938// FIXME: This structure is 32 bits, but PDE/PTEs are actually 64 bits!
904typedef union { 939typedef union {
905 // Page Directory Entry (PDE) 940 // Page Directory Entry (PDE)
906 struct { 941 struct {
907 bool is_pte:1; 942 enum PD_TARGET target:3;
908 uint32_t __target:2;
909 bool is_volatile:1; 943 bool is_volatile:1;
910 uint32_t padding1:4; 944 uint32_t padding1:4;
911 uint32_t addr:24; 945 uint32_t addr:24;
912 } __attribute__((packed)); 946 } __attribute__((packed));
913 // Page Table Entry (PTE) 947 // Page Table Entry (PTE)
914 struct { 948 struct {
915 enum PD_TARGET target:3; 949 bool is_pte:1;
950 enum INST_TARGET aperture:2;
916 uint32_t __is_volatile:1; 951 uint32_t __is_volatile:1;
917 bool is_encrypted:1; 952 bool is_encrypted:1;
918 bool is_privileged:1; 953 bool is_privileged:1;
@@ -923,7 +958,7 @@ typedef union {
923 uint32_t raw; 958 uint32_t raw;
924} page_dir_entry_t; 959} page_dir_entry_t;
925 960
926// PDE/PTE V1 types 961// Page Directory Entry/Page Table Entry V1 type
927// Support: Fermi, Kepler, Maxwell 962// Support: Fermi, Kepler, Maxwell
928enum V1_PD_TARGET { 963enum V1_PD_TARGET {
929 PD_TARGET_INVALID = 0, 964 PD_TARGET_INVALID = 0,