aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mmu.c54
-rw-r--r--nvdebug.h57
2 files changed, 56 insertions, 55 deletions
diff --git a/mmu.c b/mmu.c
index 4881f66..23adaf2 100644
--- a/mmu.c
+++ b/mmu.c
@@ -39,46 +39,6 @@ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
39 return addr - pramin_base_va; 39 return addr - pramin_base_va;
40} 40}
41 41
42/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
43 straight-forward starting with Pascal ("page table version 2"), except for a
44 few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
45
46 All you really need to know is that any given Page Directory Entry (PDE)
47 contains a pointer to the start of a 4k page densely filled with PDEs or Page
48 Table Entries (PTEs).
49
50 == Page Table Refresher ==
51 Page tables convert virtual addresses to physical addresses, and they do this
 52 via a tree structure. Leaves (PTEs) contain a physical address, and the path
53 from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
 54 When descending, the virtual address is sliced into pieces, and one slice is
55 used at each level (as an index) to select the next-visited node (in level+1).
56
57 V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
58 PTEs. How the virtual address is sliced to yield an index into each level and
59 a page offset is shown by Fig 1.
60
61 == Figure 1 ==
62 Page Offset (12 bits) <---------------------------------------+
63 Page Table Entry (PTE) (9 bits) <--------------------+ |
64 Page Directory Entry (PDE) 0 (8 bits) <-----+ | |
65 PDE1 (8 bits) <--------------------+ | | |
66 PDE2 (8 bits) <-----------+ | | | |
67 PDE3 (2 bits) <--+ | | | | |
68 ^ ^ ^ ^ ^ ^
69 Virtual addr: [49, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
70
71 The following arrays merely represent different projections of Fig. 1, and
72 only one is strictly needed to reconstruct all the others. However, due to
73 the complexity of page tables, we include all of these to aid in readability.
74*/
75// How many nodes/entries per level in V2 of NVIDIA's page table format
76static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
77// Size in bytes of an entry at a particular level
78static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
79// Which bit index is the least significant in indexing each page level
80static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
81
82// Convert a GPU physical address to CPU virtual address via the PRAMIN window 42// Convert a GPU physical address to CPU virtual address via the PRAMIN window
83void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { 43void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
84 return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); 44 return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
@@ -90,6 +50,7 @@ void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
90} 50}
91*/ 51*/
92 52
53// Internal helper for search_page_directory().
93uint64_t search_page_directory_subtree(struct nvdebug_state *g, 54uint64_t search_page_directory_subtree(struct nvdebug_state *g,
94 void __iomem *pde_offset, 55 void __iomem *pde_offset,
95 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), 56 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
@@ -114,7 +75,7 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
114 } 75 }
115 printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); 76 printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
116 // Depth-first search of the page table 77 // Depth-first search of the page table
117 for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) { 78 for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
118 next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); 79 next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
119 // off2addr can fail 80 // off2addr can fail
120 if (!next) { 81 if (!next) {
@@ -128,12 +89,17 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
128 return 0; 89 return 0;
129} 90}
130 91
131/* Search a page directory of the GPU MMU 92/* GPU Physical address -> Virtual address ("reverse" translation)
93
94 Depth-first search a page directory of the GPU MMU for where a particular
95 physical address is mapped. Upon finding a mapping, the virtual address is
96 returned.
97
132 @param pde_offset Dereferenceable pointer to the start of the PDE3 entries 98 @param pde_offset Dereferenceable pointer to the start of the PDE3 entries
133 @param off2addr Func to converts VRAM phys addresses to valid CPU VAs 99 @param off2addr Func to convert VRAM phys addresses to valid CPU VAs
134 @param addr_to_find Physical address to reconstruct the virtual address of 100 @param addr_to_find Physical address to reconstruct the virtual address of
135 @return 0 on error, otherwise the virtual address at which addr_to_find is 101 @return 0 on error, otherwise the virtual address at which addr_to_find is
136 mapped into by this page table. 102 mapped into by this page table. (Zero is not a valid virtual address)
137*/ 103*/
138uint64_t search_page_directory(struct nvdebug_state *g, 104uint64_t search_page_directory(struct nvdebug_state *g,
139 void __iomem *pde_offset, 105 void __iomem *pde_offset,
diff --git a/nvdebug.h b/nvdebug.h
index 39b2e6e..fcc6cff 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -856,13 +856,46 @@ typedef union {
856 uint64_t raw; 856 uint64_t raw;
857} page_dir_config_t; 857} page_dir_config_t;
858 858
859/* Page directory entry 859/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
860 860 straight-forward starting with Pascal ("page table version 2"), except for a
861 Note: Format changed with Pascal (how?) 861 few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
862 862
863 Support: Pascal, Volta, Turing, Ampere, Ada 863 All you really need to know is that any given Page Directory Entry (PDE)
864 contains a pointer to the start of a 4k page densely filled with PDEs or Page
865 Table Entries (PTEs).
866
867 == Page Table Refresher ==
868 Page tables convert virtual addresses to physical addresses, and they do this
 869 via a tree structure. Leaves (PTEs) contain a physical address, and the path
870 from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
 871 When descending, the virtual address is sliced into pieces, and one slice is
872 used at each level (as an index) to select the next-visited node (in level+1).
873
874 V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
875 PTEs. How the virtual address is sliced to yield an index into each level and
876 a page offset is shown by Fig 1.
877
878 == Figure 1 ==
879 Page Offset (12 bits) <---------------------------------------+
880 Page Table Entry (PTE) (9 bits) <--------------------+ |
881 Page Directory Entry (PDE) 0 (8 bits) <-----+ | |
882 PDE1 (9 bits) <--------------------+ | | |
883 PDE2 (9 bits) <-----------+ | | | |
884 PDE3 (2 bits) <--+ | | | | |
885 ^ ^ ^ ^ ^ ^
886 Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
887
888 The following arrays merely represent different projections of Fig. 1, and
889 only one is strictly needed to reconstruct all the others. However, due to
890 the complexity of page tables, we include all of these to aid in readability.
864*/ 891*/
865// FIXME: PDE/PTEs are actually 64 bits =S 892// How many nodes/entries per level in V2 of NVIDIA's page table format
893static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
894// Size in bytes of an entry at a particular level
895static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
896// Which bit index is the least significant in indexing each page level
897static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
898
866// Important: Aperture keys are different with PDEs 899// Important: Aperture keys are different with PDEs
867enum PD_TARGET { 900enum PD_TARGET {
868 PD_AND_TARGET_INVALID = 0, // b000 901 PD_AND_TARGET_INVALID = 0, // b000
@@ -894,25 +927,27 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) {
894 } 927 }
895} 928}
896 929
897// PDE/PTE V2 type 930// Page Directory Entry/Page Table Entry V2 type
898// Note: As the meaning of target (bits 2:1) changes depending on if the entry 931// Note: As the meaning of target (bits 2:1) changes depending on if the entry
899// is a PTE or not, this combines them into a single target field to 932// is a PTE or not, this combines them into a single target field to
900// simplify comparisons. 933// simplify comparisons.
901// Support: Pascal, Volta, Turing, Ampere, Ada 934// Support: Pascal, Volta, Turing, Ampere, Ada
902// 935//
903// V3 introduced with Hopper, but Hopper and Blackwell also support V2 936// V3 introduced with Hopper, but Hopper and Blackwell also support V2
937//
938// FIXME: This structure is 32 bits, but PDE/PTEs are actually 64 bits!
904typedef union { 939typedef union {
905 // Page Directory Entry (PDE) 940 // Page Directory Entry (PDE)
906 struct { 941 struct {
907 bool is_pte:1; 942 enum PD_TARGET target:3;
908 uint32_t __target:2;
909 bool is_volatile:1; 943 bool is_volatile:1;
910 uint32_t padding1:4; 944 uint32_t padding1:4;
911 uint32_t addr:24; 945 uint32_t addr:24;
912 } __attribute__((packed)); 946 } __attribute__((packed));
913 // Page Table Entry (PTE) 947 // Page Table Entry (PTE)
914 struct { 948 struct {
915 enum PD_TARGET target:3; 949 bool is_pte:1;
950 enum INST_TARGET aperture:2;
916 uint32_t __is_volatile:1; 951 uint32_t __is_volatile:1;
917 bool is_encrypted:1; 952 bool is_encrypted:1;
918 bool is_privileged:1; 953 bool is_privileged:1;
@@ -923,7 +958,7 @@ typedef union {
923 uint32_t raw; 958 uint32_t raw;
924} page_dir_entry_t; 959} page_dir_entry_t;
925 960
926// PDE/PTE V1 types 961// Page Directory Entry/Page Table Entry V1 type
927// Support: Fermi, Kepler, Maxwell 962// Support: Fermi, Kepler, Maxwell
928enum V1_PD_TARGET { 963enum V1_PD_TARGET {
929 PD_TARGET_INVALID = 0, 964 PD_TARGET_INVALID = 0,