aboutsummaryrefslogtreecommitdiffstats
path: root/mmu.c
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2024-04-09 13:36:49 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2024-04-09 13:36:49 -0400
commit8f9ed4c3b1f0e438107035147b5aa43fdcd66165 (patch)
treea28f2237f85ac9e85cf9837644160e0630deaf18 /mmu.c
parent4768fe31f114c5ad788012db5518ce8e37f79c7a (diff)
Fix an off-by-one error in V2 reverse page table lookups
This would occasionally manifest as an inability to find the runlist page in BAR2, as only part of the page table was being traversed. Also includes non-functional changes to documentation, scoping, and structure layout.
Diffstat (limited to 'mmu.c')
-rw-r--r--mmu.c54
1 files changed, 10 insertions, 44 deletions
diff --git a/mmu.c b/mmu.c
index 4881f66..23adaf2 100644
--- a/mmu.c
+++ b/mmu.c
@@ -39,46 +39,6 @@ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
39 return addr - pramin_base_va; 39 return addr - pramin_base_va;
40} 40}
41 41
42/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
43 straight-forward starting with Pascal ("page table version 2"), except for a
44 few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
45
46 All you really need to know is that any given Page Directory Entry (PDE)
47 contains a pointer to the start of a 4k page densely filled with PDEs or Page
48 Table Entries (PTEs).
49
50 == Page Table Refresher ==
51 Page tables convert virtual addresses to physical addresses, and they do this
52 via a tree structure. Leafs (PTEs) contain a physical address, and the path
53 from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
54 When descending, the virtual address is sliced into pieces, and one slice is
55 used at each level (as an index) to select the next-visited node (in level+1).
56
57 V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
58 PTEs. How the virtual address is sliced to yield an index into each level and
59 a page offset is shown by Fig 1.
60
61 == Figure 1 ==
62   Page Offset (12 bits) <---------------------------------------+
63   Page Table Entry (PTE) (9 bits) <--------------------+        |
64   Page Directory Entry (PDE) 0 (8 bits) <-----+        |        |
65   PDE1 (9 bits) <--------------------+        |        |        |
66   PDE2 (9 bits) <-----------+        |        |        |        |
67   PDE3 (2 bits) <--+        |        |        |        |        |
68                    ^        ^        ^        ^        ^        ^
69   Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
70
71 The following arrays merely represent different projections of Fig. 1, and
72 only one is strictly needed to reconstruct all the others. However, due to
73 the complexity of page tables, we include all of these to aid in readability.
74*/
75// How many nodes/entries per level in V2 of NVIDIA's page table format
76static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
77// Size in bytes of an entry at a particular level
78static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
79// Which bit index is the least significant in indexing each page level
80static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
81
82// Convert a GPU physical address to CPU virtual address via the PRAMIN window 42// Convert a GPU physical address to CPU virtual address via the PRAMIN window
83void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { 43void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
84 return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); 44 return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
@@ -90,6 +50,7 @@ void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
90} 50}
91*/ 51*/
92 52
53// Internal helper for search_page_directory().
93uint64_t search_page_directory_subtree(struct nvdebug_state *g, 54uint64_t search_page_directory_subtree(struct nvdebug_state *g,
94 void __iomem *pde_offset, 55 void __iomem *pde_offset,
95 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), 56 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
@@ -114,7 +75,7 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
114 } 75 }
115 printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); 76 printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
116 // Depth-first search of the page table 77 // Depth-first search of the page table
117 for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) { 78 for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
118 next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); 79 next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
119 // off2addr can fail 80 // off2addr can fail
120 if (!next) { 81 if (!next) {
@@ -128,12 +89,17 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
128 return 0; 89 return 0;
129} 90}
130 91
131/* Search a page directory of the GPU MMU 92/* GPU Physical address -> Virtual address ("reverse" translation)
93
94 Depth-first search a page directory of the GPU MMU for where a particular
95 physical address is mapped. Upon finding a mapping, the virtual address is
96 returned.
97
132 @param pde_offset Dereferenceable pointer to the start of the PDE3 entries 98 @param pde_offset Dereferenceable pointer to the start of the PDE3 entries
133 @param off2addr Func to converts VRAM phys addresses to valid CPU VAs 99 @param off2addr Func to convert VRAM phys addresses to valid CPU VAs
134 @param addr_to_find Physical address to reconstruct the virtual address of 100 @param addr_to_find Physical address to reconstruct the virtual address of
135 @return 0 on error, otherwise the virtual address at which addr_to_find is 101 @return 0 on error, otherwise the virtual address at which addr_to_find is
136 mapped into by this page table. 102 mapped into by this page table. (Zero is not a valid virtual address)
137*/ 103*/
138uint64_t search_page_directory(struct nvdebug_state *g, 104uint64_t search_page_directory(struct nvdebug_state *g,
139 void __iomem *pde_offset, 105 void __iomem *pde_offset,