diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:36:49 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:36:49 -0400 |
commit | 8f9ed4c3b1f0e438107035147b5aa43fdcd66165 (patch) | |
tree | a28f2237f85ac9e85cf9837644160e0630deaf18 /mmu.c | |
parent | 4768fe31f114c5ad788012db5518ce8e37f79c7a (diff) |
Fix an off-by-one error in V2 reverse page table lookups
This would occasionally manifest as an inability to find the runlist
page in BAR2, as only part of the page table was being traversed.
Also includes non-functional changes to documentation, scoping, and
structure layout.
Diffstat (limited to 'mmu.c')
-rw-r--r-- | mmu.c | 54 |
1 files changed, 10 insertions, 44 deletions
@@ -39,46 +39,6 @@ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) { | |||
39 | return addr - pramin_base_va; | 39 | return addr - pramin_base_va; |
40 | } | 40 | } |
41 | 41 | ||
42 | /* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly | ||
43 | straight-forward starting with Pascal ("page table version 2"), except for a | ||
44 | few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes). | ||
45 | |||
46 | All you really need to know is that any given Page Directory Entry (PDE) | ||
47 | contains a pointer to the start of a 4k page densely filled with PDEs or Page | ||
48 | Table Entries (PTEs). | ||
49 | |||
50 | == Page Table Refresher == | ||
51 | Page tables convert virtual addresses to physical addresses, and they do this | ||
52 | via a tree structure. Leafs (PTEs) contain a physical address, and the path | ||
53 | from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs. | ||
54 | When descending, the virtual address is sliced into pieces, and one slice is | ||
55 | used at each level (as an index) to select the next-visited node (in level+1). | ||
56 | |||
57 | V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of | ||
58 | PTEs. How the virtual address is sliced to yield an index into each level and | ||
59 | a page offset is shown by Fig 1. | ||
60 | |||
61 | == Figure 1 == | ||
62 | Page Offset (12 bits) <---------------------------------------+ | ||
63 | Page Table Entry (PTE) (9 bits) <--------------------+ | | ||
64 | Page Directory Entry (PDE) 0 (8 bits) <-----+ | | | ||
65 | PDE1 (8 bits) <--------------------+ | | | | ||
66 | PDE2 (8 bits) <-----------+ | | | | | ||
67 | PDE3 (2 bits) <--+ | | | | | | ||
68 | ^ ^ ^ ^ ^ ^ | ||
69 | Virtual addr: [49, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0] | ||
70 | |||
71 | The following arrays merely represent different projections of Fig. 1, and | ||
72 | only one is strictly needed to reconstruct all the others. However, due to | ||
73 | the complexity of page tables, we include all of these to aid in readability. | ||
74 | */ | ||
75 | // How many nodes/entries per level in V2 of NVIDIA's page table format | ||
76 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; | ||
77 | // Size in bytes of an entry at a particular level | ||
78 | static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8}; | ||
79 | // Which bit index is the least significant in indexing each page level | ||
80 | static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12}; | ||
81 | |||
82 | // Convert a GPU physical address to CPU virtual address via the PRAMIN window | 42 | // Convert a GPU physical address to CPU virtual address via the PRAMIN window |
83 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { | 43 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { |
84 | return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); | 44 | return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); |
@@ -90,6 +50,7 @@ void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) { | |||
90 | } | 50 | } |
91 | */ | 51 | */ |
92 | 52 | ||
53 | // Internal helper for search_page_directory(). | ||
93 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, | 54 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, |
94 | void __iomem *pde_offset, | 55 | void __iomem *pde_offset, |
95 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | 56 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), |
@@ -114,7 +75,7 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
114 | } | 75 | } |
115 | printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); | 76 | printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); |
116 | // Depth-first search of the page table | 77 | // Depth-first search of the page table |
117 | for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) { | 78 | for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) { |
118 | next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); | 79 | next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); |
119 | // off2addr can fail | 80 | // off2addr can fail |
120 | if (!next) { | 81 | if (!next) { |
@@ -128,12 +89,17 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
128 | return 0; | 89 | return 0; |
129 | } | 90 | } |
130 | 91 | ||
131 | /* Search a page directory of the GPU MMU | 92 | /* GPU Physical address -> Virtual address ("reverse" translation) |
93 | |||
94 | Depth-first search a page directory of the GPU MMU for where a particular | ||
95 | physical address is mapped. Upon finding a mapping, the virtual address is | ||
96 | returned. | ||
97 | |||
132 | @param pde_offset Dereferenceable pointer to the start of the PDE3 entries | 98 | @param pde_offset Dereferenceable pointer to the start of the PDE3 entries |
133 | @param off2addr Func to converts VRAM phys addresses to valid CPU VAs | 99 | @param off2addr Func to convert VRAM phys addresses to valid CPU VAs |
134 | @param addr_to_find Physical address to reconstruct the virtual address of | 100 | @param addr_to_find Physical address to reconstruct the virtual address of |
135 | @return 0 on error, otherwise the virtual address at which addr_to_find is | 101 | @return 0 on error, otherwise the virtual address at which addr_to_find is |
136 | mapped into by this page table. | 102 | mapped into by this page table. (Zero is not a valid virtual address) |
137 | */ | 103 | */ |
138 | uint64_t search_page_directory(struct nvdebug_state *g, | 104 | uint64_t search_page_directory(struct nvdebug_state *g, |
139 | void __iomem *pde_offset, | 105 | void __iomem *pde_offset, |