Diffstat (limited to 'mmu.c')
-rw-r--r--  mmu.c  251
1 file changed, 251 insertions, 0 deletions
diff --git a/mmu.c b/mmu.c
new file mode 100644
index 0000000..26c7af5
--- /dev/null
+++ b/mmu.c
@@ -0,0 +1,251 @@
// Helpers to deal with NVIDIA's MMU and associated page tables
#include <linux/kernel.h> // Kernel types

#include "nvdebug.h"

/* One of the oldest ways to access video memory on NVIDIA GPUs is by using
   a configurable 1MB window into VRAM which is mapped into BAR0 (register)
   space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs
   and appears to be used today to bootstrap page table configuration.

   Why is it mapped at a location called NVIDIA Private RAM Instance? Because
   this used to point to the entirety of instance RAM, which was separate from
   VRAM on older NVIDIA GPUs.
*/
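
/* Example: reading one word of VRAM through the PRAMIN window. This is an
   illustrative sketch only; it assumes an nvdebug_writel() counterpart to
   nvdebug_readl() exists, and that repointing the window is safe on the
   target system (the driver may rely on its current setting).

   uint32_t read_vram_word(struct nvdebug_state *g, uint64_t vram_addr) {
       bar0_window_t window;
       window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
       // Point the window at the naturally aligned 64KiB region containing
       // vram_addr (the base field is in units of 64KiB)
       window.base = vram_addr >> 16;
       window.target = TARGET_VID_MEM;
       nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); // assumed helper
       // The word is now visible at its offset within the PRAMIN window
       return readl(g->regs + NV_PRAMIN + (vram_addr & 0xffff));
   }
*/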

/* Convert a physical VRAM address to an offset in the PRAMIN window
   @param addr VRAM address to convert
   @return 0 on error, PRAMIN offset on success

   Note: Use phy2PRAMIN() instead if you want a dereferenceable address
*/
uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
	uint64_t pramin_base_va;
	bar0_window_t window;
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	// Check if the address is valid (49 bits are addressable on-GPU)
	if (addr & ~0x0001ffffffffffff) {
		printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
		       addr, __func__);
		return 0;
	}
	// For unclear (debugging?) reasons, PRAMIN can point to SYSMEM
	if (window.target != TARGET_VID_MEM)
		return 0;
	pramin_base_va = ((uint64_t)window.base) << 16;
	// Protect against out-of-bounds accesses
	if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN)
		return 0;
	return addr - pramin_base_va;
}

/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
   straightforward starting with Pascal ("page table version 2"), except for a
   few quirks (like 16-byte PDE0 entries, while all other entries are 8 bytes).

   All you really need to know is that any given Page Directory Entry (PDE)
   contains a pointer to the start of a 4k page densely filled with PDEs or
   Page Table Entries (PTEs).

   == Page Table Refresher ==
   Page tables convert virtual addresses to physical addresses, and they do
   this via a tree structure. Leaves (PTEs) contain a physical address, and
   the path from root to leaf is defined by the virtual address. Non-leaf
   nodes are PDEs. When descending, the virtual address is sliced into pieces,
   and one slice is used at each level (as an index) to select the
   next-visited node (in level+1).

   V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
   PTEs. How the virtual address is sliced to yield an index into each level
   and a page offset is shown by Fig 1.

   == Figure 1 ==
   Page Offset (12 bits)  <--------------------------------------+
   Page Table Entry (PTE) (9 bits)  <--------------------+       |
   Page Directory Entry (PDE) 0 (8 bits)  <-----+        |       |
   PDE1 (9 bits)  <--------------------+        |        |       |
   PDE2 (9 bits)  <-----------+        |        |        |       |
   PDE3 (2 bits)  <--+        |        |        |        |       |
                     ^        ^        ^        ^        ^       ^
   Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]

   The following arrays merely represent different projections of Fig. 1, and
   only one is strictly needed to reconstruct all the others. However, due to
   the complexity of page tables, we include all of these to aid in
   readability.
*/
// How many nodes/entries per level in V2 of NVIDIA's page table format
static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
// Size in bytes of an entry at a particular level
static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};

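/* Worked example of how Fig. 1 and the arrays above relate (illustrative
   sketch only; nothing in this file calls it): the index into the table at a
   given level is the slice of the virtual address starting at that level's
   LSB, masked down to that level's entry count.

   static void print_v2_pt_indices(uint64_t va) {
       int level;
       // Levels 0-3 are PDE3 through PDE0; level 4 is the PTE level
       for (level = 0; level < 5; level++)
           printk(KERN_INFO "[nvdebug] level %d index: %llx\n", level,
                  (va >> NV_MMU_PT_V2_LSB[level]) & (NV_MMU_PT_V2_SZ[level] - 1));
       printk(KERN_INFO "[nvdebug] page offset: %llx\n", va & 0xfff);
   }
*/
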
// Convert a GPU physical address to CPU virtual address via the PRAMIN window
void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
	return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
}

/* FIXME
void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
	return g->bar2 + off;
}
*/

uint64_t search_page_directory_subtree(struct nvdebug_state *g,
				       void __iomem *pde_offset,
				       void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
				       uint64_t addr_to_find,
				       uint32_t level) {
	uint64_t res, i;
	void __iomem *next;
	page_dir_entry_t entry;
	// Level 4 is the PTE level; anything deeper is malformed
	if (level >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
		return 0;
	// Hack to work around PDE0 being double-size and strangely formatted
	if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16)
		pde_offset += 8;
	entry.raw = readl(pde_offset);
	// If we reached an invalid (unpopulated) PDE, walk back up the tree
	if (entry.target == PD_AND_TARGET_INVALID)
		return 0;
	// Succeed when we reach a PTE with the address we want
	if (entry.is_pte) {
		printk(KERN_INFO "[nvdebug] PTE for phy addr %llx (raw: %x)\n",
		       ((u64)entry.addr) << 12, entry.raw);
		return ((uint64_t)entry.addr << 12) == addr_to_find;
	}
	printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n",
	       ((u64)entry.addr) << 12, entry.target, level, entry.raw);
	// Depth-first search of the page table
	for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) {
		next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
		// off2addr can fail
		if (!next) {
			printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__);
			return 0;
		}
		res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1);
		if (res)
			return res | (i << NV_MMU_PT_V2_LSB[level + 1]);
	}
	return 0;
}

/* Search a page directory of the GPU MMU
   @param pde_offset   Dereferenceable pointer to the start of the PDE3 entries
   @param off2addr     Func that converts VRAM physical addresses to valid CPU VAs
   @param addr_to_find Physical address to reconstruct the virtual address of
   @return 0 on error, otherwise the virtual address at which addr_to_find is
           mapped by this page table.
*/
uint64_t search_page_directory(struct nvdebug_state *g,
			       void __iomem *pde_offset,
			       void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
			       uint64_t addr_to_find) {
	uint64_t res, i;
	// Make sure that the query is page-aligned
	if (addr_to_find & 0xfff) {
		printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find);
		return 0;
	}
	// Search the top-level page directory (PDE3)
	for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++)
		if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0)))
			return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]);
	return 0;
}
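
/* Example call (illustrative sketch only): reconstruct the GPU virtual
   address of a page of VRAM, given the GPU physical address of a context's
   PDE3 table. Where that base address comes from (e.g. a channel's instance
   block) is outside the scope of this file, so pd_base below is hypothetical.

   uint64_t find_va_of_page(struct nvdebug_state *g, uint64_t pd_base,
                            uint64_t page_pa) {
       // Walk the page tables through the PRAMIN window; the directory base
       // and every lower-level table are resolved via phy2PRAMIN()
       return search_page_directory(g, phy2PRAMIN(g, pd_base), phy2PRAMIN, page_pa);
   }
*/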

/* GMMU Page Tables Version 1
   This page table only contains 2 levels and is used in the Fermi, Kepler,
   and Maxwell architectures.
*/
// Number of entries in the PDE and PTE levels
static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 1<<13 is an educated guess!!!
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!!
uint64_t search_v1_page_directory(struct nvdebug_state *g,
				  void __iomem *pde_offset,
				  void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
				  uint64_t addr_to_find) {
	uint64_t j, i = 0;
	page_dir_entry_v1_t pde;
	page_tbl_entry_v1_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0, so assemble the 64-bit
		// entry from two 32-bit reads (high word first, then low word)
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t));
		// Verify PDE is present
		if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID)
			continue;
		// Convert to a dereferenceable pointer from CPU virtual address space
		pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12);
		if (!pte_offset)
			continue;
//		printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw);
//		printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw);
		// For each PTE
		for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) {
			// Don't overrun the PRAMIN window
			if (pte_offset + (j + 1) * sizeof(page_tbl_entry_v1_t) > g->regs + NV_PRAMIN + NV_PRAMIN_LEN)
				return 0;
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
//			printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw);
			// If we find a matching PTE, return its virtual address
			if ((uint64_t)pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
		}
	} while (++i < NV_MMU_PT_V1_SZ[0]);
	return 0;
}
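
/* The 64-bit PDE/PTE reads above are assembled from two readl() calls because
   64-bit readq() accesses do not appear to work through BAR0. A minimal
   helper capturing that access pattern might look like the sketch below (the
   name readq_via_readl() is hypothetical; nothing in this file defines it):

   static inline uint64_t readq_via_readl(void __iomem *addr) {
       uint64_t val = readl(addr + 4); // upper 32 bits
       val <<= 32;
       val |= readl(addr);             // lower 32 bits
       return val;
   }
*/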

/* GMMU Page Tables Version 0
   This page table only contains 2 levels and is used in the Tesla architecture.
*/
/* *** UNTESTED ***
#define NV_MMU_PT_V0_SZ 2048
#define NV_MMU_PT_V0_LSB 29
uint64_t search_v0_page_directory(struct nvdebug_state *g,
				  void __iomem *pde_offset,
				  void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t),
				  uint32_t addr_to_find) {
	int j, i = 0;
	page_dir_entry_v0_t pde;
	page_tbl_entry_v0_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t));
		//if (pde.raw)
		//	printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v0_t), pde.raw);
		// Skip unpopulated PDEs
		if (pde.type == NOT_PRESENT)
			continue;
		//printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12);
		pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12);
		// For each PTE
		for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) {
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
			// If we find a matching PTE, return its virtual address
			//if (pte.addr != 0x5555555)
			//	printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present");
			if ((uint64_t)pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V0_LSB | j << 12;
		}
	} while (++i < NV_MMU_PT_V0_SZ);
	return 0; // No match
}
*/