Diffstat (limited to 'mmu.c')
-rw-r--r--	mmu.c | 54 ++++++++++--------------------------------------------
1 file changed, 10 insertions(+), 44 deletions(-)
@@ -39,46 +39,6 @@ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
 	return addr - pramin_base_va;
 }
 
-/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
-   straightforward starting with Pascal ("page table version 2"), except for a
-   few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
-
-   All you really need to know is that any given Page Directory Entry (PDE)
-   contains a pointer to the start of a 4k page densely filled with PDEs or
-   Page Table Entries (PTEs).
-
-   == Page Table Refresher ==
-   Page tables convert virtual addresses to physical addresses via a tree
-   structure. Leaves (PTEs) contain a physical address, and the path from
-   root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
-   When descending, the virtual address is sliced into pieces, and one slice
-   is used at each level (as an index) to select the next node (at level+1).
-
-   V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level
-   of PTEs. How the virtual address is sliced to yield an index into each
-   level and a page offset is shown in Fig. 1.
-
-   == Figure 1 ==
-   Page Offset (12 bits) <--------------------------------------+
-   Page Table Entry (PTE) (9 bits) <--------------------+       |
-   Page Directory Entry (PDE) 0 (8 bits) <-----+        |       |
-   PDE1 (9 bits) <--------------------+        |        |       |
-   PDE2 (9 bits) <-----------+        |        |        |       |
-   PDE3 (2 bits) <--+        |        |        |        |       |
-                    ^        ^        ^        ^        ^       ^
-   Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
-
-   The following arrays merely represent different projections of Fig. 1, and
-   only one is strictly needed to reconstruct all the others. However, due to
-   the complexity of page tables, we include all of them to aid readability.
-*/
-// How many nodes/entries per level in V2 of NVIDIA's page table format
-static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
-// Size in bytes of an entry at a particular level
-static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
-// Which bit index is the least significant in indexing each page level
-static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
-
 // Convert a GPU physical address to CPU virtual address via the PRAMIN window
 void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
 	return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
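
The dropped tables are three projections of Fig. 1, so a short sketch can show how they slice a virtual address. This is not part of the patch, and the helper name va_to_index() is hypothetical; it assumes only the two arrays quoted above:

#include <stdint.h>

static const int NV_MMU_PT_V2_SZ[5]  = {4, 512, 512, 256, 512};
static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};

// Index into the page-table node at `level` (0 is PDE3, 4 is the PTE level)
// selected by virtual address `va`, per Fig. 1.
static inline unsigned va_to_index(uint64_t va, int level)
{
	// All level sizes are powers of two, so the modulo keeps exactly the
	// slice's bits after shifting its least-significant bit down to bit 0.
	return (va >> NV_MMU_PT_V2_LSB[level]) % NV_MMU_PT_V2_SZ[level];
}

For example, va_to_index(va, 4) is bits [20, 12] of va, i.e. the PTE index.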
@@ -90,6 +50,7 @@ void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
 }
 */
 
+// Internal helper for search_page_directory().
 uint64_t search_page_directory_subtree(struct nvdebug_state *g,
                                        void __iomem *pde_offset,
                                        void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
@@ -114,7 +75,7 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
 	}
 	printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
 	// Depth-first search of the page table
-	for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) {
+	for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
 		next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
 		// off2addr can fail
 		if (!next) {
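
The fix in this hunk makes the loop bound consistent with the stride, which already indexed with level + 1: the PDE at level points to a page filled with entries of the next level down. Concretely, at level 3 (PDE0) the child page holds NV_MMU_PT_V2_SZ[4] = 512 PTEs of NV_MMU_PT_V2_ENTRY_SZ[4] = 8 bytes each, i.e. 512 * 8 = 4096 bytes, exactly one densely filled 4k page; the old bound of NV_MMU_PT_V2_SZ[3] = 256 walked only the first half of each PTE page.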
@@ -128,12 +89,17 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
 		return 0;
 }
 
-/* Search a page directory of the GPU MMU
+/* GPU Physical address -> Virtual address ("reverse" translation)
+
+   Depth-first search a page directory of the GPU MMU for where a particular
+   physical address is mapped. Upon finding a mapping, the virtual address is
+   returned.
+
    @param pde_offset Dereferenceable pointer to the start of the PDE3 entries
-   @param off2addr Func to converts VRAM phys addresses to valid CPU VAs
+   @param off2addr Func to convert VRAM phys addresses to valid CPU VAs
    @param addr_to_find Physical address to reconstruct the virtual address of
    @return 0 on error, otherwise the virtual address at which addr_to_find is
-           mapped into by this page table.
+           mapped by this page table. (Zero is not a valid virtual address.)
 */
 uint64_t search_page_directory(struct nvdebug_state *g,
                                void __iomem *pde_offset,
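
For context, a hypothetical caller (not part of this patch) might reverse-translate a VRAM physical address through the PRAMIN-backed accessor from the first hunk. Here pde3_base and phys_addr are placeholder variables, and the trailing addr_to_find parameter is assumed from the @param list above:

	uint64_t va = search_page_directory(g, pde3_base, phy2PRAMIN, phys_addr);
	if (!va)
		printk(KERN_WARNING "[nvdebug] %llx appears unmapped\n", phys_addr);

Returning zero for "not found" is unambiguous only because, as the updated comment notes, zero is not a valid virtual address in this page table.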