Diffstat (limited to 'mmu.c')
-rw-r--r-- | mmu.c | 251 |
1 file changed, 251 insertions, 0 deletions
@@ -0,0 +1,251 @@
// Helpers to deal with NVIDIA's MMU and associated page tables
#include <linux/kernel.h> // Kernel types

#include "nvdebug.h"

/* One of the oldest ways to access video memory on NVIDIA GPUs is by using
   a configurable 1MB window into VRAM which is mapped into BAR0 (register)
   space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs
   and appears to be used today to bootstrap page table configuration.

   Why is it mapped at a location called NVIDIA Private RAM Instance? Because
   this used to point to the entirety of instance RAM, which was separate from
   VRAM on older NVIDIA GPUs.
*/
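
/* Illustrative sketch of how the PRAMIN window is used: point the window at
   the 64KiB-aligned region containing a VRAM address, then read through BAR0.
   This assumes a nvdebug_writel() counterpart to nvdebug_readl() exists; it
   is a hypothetical example, not part of the driver.
*/
static uint32_t pramin_read32_example(struct nvdebug_state *g, uint64_t vram_addr) {
	bar0_window_t window;
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	// The window base is in units of 64KiB of VRAM
	window.base = vram_addr >> 16;
	window.target = TARGET_VID_MEM;
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); // hypothetical helper
	// vram_addr now falls within the first 64KiB of the 1MB window
	return readl(g->regs + NV_PRAMIN + (vram_addr & 0xffff));
}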

/* Convert a physical VRAM address to an offset in the PRAMIN window
   @param addr VRAM address to convert
   @return 0 on error, PRAMIN offset on success

   Note: Use phy2PRAMIN() instead if you want a dereferenceable address
*/
uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
	uint64_t pramin_base_va;
	bar0_window_t window;
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	// Check if the address is valid (49 bits are addressable on-GPU)
	if (addr & ~0x0001ffffffffffff) {
		printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
		       addr, __func__);
		return 0;
	}
	// For unclear (debugging?) reasons, PRAMIN can point to SYSMEM
	if (window.target != TARGET_VID_MEM)
		return 0;
	pramin_base_va = ((uint64_t)window.base) << 16;
	// Protect against out-of-bounds accesses (the window is NV_PRAMIN_LEN long)
	if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN)
		return 0;
	return addr - pramin_base_va;
}

/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
   straightforward starting with Pascal ("page table version 2"), except for a
   few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).

   All you really need to know is that any given Page Directory Entry (PDE)
   contains a pointer to the start of a 4k page densely filled with PDEs or
   Page Table Entries (PTEs).

   == Page Table Refresher ==
   Page tables convert virtual addresses to physical addresses, and they do
   this via a tree structure. Leaves (PTEs) contain a physical address, and
   the path from root to leaf is defined by the virtual address. Non-leaf
   nodes are PDEs. When descending, the virtual address is sliced into pieces,
   and one slice is used at each level (as an index) to select the
   next-visited node (in level+1).

   V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
   PTEs. How the virtual address is sliced to yield an index into each level
   and a page offset is shown by Fig 1.

   == Figure 1 ==
   Page Offset (12 bits) <---------------------------------------+
   Page Table Entry (PTE) (9 bits) <--------------------+        |
   Page Directory Entry (PDE) 0 (8 bits) <-----+        |        |
   PDE1 (9 bits) <--------------------+        |        |        |
   PDE2 (9 bits) <-----------+        |        |        |        |
   PDE3 (2 bits) <--+        |        |        |        |        |
                    ^        ^        ^        ^        ^        ^
   Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]

   The following arrays merely represent different projections of Fig. 1, and
   only one is strictly needed to reconstruct all the others. However, due to
   the complexity of page tables, we include all of these to aid readability.
*/
// How many nodes/entries per level in V2 of NVIDIA's page table format
static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
// Size in bytes of an entry at a particular level
static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
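
/* For illustration: how Fig. 1 maps onto the arrays above. The index into
   the level-`lvl` directory/table for virtual address `va` is the
   log2(NV_MMU_PT_V2_SZ[lvl]) bits starting at bit NV_MMU_PT_V2_LSB[lvl].
   This helper is a sketch and is not used by the walkers below.
*/
static inline uint64_t v2_va_to_index(uint64_t va, int lvl) {
	return (va >> NV_MMU_PT_V2_LSB[lvl]) & (NV_MMU_PT_V2_SZ[lvl] - 1);
}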

// Convert a GPU physical address to a CPU virtual address via the PRAMIN window
// @return A dereferenceable address on success, NULL on error
void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
	uint32_t off = vram2PRAMIN(g, phy);
	// vram2PRAMIN() returns 0 on error (note that this conflates an error
	// with an address falling at the very start of the window)
	if (!off)
		return NULL;
	return g->regs + NV_PRAMIN + off;
}

/* FIXME
void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
	return g->bar2 + off;
}
*/

uint64_t search_page_directory_subtree(struct nvdebug_state *g,
                                       void __iomem *pde_offset,
                                       void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
                                       uint64_t addr_to_find,
                                       uint32_t level) {
	uint64_t res, i;
	void __iomem *next;
	page_dir_entry_t entry;
	// Guard against indexing off the end of the level-description arrays
	// (sizeof() would give the size in bytes, not the number of entries)
	if (level >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
		return 0;
	// Hack to work around PDE0 being double-size and strangely formatted
	if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16)
		pde_offset += 8;
	entry.raw = readl(pde_offset);
	// If we reached an invalid (unpopulated) PDE, walk back up the tree
	if (entry.target == PD_AND_TARGET_INVALID)
		return 0;
	// Succeed when we reach a PTE with the address we want. The 1 returned
	// on a match acts as a "found" flag; it lands in the page-offset bits
	// and is masked off by search_page_directory().
	if (entry.is_pte) {
		printk(KERN_INFO "[nvdebug] PTE for phy addr %llx (raw: %x)\n", ((u64)entry.addr) << 12, entry.raw);
		return (uint64_t)entry.addr << 12 == addr_to_find;
	}
	printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
	// A non-PTE entry at the deepest level is malformed
	if (level + 1 >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
		return 0;
	// Depth-first search of the page table; this PDE's children live at
	// level + 1, so use that level's entry count and entry size
	for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
		next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
		// off2addr can fail
		if (!next) {
			printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__);
			return 0;
		}
		res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1);
		if (res)
			return res | (i << NV_MMU_PT_V2_LSB[level + 1]);
	}
	return 0;
}

/* Search a page directory of the GPU MMU
   @param pde_offset   Dereferenceable pointer to the start of the PDE3 entries
   @param off2addr     Function to convert VRAM physical addresses to valid CPU VAs
   @param addr_to_find Physical address to reconstruct the virtual address of
   @return 0 on error, otherwise the virtual address at which addr_to_find is
           mapped by this page table.
*/
uint64_t search_page_directory(struct nvdebug_state *g,
                               void __iomem *pde_offset,
                               void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
                               uint64_t addr_to_find) {
	uint64_t res, i;
	// Make sure that the query is page-aligned
	if (addr_to_find & 0xfff) {
		printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find);
		return 0;
	}
	// Search the top-level page directory (PDE3)
	for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++)
		if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0)))
			return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]);
	return 0;
}
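
/* Illustrative usage sketch (not part of the driver): given the physical
   address of a context's root page directory (the PDB, typically read from
   an instance block), reverse-translate a physical address. `pdb_phys` and
   `target_phys` are hypothetical inputs.
*/
static uint64_t example_v2_reverse_lookup(struct nvdebug_state *g,
                                          uint64_t pdb_phys,
                                          uint64_t target_phys) {
	// Make the PDE3 entries dereferenceable via the PRAMIN window
	void __iomem *pd = phy2PRAMIN(g, pdb_phys);
	if (!pd)
		return 0;
	// Returns the GPU virtual address that maps target_phys, or 0 if unmapped
	return search_page_directory(g, pd, phy2PRAMIN, target_phys);
}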

/* GMMU Page Tables Version 1
   This page table only contains 2 levels and is used in the Fermi, Kepler,
   and Maxwell architectures.
*/
// Number of entries in the PDE and PTE levels
static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 1<<13 is an educated guess!!!
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!!
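
/* The V1 (and V0) walkers read 64-bit entries as two 32-bit reads, since
   readq doesn't seem to work on BAR0. A sketch of that pattern as a helper;
   the name is ours, and the walkers below keep the reads inline:
*/
static inline uint64_t readq_via_readl(void __iomem *addr) {
	// The high word lives at the higher offset; read it first, as below
	uint64_t raw = readl(addr + 4);
	raw <<= 32;
	raw |= readl(addr);
	return raw;
}
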
uint64_t search_v1_page_directory(struct nvdebug_state *g,
                                  void __iomem *pde_offset,
                                  void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
                                  uint64_t addr_to_find) {
	uint64_t j, i = 0;
	page_dir_entry_v1_t pde;
	page_tbl_entry_v1_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t));
		// Verify PDE is present
		if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID)
			continue;
		// Convert to a dereferenceable pointer from CPU virtual address space
		pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12);
		if (!pte_offset)
			continue;
		// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw);
		// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw);
		// For each PTE
		for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) {
			// Don't overrun the PRAMIN window (check the entry we're about to read)
			if (pte_offset + (j + 1) * sizeof(page_tbl_entry_v1_t) > g->regs + NV_PRAMIN + NV_PRAMIN_LEN)
				return 0;
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
			// printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw);
			// If we find a matching PTE, return its virtual address
			if ((uint64_t)pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
		}
	} while (++i < NV_MMU_PT_V1_SZ[0]);
	return 0;
}

/* GMMU Page Tables Version 0
   This page table only contains 2 levels and is used in the Tesla architecture
*/
/* *** UNTESTED ***
#define NV_MMU_PT_V0_SZ 2048
#define NV_MMU_PT_V0_LSB 29
uint64_t search_v0_page_directory(struct nvdebug_state *g,
                                  void __iomem *pde_offset,
                                  void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t),
                                  uint32_t addr_to_find) {
	int j, i = 0;
	page_dir_entry_v0_t pde;
	page_tbl_entry_v0_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t));
		//if (pde.raw)
		//	printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v0_t), pde.raw);
		// Skip unpopulated PDEs
		if (pde.type == NOT_PRESENT)
			continue;
		//printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12);
		pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12);
		// For each PTE
		for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) {
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
			//if (pte.addr != 0x5555555)
			//	printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present");
			// If we find a matching PTE, return its virtual address
			if ((uint64_t)pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V0_LSB | j << 12;
		}
	} while (++i < NV_MMU_PT_V0_SZ);
	return 0; // No match
}
*/