// Helpers to deal with NVIDIA's MMU and associated page tables #include // Kernel types #include "nvdebug.h" /* One of the oldest ways to access video memory on NVIDIA GPUs is by using a configurable 1MB window into VRAM which is mapped into BAR0 (register) space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs and appear to be used today to bootstrap page table configuration. Why is it mapped at a location called NVIDIA Private RAM Instance? Because this used to point to the entirety of intance RAM, which was seperate from VRAM on older NVIDIA GPUs. */ /* Convert a physical VRAM address to an offset in the PRAMIN window @param addr VRAM address to convert @return 0 on error, PRAMIN offset on success Note: Use off2PRAMIN() instead if you want a dereferenceable address */ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) { uint64_t pramin_base_va; bar0_window_t window; window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); // Check if the address is valid (49 bits are addressable on-GPU) if (addr & ~0x0001ffffffffffff) { printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n", addr, __func__); return 0; } // For unclear (debugging?) reasons, PRAMIN can point to SYSMEM if (window.target != TARGET_VID_MEM) return 0; pramin_base_va = ((uint64_t)window.base) << 16; // Protect against out-of-bounds accesses if (addr < pramin_base_va || addr > pramin_base_va + NV_PRAMIN_LEN) return 0; return addr - pramin_base_va; } // Convert a GPU physical address to CPU virtual address via the PRAMIN window void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); } /* FIXME void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) { return g->bar2 + off; } */ // Internal helper for search_page_directory(). uint64_t search_page_directory_subtree(struct nvdebug_state *g, void __iomem *pde_offset, void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), uint64_t addr_to_find, uint32_t level) { uint64_t res, i; void __iomem *next; page_dir_entry_t entry; if (level > sizeof(NV_MMU_PT_V2_SZ)) return 0; // Hack to workaround PDE0 being double-size and strangely formatted if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16) pde_offset += 8; entry.raw = readl(pde_offset); // If we reached an invalid (unpopulated) PDE, walk back up the tree if (entry.target == PD_AND_TARGET_INVALID) return 0; // Succeed when we reach a PTE with the address we want if (entry.is_pte) { printk(KERN_INFO "[nvdebug] PTE for phy addr %llx (raw: %x)\n", ((u64)entry.addr) << 12, entry.raw); return (uint64_t)entry.addr << 12 == addr_to_find; } printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); // Depth-first search of the page table for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) { next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); // off2addr can fail if (!next) { printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__); return 0; } res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1); if (res) return res | (i << NV_MMU_PT_V2_LSB[level + 1]); } return 0; } /* GPU Physical address -> Virtual address ("reverse" translation) Depth-first search a page directory of the GPU MMU for where a particular physical address is mapped. Upon finding a mapping, the virtual address is returned. @param pde_offset Dereferenceable pointer to the start of the PDE3 entries @param off2addr Func to convert VRAM phys addresses to valid CPU VAs @param addr_to_find Physical address to reconstruct the virtual address of @return 0 on error, otherwise the virtual address at which addr_to_find is mapped into by this page table. (Zero is not a valid virtual address) */ uint64_t search_page_directory(struct nvdebug_state *g, void __iomem *pde_offset, void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), uint64_t addr_to_find) { uint64_t res, i; // Make sure that the query is page-aligned if (addr_to_find & 0xfff) { printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find); return 0; } // Search the top-level page directory (PDE3) for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++) if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0))) return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]); return 0; } /* GMMU Page Tables Version 1 This page table only contains 2 levels and is used in the Fermi, Kepler, and Maxwell architectures */ // Number of entries in the PDE and PTE levels static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 2<<13 is an educated guess!!! // Which bit index is the least significant in indexing each page level static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!! uint64_t search_v1_page_directory(struct nvdebug_state *g, void __iomem *pde_offset, void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), uint64_t addr_to_find) { uint64_t j, i = 0; page_dir_entry_v1_t pde; page_tbl_entry_v1_t pte; void __iomem *pte_offset; // For each PDE do { // readq doesn't seem to work on BAR0 pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4); pde.raw <<= 32; pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t)); // Verify PDE is present if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID) continue; // Convert to a dereferencable pointer from CPU virtual address space pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12); if (!pte_offset) continue; // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw); // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw); // For each PTE for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) { // Don't overrun the PRAMIN window if (pte_offset > NV_PRAMIN + g->regs + NV_PRAMIN_LEN) return 0; pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4); pte.raw <<= 32; pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t)); // Skip non-present PTEs if (!pte.is_present) continue; // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw); // If we find a matching PTE, return its virtual address if ((uint64_t)pte.addr << 12 == addr_to_find) return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; } } while (++i < NV_MMU_PT_V1_SZ[0]); return 0; } /* GMMU Page Tables Version 0 This page table only contains 2 levels and is used in the Tesla architecture */ /* *** UNTESTED *** #define NV_MMU_PT_V0_SZ 2048 #define NV_MMU_PT_V0_LSB 29 uint64_t search_v0_page_directory(struct nvdebug_state *g, void __iomem *pde_offset, void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t), uint32_t addr_to_find) { int j, i = 0; page_dir_entry_v0_t pde; page_tbl_entry_v0_t pte; void __iomem *pte_offset; // For each PDE do { // readq doesn't seem to work on BAR0 pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4); pde.raw <<= 32; pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t)); //if (pde.raw) //printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v1_t), pde.raw); // Skip unpopulated PDEs if (pde.type == NOT_PRESENT) continue; //printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12); pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12); // For each PTE for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) { pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4); pte.raw <<= 32; pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t)); // Skip non-present PTEs if (!pte.is_present) continue; // If we find a matching PTE, return its virtual address //if (pte.addr != 0x5555555) // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present"); if (pte.addr << 12 == addr_to_find) return i << NV_MMU_PT_V0_LSB | j << 12; } } while (++i < NV_MMU_PT_V0_SZ); return 0; // No match } */