// Helpers to deal with NVIDIA's MMU and associated page tables
#include <linux/kernel.h> // Kernel types
#include "nvdebug.h"
/* One of the oldest ways to access video memory on NVIDIA GPUs is via a
configurable 1MB window into VRAM which is mapped into BAR0 (register)
space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs,
and appears to be used even today to bootstrap page table configuration.
Why is it mapped at a location called NVIDIA Private RAM Instance? Because
this window used to point to the entirety of instance RAM, which was
separate from VRAM on older NVIDIA GPUs.
*/
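/* A minimal sketch (untested) of repositioning the PRAMIN window. It assumes
a nvdebug_writel() counterpart to nvdebug_readl() exists, and relies on the
window base being in 64KiB units, as implied by the <<16 in vram2PRAMIN()
below:
static void example_move_pramin_window(struct nvdebug_state *g, uint64_t vram_base) {
bar0_window_t window;
window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
window.base = (u32)(vram_base >> 16); // 64KiB-aligned VRAM base
window.target = TARGET_VID_MEM; // Window into VRAM, not SYSMEM
nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw);
}
*/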
/* Convert a physical VRAM address to an offset in the PRAMIN window
@param addr VRAM address to convert
@return -errno on error, PRAMIN offset on success
Note: Use phy2PRAMIN() instead if you want a dereferenceable address
Note: PRAMIN window is only 1MB, so returning an int is safe
*/
static int vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
uint64_t pramin_base_va;
bar0_window_t window;
window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
// Check if the address is valid (49 bits are addressable on-GPU)
if (addr & ~0x0001ffffffffffff) {
printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
addr, __func__);
return -EINVAL;
}
// For unclear (debugging?) reasons, PRAMIN can point to SYSMEM
if (window.target != TARGET_VID_MEM)
return -EFAULT;
pramin_base_va = ((uint64_t)window.base) << 16;
// Protect against out-of-bounds accesses
if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN)
return -ERANGE;
return addr - pramin_base_va;
}
// Convert a GPU physical address to CPU virtual address via the PRAMIN window
// @return A dereferenceable address, or 0 (an invalid physical address) on err
void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
int off = vram2PRAMIN(g, phy);
if (off == -ERANGE)
printk(KERN_ERR "[nvdebug] Page table walk off end of PRAMIN!\n");
if (off < 0)
return NULL;
return g->regs + NV_PRAMIN + off;
}
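/* Usage sketch (untested): read one 32-bit word of VRAM through the PRAMIN
window. Assumes vram_addr already falls within the currently-configured
window; phy2PRAMIN() returns 0 otherwise.
static uint32_t example_read_vram_word(struct nvdebug_state *g, uint64_t vram_addr) {
void __iomem *addr = phy2PRAMIN(g, vram_addr);
if (!addr)
return 0;
return readl(addr);
}
*/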
/* FIXME
void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
return g->bar2 + off;
}
*/
// Internal helper for search_page_directory().
uint64_t search_page_directory_subtree(struct nvdebug_state *g,
void __iomem *pde_offset,
void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
uint64_t addr_to_find,
uint32_t level) {
uint64_t res, i;
void __iomem *next;
page_dir_entry_t entry;
// Bounds check: stop if we've recursed past the deepest page table level
if (level >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
return 0;
// Hack to workaround PDE0 being double-size and strangely formatted
if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16)
pde_offset += 8;
entry.raw_w = readq(pde_offset);
// If we reached an invalid (unpopulated) PDE, walk back up the tree
if (entry.target == PD_AND_TARGET_INVALID)
return 0;
// Succeed when we reach a PTE with the address we want
if (entry.is_pte) {
// TODO: Handle huge pages here
printk(KERN_INFO "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w);
return (uint64_t)entry.addr << 12 == addr_to_find;
}
printk(KERN_INFO "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w);
// A non-PTE entry at the deepest level means the table is malformed
if (level + 1 >= ARRAY_SIZE(NV_MMU_PT_V2_SZ))
return 0;
// Depth-first search of the page table
for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
// off2addr can fail
if (!next || !entry.addr_w) {
printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__);
return 0;
}
res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1);
if (res)
return res | (i << NV_MMU_PT_V2_LSB[level + 1]);
}
return 0;
}
/* GPU Physical address -> Virtual address ("reverse" translation)
Depth-first search a page directory of the GPU MMU for where a particular
physical address is mapped. Upon finding a mapping, the virtual address is
returned.
@param pde_offset Dereferenceable pointer to the start of the PDE3 entries
@param off2addr Func to convert VRAM phys addresses to valid CPU VAs
@param addr_to_find Physical address to reconstruct the virtual address of
@return 0 on error, otherwise the virtual address at which addr_to_find is
mapped by this page table. (Zero is not a valid virtual address.)
*/
uint64_t search_page_directory(struct nvdebug_state *g,
void __iomem *pde_offset,
void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
uint64_t addr_to_find) {
uint64_t res, i;
// Make sure that the query is page-aligned
if (addr_to_find & 0xfff) {
printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find);
return 0;
}
printk(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018llx\n", (u64)addr_to_find, (u64)pde_offset);
// Search the top-level page directory (PDE3)
for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++)
if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0)))
return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]);
return 0;
}
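/* Usage sketch (untested): reverse-translate a physical address, given the
physical address of a context's PDE3 base. Both pd_phys and target_phys are
hypothetical inputs here; in practice the page directory base would come
from somewhere like a channel's instance block.
static uint64_t example_reverse_translate(struct nvdebug_state *g, uint64_t pd_phys, uint64_t target_phys) {
void __iomem *pd = phy2PRAMIN(g, pd_phys);
if (!pd)
return 0;
// Search VRAM-backed page tables via the PRAMIN window
return search_page_directory(g, pd, phy2PRAMIN, target_phys);
}
*/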
/* GMMU Page Tables Version 1
This page table contains only 2 levels and is used on the Fermi, Kepler, and
Maxwell architectures.
*/
// Number of entries in the PDE and PTE levels
static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 1<<13 is an educated guess!!!
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!!
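/* Sketch of how the (guessed) V1 constants above decompose a virtual address
(untested; only as accurate as the educated guesses above):
static inline void example_v1_va_split(uint64_t va, uint64_t *pde_idx, uint64_t *pte_idx, uint64_t *pg_off) {
*pde_idx = va >> NV_MMU_PT_V1_LSB[0]; // Bits 25 and up index the PDEs
*pte_idx = (va >> NV_MMU_PT_V1_LSB[1]) & (NV_MMU_PT_V1_SZ[1] - 1); // Bits 24:12
*pg_off = va & 0xfff; // Bits 11:0 are the offset within a 4KiB page
}
*/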
uint64_t search_v1_page_directory(struct nvdebug_state *g,
void __iomem *pde_offset,
void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
uint64_t addr_to_find) {
uint64_t j, i = 0;
page_dir_entry_v1_t pde;
page_tbl_entry_v1_t pte;
void __iomem *pte_offset;
// For each PDE
do {
// readq doesn't seem to work on BAR0
pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4);
pde.raw <<= 32;
pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t));
// Verify PDE is present
if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID)
continue;
// Convert to a dereferenceable pointer from CPU virtual address space
pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12);
if (!pte_offset)
continue;
// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw);
// printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw);
// For each PTE
for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) {
// Don't overrun the PRAMIN window
if (pte_offset + (j + 1) * sizeof(page_tbl_entry_v1_t) > g->regs + NV_PRAMIN + NV_PRAMIN_LEN)
return 0;
pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4);
pte.raw <<= 32;
pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t));
// Skip non-present PTEs
if (!pte.is_present)
continue;
// printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw);
// If we find a matching PTE, return its virtual address
if ((uint64_t)pte.addr << 12 == addr_to_find)
return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
}
} while (++i < NV_MMU_PT_V1_SZ[0]);
return 0;
}
/* GMMU Page Tables Version 0
This page table contains only 2 levels and is used on the Tesla architecture.
*/
/* *** UNTESTED ***
#define NV_MMU_PT_V0_SZ 2048
#define NV_MMU_PT_V0_LSB 29
uint64_t search_v0_page_directory(struct nvdebug_state *g,
void __iomem *pde_offset,
void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t),
uint32_t addr_to_find) {
int j, i = 0;
page_dir_entry_v0_t pde;
page_tbl_entry_v0_t pte;
void __iomem *pte_offset;
// For each PDE
do {
// readq doesn't seem to work on BAR0
pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4);
pde.raw <<= 32;
pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t));
//if (pde.raw)
//printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v0_t), pde.raw);
// Skip unpopulated PDEs
if (pde.type == NOT_PRESENT)
continue;
//printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12);
pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12);
// off2addr can fail
if (!pte_offset)
continue;
// For each PTE
for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) {
pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4);
pte.raw <<= 32;
pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t));
// Skip non-present PTEs
if (!pte.is_present)
continue;
// If we find a matching PTE, return its virtual address
//if (pte.addr != 0x5555555)
// printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present");
if ((uint64_t)pte.addr << 12 == addr_to_find)
return i << NV_MMU_PT_V0_LSB | j << 12;
}
} while (++i < NV_MMU_PT_V0_SZ);
return 0; // No match
}
*/