// Helpers to deal with NVIDIA's MMU and associated page tables
#include <linux/kernel.h>  // Kernel types

#include "nvdebug.h"

/* One of the oldest ways to access video memory on NVIDIA GPUs is by using
  a configurable 1MB window into VRAM which is mapped into BAR0 (register)
  space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs
  and appears to be used today to bootstrap page table configuration.

  Why is it mapped at a location called NVIDIA Private RAM Instance? Because
  this used to point to the entirety of instance RAM, which was separate from
  VRAM on older NVIDIA GPUs.
*/
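
/* Sketch (not part of the original flow): repositioning the PRAMIN window so
   that a given VRAM address becomes accessible. Illustrative only; it assumes
   a nvdebug_writel() counterpart to nvdebug_readl(), and the driver or GPU
   may depend on the current window position, so a real implementation should
   save and restore the previous register value.
static void example_move_pramin_window(struct nvdebug_state *g, uint64_t addr) {
	bar0_window_t window;
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	window.base = (uint32_t)(addr >> 16);  // Base is stored in units of 64KiB
	window.target = TARGET_VID_MEM;
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw);
}
*/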

/* Convert a physical VRAM address to an offset in the PRAMIN window
  @param addr VRAM address to convert
  @return -errno on error, PRAMIN offset on success

  Note: Use off2PRAMIN() instead if you want a dereferenceable address
  Note: PRAMIN window is only 1MB, so returning an int is safe
*/
static int vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
	uint64_t pramin_base_va;
	bar0_window_t window;
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	// Check if the address is valid (49 bits are addressable on-GPU)
	if (addr & ~0x0001ffffffffffff) {
		printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
		       addr, __func__);
		return -EINVAL;
	}
	// For unclear (debugging?) reasons, PRAMIN can point to SYSMEM
	if (window.target != TARGET_VID_MEM)
		return -EFAULT;
	pramin_base_va = ((uint64_t)window.base) << 16;
	// Protect against out-of-bounds accesses
	if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN)
		return -ERANGE;
	return addr - pramin_base_va;
}

// Convert a GPU physical address to a CPU virtual address via the PRAMIN window
// @return A dereferenceable address, or 0 (an invalid physical address) on err
void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
	int off = vram2PRAMIN(g, phy);
	if (off == -ERANGE)
		printk(KERN_ERR "[nvdebug] Page table walk off end of PRAMIN!\n");
	if (off < 0)
		return 0;
	return g->regs + NV_PRAMIN + off;
}

/* FIXME
void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
	return g->bar2 + off;
}
*/

// Internal helper for search_page_directory().
uint64_t search_page_directory_subtree(struct nvdebug_state *g,
				       void __iomem *pde_offset,
				       void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
				       uint64_t addr_to_find,
				       uint32_t level) {
	uint64_t res, i;
	void __iomem *next;
	page_dir_entry_t entry;
	// Bounds-check the level (sizeof alone would give bytes, not entries)
	if (level >= sizeof(NV_MMU_PT_V2_SZ) / sizeof(NV_MMU_PT_V2_SZ[0]))
		return 0;
	// Hack to work around PDE0 being double-size and strangely formatted
	if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16)
		pde_offset += 8;
	entry.raw_w = readq(pde_offset);
	// If we reached an invalid (unpopulated) PDE, walk back up the tree
	if (entry.target == PD_AND_TARGET_INVALID)
		return 0;
	// Succeed when we reach a PTE with the address we want
	if (entry.is_pte) {
		// TODO: Handle huge pages here
		printk(KERN_INFO "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w);
		return (uint64_t)entry.addr << 12 == addr_to_find;
	}
	printk(KERN_INFO "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w);
	// Depth-first search of the page table
	for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
		next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
		// off2addr can fail
		if (!next || !entry.addr_w) {
			printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__);
			return 0;
		}
		res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1);
		if (res)
			return res | (i << NV_MMU_PT_V2_LSB[level + 1]);
	}
	return 0;
}

/* GPU Physical address -> Virtual address ("reverse" translation)

   Depth-first search a page directory of the GPU MMU for where a particular
   physical address is mapped. Upon finding a mapping, the virtual address is
   returned.

  @param pde_offset   Dereferenceable pointer to the start of the PDE3 entries
  @param off2addr     Func to convert VRAM phys addresses to valid CPU VAs
  @param addr_to_find Physical address to reconstruct the virtual address of
  @return 0 on error, otherwise the virtual address at which addr_to_find is
          mapped into by this page table. (Zero is not a valid virtual address)
*/
uint64_t search_page_directory(struct nvdebug_state *g,
			       void __iomem *pde_offset,
			       void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
			       uint64_t addr_to_find) {
	uint64_t res, i;
	// Make sure that the query is page-aligned
	if (addr_to_find & 0xfff) {
		printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find);
		return 0;
	}
	printk(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018llx\n", (u64)addr_to_find, (u64)pde_offset);
	// Search the top-level page directory (PDE3)
	for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++)
		if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0)))
			return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]);
	return 0;
}
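
/* Usage sketch (hypothetical values): reverse-translating a physical address
   given a page directory base (PDB) pulled from, e.g., a channel's instance
   block. `pdb_phys` and `target_phys` are placeholders, and phy2PRAMIN() only
   resolves addresses that the PRAMIN window currently covers.
	void __iomem *pdb = phy2PRAMIN(g, pdb_phys);
	uint64_t va = pdb ? search_page_directory(g, pdb, phy2PRAMIN, target_phys) : 0;
	if (va)
		printk(KERN_INFO "[nvdebug] %#018llx is mapped at VA %#018llx\n",
		       target_phys, va);
*/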

/* GMMU Page Tables Version 1
  This page table only contains 2 levels and is used in the Fermi, Kepler, and
  Maxwell architectures.
*/
// Number of entries in the PDE and PTE levels
static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13};  // 1<<13 is an educated guess!!!
// Which bit index is the least significant in indexing each page level
static const int NV_MMU_PT_V1_LSB[2] = {25, 12};  // 25 is an educated guess!!!
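
/* Address decomposition implied by the constants above (built on the same
   educated guesses): for a virtual address `va` under the V1 layout,
	pde_idx = (va >> 25) & (512 - 1);        // Index into the page directory
	pte_idx = (va >> 12) & ((1 << 13) - 1);  // Index into the page table
   search_v1_page_directory() below inverts this, recombining a matching
   (i, j) pair as i << 25 | j << 12.
*/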
uint64_t search_v1_page_directory(struct nvdebug_state *g,
				  void __iomem *pde_offset,
				  void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
				  uint64_t addr_to_find) {
	uint64_t j, i = 0;
	page_dir_entry_v1_t pde;
	page_tbl_entry_v1_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t));
		// Verify PDE is present
		if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID)
			continue;
		// Convert to a dereferencable pointer from CPU virtual address space
		pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12);
		if (!pte_offset)
			continue;
//		printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw);
//		printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw);
		// For each PTE
		for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) {
			// Don't overrun the PRAMIN window (account for the entry offset)
			if (pte_offset + (j + 1) * sizeof(page_tbl_entry_v1_t) > g->regs + NV_PRAMIN + NV_PRAMIN_LEN)
				return 0;
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
//			printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw);
			// If we find a matching PTE, return its virtual address
			if ((uint64_t)pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1];
		}
	} while (++i < NV_MMU_PT_V1_SZ[0]);
	return 0;
}
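
/* The high-then-low 32-bit read pattern above (readq does not appear to work
   on BAR0) recurs for every 64-bit entry. A shared helper (hypothetical, not
   part of the original file) could capture it:
static inline uint64_t nvdebug_readq_split(void __iomem *addr) {
	uint64_t val = readl(addr + 4);  // High dword sits at the higher address
	return val << 32 | readl(addr);
}
*/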

/* GMMU Page Tables Version 0
  This page table only contains 2 levels and is used in the Tesla architecture.
*/
/* *** UNTESTED ***
#define NV_MMU_PT_V0_SZ 2048
#define NV_MMU_PT_V0_LSB 29
uint64_t search_v0_page_directory(struct nvdebug_state *g,
				  void __iomem *pde_offset,
				  void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t),
				  uint32_t addr_to_find) {
	int j, i = 0;
	page_dir_entry_v0_t pde;
	page_tbl_entry_v0_t pte;
	void __iomem *pte_offset;
	// For each PDE
	do {
		// readq doesn't seem to work on BAR0
		pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4);
		pde.raw <<= 32;
		pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t));
		//if (pde.raw)
		//printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v0_t), pde.raw);
		// Skip unpopulated PDEs
		if (pde.type == NOT_PRESENT)
			continue;
		//printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12);
		pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12);
		// For each PTE
		for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) {
			pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4);
			pte.raw <<= 32;
			pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t));
			// Skip non-present PTEs
			if (!pte.is_present)
				continue;
			// If we find a matching PTE, return its virtual address
			//if (pte.addr != 0x5555555)
			//	printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present");
			if (pte.addr << 12 == addr_to_find)
				return i << NV_MMU_PT_V0_LSB | j << 12;
		}
	} while (++i < NV_MMU_PT_V0_SZ);
	return 0;  // No match
}
*/