From 968db9791dd8efd3526d7c47a751a54e1fa95eb4 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Mon, 22 Apr 2024 18:46:26 -0400 Subject: Fix page-table traversal for version 1 page tables - Correct V1 page table defines using information from kern_gmmu_fmt_gm10x.c in NVIDIA's open-gpu-kernel-modules repo. - Verify page table format in search_v1_page_directory() - Better, more controllable logging in mmu.c Tested on GM204 (GTX 970). --- mmu.c | 44 ++++++++++++++++++++++++++++++-------------- nvdebug.h | 10 +++++----- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/mmu.c b/mmu.c index eaf7d5f..6784b9f 100644 --- a/mmu.c +++ b/mmu.c @@ -7,13 +7,13 @@ #include "nvdebug.h" -// Uncomment to print every PDE and PTE walked for debugging -//#define DEBUG -#ifdef DEBUG -#define printk_debug printk -#else -#define printk_debug(...) -#endif +/* Set logging level for MMU operations + g_verbose >= 1: Log a single message describing the MMU operation + g_verbose >= 2: Log every PDE and PTE traversed +*/ +int g_verbose = 0; +#define printk_debug if (g_verbose >= 2) printk +#define printk_info if (g_verbose >= 1) printk /* Convert a page directory (PD) pointer and aperture to be kernel-accessible @@ -24,7 +24,7 @@ @param pd_ap PD-type aperture (target address space) for `addr` @return A dereferencable kernel address, or an ERR_PTR-wrapped error */ -void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET pd_ap) { +static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET pd_ap) { struct iommu_domain *dom; phys_addr_t phys; @@ -55,7 +55,7 @@ void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET p // Check for, and translate through, the I/O MMU (if any) if ((dom = iommu_get_domain_for_dev(g->dev))) { phys = iommu_iova_to_phys(dom, addr); - printk(KERN_ERR "[nvdebug] I/O MMU translated SYS_MEM I/O VA %#lx to physical address %llx.\n", addr, phys); + printk_debug(KERN_DEBUG "[nvdebug] I/O MMU translated SYS_MEM I/O VA %#lx to physical address %#llx.\n", addr, phys); } else phys = addr; @@ -94,13 +94,14 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, // Succeed when we reach a PTE with the address we want if (entry.is_pte) { // TODO: Handle huge pages here - printk_debug(KERN_INFO "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w); + printk_debug(KERN_DEBUG "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, entry.is_privileged, entry.is_readonly, entry.atomics_disabled, entry.raw_w); return (uint64_t)entry.addr << 12 == addr_to_find && entry.aperture == addr_to_find_aperture; } - printk_debug(KERN_INFO "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w); + printk_debug(KERN_DEBUG "[nvdebug] Found PDE pointing to %#018llx in ap '%s' vol '%d' at lvl %d (raw: %#018llx)\n", ((u64)entry.addr_w) << 12, pd_target_to_text(entry.target), entry.is_volatile, level, entry.raw_w); // Depth-first search of the page table for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) { uint64_t next = ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i; + printk_debug(KERN_DEBUG "[nvdebug] Searching index %llu in lvl %d\n", i, level + 1); res = search_page_directory_subtree(g, next, entry.target, addr_to_find, addr_to_find_aperture, level + 1); if (res) return res | (i << NV_MMU_PT_V2_LSB[level + 1]); @@ -134,7 +135,7 @@ uint64_t search_page_directory(struct nvdebug_state *g, printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find); return 0; } - printk(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018lx\n", addr_to_find, (uintptr_t)pd_config.page_dir << 12); + printk_info(KERN_INFO "[nvdebug] Searching for addr %#018llx in page table with base %#018lx\n", addr_to_find, (uintptr_t)pd_config.page_dir << 12); // Search the top-level page directory (PDE3) for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++) if ((res = search_page_directory_subtree(g, ((uintptr_t)pd_config.page_dir << 12) + NV_MMU_PT_V2_ENTRY_SZ[0] * i, INST2PD_TARGET(pd_config.target), addr_to_find, addr_to_find_aperture, 0))) @@ -154,6 +155,21 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, page_tbl_entry_v1_t pte; uintptr_t pte_offset, pde_offset; void __iomem *pte_addr, *pde_addr; + + // This function only understands the Page Table Version 1 format + if (pd_config.is_ver2) { + printk(KERN_ERR "[nvdebug] Passed a Version 2 page table at %#018llx to translate_v1_page_directory()!\n", (uint64_t)pd_config.page_dir << 12); + return 0; + } + + // We only understand the Version 1 format when 128 KiB huge pages are in-use + if (pd_config.is_64k_big_page) { + printk(KERN_ERR "[nvdebug] Page Table Version 1 with 64 KiB huge pages is unsupported!\n"); + return 0; + } + + printk_info(KERN_INFO "[nvdebug] Searching V1 page table at %#018lx in %s for addr %#018llx\n", (uintptr_t)pd_config.page_dir << 12, target_to_text(pd_config.target), addr_to_find); + // For each PDE do { // Index the list of page directory entries @@ -172,7 +188,7 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID) continue; // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw); - printk_debug(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.alt_target, pde.raw); + printk_debug(KERN_DEBUG "[nvdebug] Found %s PDE at index %lld pointing to PTEs @ %#018llx in ap '%d' (raw: %#018llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", i, ((u64)pde.alt_addr) << 12, pde.alt_target, pde.raw); // For each PTE for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) { // Index the list of page table entries starting at pde.alt_addr @@ -190,7 +206,7 @@ uint64_t search_v1_page_directory(struct nvdebug_state *g, // Skip non-present PTEs if (!pte.is_present) continue; - printk_debug(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw); + printk_debug(KERN_DEBUG "[nvdebug] PTE for phy addr %#018llx, ap '%s', vol '%d', priv '%d', ro '%d', no_atomics '%d' (raw: %#018llx)\n", ((u64)pte.addr) << 12, target_to_text(pte.target), pte.is_volatile, pte.is_privileged, pte.is_readonly, pte.atomics_disabled, pte.raw); // If we find a matching PTE, return its virtual address if ((uint64_t)pte.addr << 12 == addr_to_find && pte.target == addr_to_find_aperture) return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; diff --git a/nvdebug.h b/nvdebug.h index 7564f8c..9e0058d 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -1015,19 +1015,19 @@ typedef union { Page Table Entry (PTE) (13 bits) <--------------+ | Page Directory Entry (PDE) (13 bits) <-+ | | ^ ^ ^ - Virtual address: [39, 25] [24, 12] [11, 0] + Virtual address: [39, 26] [25, 12] [11, 0] == Figure 2: 128 KiB mode == Page Offset (12 bits) <----------------------------------+ Page Table Entry (PTE) (14 bits) <--------------+ | Page Directory Entry (PDE) (12 bits) <-+ | | ^ ^ ^ - Virtual address: [39, 26] [25, 12] [11, 0] + Virtual address: [39, 27] [26, 12] [11, 0] Support: Fermi, Kepler, Maxwell, Pascal* Note: *Pascal introduces Version 2 Page Tables, but is backwards-compatible. - Note: We only implement the 64-KiB-large-page mode in nvdebug. + Note: We only implement the 128-KiB-large-page mode in nvdebug. See also: mm_gk20a.c in nvgpu (Jetson GPU driver) and kern_gmmu_fmt_gm10x.c in open-gpu-kernel-modules (open-source NVRM variant). This is @@ -1046,9 +1046,9 @@ typedef union { from the page table development process, and have no meaning now. */ // Number of entries in the PDE and PTE levels -static const int NV_MMU_PT_V1_SZ[2] = {8192, 8192}; +static const int NV_MMU_PT_V1_SZ[2] = {4096, 16384}; // 2^12 and 2^14 // Which bit index is the least significant in indexing each page level -static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; +static const int NV_MMU_PT_V1_LSB[2] = {27, 12}; // V1 Page Directory Entry target enum V1_PD_TARGET { -- cgit v1.2.2